#!/usr/bin/perl # # obfuscidl.pl - OMG IDL obfuscator # # Requires: # UniLexer.pm (source http://okellogg.de/UniLexer.pm) # # Usage: # perl -I obfuscidl.pl [-q] [-c] [-l] # option -q reduces verbosity (quiet) # option -c compresses consecutive empty lines into a single empty line # option -l yields longer obfuscated names # option -r promotes upper-casing of IDL names to file names # (the default is to lowercase them for the file names) # Reads all IDL files from source_dir and writes obfuscated copies to # the current working dir. # # Additionally, a file "map.txt" is created which contains the map of # source file names to obfuscated file names and of the identifiers in # all files processed to their obfuscated names: # * A source file map entry begins with "CU" followed by the original # filename and then the obfuscated filename. # * An identifier map entry begins with "ID" followed by the original # identifier and then the obfuscated identifier. # This file is only for reference by humans, it is not used as an input # to the obfuscation process. # # $Log: obfuscidl.pl,v $ # Revision 0.4 2016/03/17 19:56:23 okellogg # New option -r promotes upper-casing of IDL names to file names # (the default is to lowercase them for the file names.) # # Revision 0.3 2016/03/11 20:47:24 okellogg # New option -c compresses consecutive empty lines into a single empty line. # # Revision 0.2 2015/09/12 15:54:59 okellogg # Improvements resulting from obfuscating a large real-life project: # - Switch "-l" now generates obfuscated names that are similar in length # to the original names. # - Bugfix in sub push_once: Initialize $last from pop(@components) instead # of shift(@components). # - New sub is_comment returns true if the given argument is a comment. # - New sub is_literal returns true if the given argument is a boolean, # numeric, character, or string literal. # - sub parse_block is dissolved into main program. # - $UniLexer::promote_comments is enabled due to supporting special # comment directives which shall be promoted to the obfuscated output. # - New sub mangle returns the entry in %id keyed by the given argument but # dies if no such key exists. # - New sub join_name splices together a qualified name in @lex which begins # at index $lndx. This is necessary because the UniLexer.pm module was # changed to return the elements of qualified names as separate tokens. # join_name shortens the @lex list in-place on finding a qualified name. # - In sub process, # - New flag $in_enum is true while an enum declaration is being parsed; # - Parsing of 'typedef' source type is tightened up; # - Parsing of 'union' switch type and 'case' names is tightened up; # - Parsing of 'enum' values and structured type members is tightened up. # - In sub emit, the logic for "newline was printed already" no longer gives # false positives. # - In main program, # - RTI DDS special comment //@key is promoted to the obfuscated output; # - On encountering an #ifndef which is followed by a #define of the same # name, this is interpreted as a header fence, and the header fence name # is obfuscated; # - Parsing of 'enum' values is tightened up; # - Printing of single and multi line comments is explicitly suppressed. # # Revision 0.1 2014/10/18 08:36:21 kellogg # *** empty log message *** # # # Copyright (C) 2014, Oliver Kellogg # # This program is free software; you can redistribute it and/or modify # it under the terms of the GNU General Public License as published by # the Free Software Foundation; either version 2 of the License, or # (at your option) any later version. # # This program is distributed in the hope that it will be useful, # but WITHOUT ANY WARRANTY; without even the implied warranty of # MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the # GNU General Public License for more details. # # You should have received a copy of the GNU General Public License # along with this program; if not, write to the Free Software # Foundation, 59 Temple Place - Suite 330, Boston, MA 02111-1307, USA. # use UniLexer; @do_not_mangle = qw( in inout out boolean char octet short long unsigned float double string any Object ); sub base35($) { my $number = shift; # exclude letter 'l' (small L) to avoid confusion with the number 1 my @char = qw( 0 1 2 3 4 5 6 7 8 9 a b c d e f g h i j k m n o p q r s t u v w x y z ); $char[$number]; } sub base35count($$) { my $number = shift; my $name = shift; my $extra_digits = ""; if ($use_longer_names) { my $d4 = int($number / 42875); $number -= $d4 * 42875; if (length($name) > 5) { my $leading_zeros = length($name) - 5; $extra_digits = '0' x $leading_zeros; } $extra_digits .= base35($d4); } my $d3 = int($number / 1225); $number -= $d3 * 1225; my $d2 = int($number / 35); $number -= $d2 * 35; my $d1 = $number; return $extra_digits . base35($d3) . base35($d2) . base35($d1); } %MNEMON = ('module' => 'M', 'const' => 'C', 'enum' => 'E', 'typedef' => 'T', 'struct' => 'S', 'union' => 'U', 'valuetype' => 'V', 'interface' => 'I'); %count = ( 'M' => 0, # module 'C' => 0, # const 'E' => 0, # enum 'T' => 0, # typedef 'S' => 0, # struct 'U' => 0, # union 'V' => 0, # valuetype 'I' => 0, # interface 'X' => 0, # exception 'N' => 0); # name (method, attribute, or enum literal) %id = (); sub push_once($$) { my $name = shift; my $prefix = shift; $name =~ s/^:://; my @components = split(/::/, $name); my $last = pop @components; foreach my $nm (@components) { next if grep(/^${nm}$/, @do_not_mangle); next if exists($id{$nm}); $id{$nm} = 'M' . base35count(++$count{'M'}, $nm); } unless (grep(/^${last}$/, @do_not_mangle) || exists($id{$last})) { $id{$last} = $prefix . base35count(++$count{$prefix}, $last); } } sub mangled_name($$) { my($compound_name, $prefix) = @_; push_once($compound_name, $prefix); my $global = $compound_name =~ /^::/; if ($global) { $compound_name =~ s/^:://; } my @components = split(/::/, $compound_name); my $first = 1; my $result; foreach my $nm (@components) { if ($first) { $first = 0; } else { $result .= "::"; } if (exists $id{$nm}) { $nm = $id{$nm}; } $result .= $nm; } if ($global) { $result = "::" . $result; } return $result; } sub idlfile($;$) { my $compound_name = shift; my $prefix = 'M'; if (@_) { $prefix = shift; } my $stem = mangled_name($compound_name, $prefix); $stem =~ s/^:://; $stem =~ s/::/-/g; unless ($promote_idl_uppercasing_to_filename) { $stem = lc($stem); } return $stem . ".idl"; } @lex = (); # global buffer for return value from UniLexer::lex() $lndx = 0; # global index into @lex sub is_comment($) { my $item = shift; return $item =~ /^ ?\/[\/\*]/; } sub is_literal($) { my $lx = shift; # Return true for boolean/numeric/character/string literal return $lx eq "TRUE" || $lx eq "FALSE" || $lx =~ /^[\d'"]/; } # Advance $lndx to the next token (excluding numeric/character/string literals and comments) sub nexttoken() { my $result = ""; while (++$lndx < scalar(@lex)) { my $lx = $lex[$lndx]; next if is_literal($lx); next if is_comment($lx); $result = $lx; last; } return $result; } sub skip_to($) { my $end = shift; my $nxt; while ((($nxt = &nexttoken))) { last if ($nxt eq $end); } } sub skip_to_first_of { my @end = @_; my $nxt; OUTER: while ((($nxt = &nexttoken))) { foreach (@end) { last OUTER if ($nxt eq $_); } } } sub decl2mnemon($) { my $decl = shift; $decl or return undef; return $MNEMON{$decl}; } sub process($); sub emit($); sub amount(); sub spaces(); sub dent($); sub indent($); sub decr_indentlevel($); sub dedent($); sub is_reserved_word($); sub prettytoken(); sub prettyprint(); @reswd = qw( abstract any attribute boolean case char component const consumes context custom default double emits enum exception eventtype factory false finder fixed float getraises home import in inout interface local long module multiple native object octet oneway out primarykey private provides public publishes raises readonly setraises sequence short string struct supports switch true truncatable typedef typeid typeprefix unsigned union uses valuebase valuetype void wchar wstring ); %reserved_word = map { $_ => 1 } @reswd; $srcpath = "."; $quiet = 0; $use_longer_names = 0; $compress_empty_lines = 0; $promote_idl_uppercasing_to_filename = 0; foreach my $arg (@ARGV) { if ($arg =~ /^-/) { if ($arg eq "-q") { $quiet = 1; } elsif ($arg eq "-c") { $compress_empty_lines = 1; } elsif ($arg eq "-l") { $use_longer_names = 1; } elsif ($arg eq "-r") { $promote_idl_uppercasing_to_filename = 1; } else { warn "usage: perl -I obfuscidl.pl [-q] [-c] [-l] \n"; warn " option -q reduces verbosity (quiet)\n"; warn " option -c compresses consecutive empty lines into a single empty line\n"; die " option -l yields longer obfuscated names\n"; } } else { $srcpath = $arg; } } # Enable comments in return list from UniLexer::lex() # The default behaviour is that comment lines are replaced by empty lines. # This is done so that line numbers in the obfuscation correspond to line # numbers in the original code. The command line switch "-c" changes it. $UniLexer::promote_comments = 1; # Key : output file name after obfuscation # Value: reference to copy of list returned from UniLexer::lex() %tokens = (); # Key : output file name after obfuscation # Value: reference to copy of @UniLexer::linenum %linenums = (); %files = (); open(MAP, ">map.txt") or die "cannot create file map.txt\n"; # cpp -H $filename /dev/null >& tmp.inc # perl -n -e 'BEGIN { @a=(); } # chop; push @a, $_; # END { print(join("\n", reverse(sort(@a))) . "\n"); }' tmp.inc # while (<$srcpath/*.idl>) { my $filename = $_; exists($files{$filename}) and next; $quiet or print "Filename: $filename\n"; `cpp -H $filename /dev/null 2> tmp.inc`; $? and die "obfuscidl.pl: \"cpp -H $filename\" returned error, aborting.\n"; open(INCLIST, ") { chop; s/^\.+ //; push @inclist, $_; } close (INCLIST); unlink "tmp.inc"; if (@inclist) { # foreach (@inclist) { # s/ .*\// /; # } @inclist = reverse(sort @inclist); # print("----------- inclist:\n" . join("\n", @inclist) . "\n\n"); foreach (@inclist) { my $i = $_; $i =~ s/^\.+ //; process($i); } } process($filename); } sub mangle($) { my $nm = shift; exists($id{$nm}) or die "Fatal error: no mangle found for $nm\n"; return $id{$nm}; } sub join_name() { if ($lex[$lndx] eq "::") { my $nm = splice(@lex, $lndx + 1, 1); $lex[$lndx] .= mangle($nm); } while ($lndx < scalar(@lex) - 1 && $lex[$lndx + 1] eq "::") { splice(@lex, $lndx + 1, 1); # discard "::" my $nm = splice(@lex, $lndx + 1, 1); $lex[$lndx] .= "::" . mangle($nm); } return $lex[$lndx]; } sub process($) { my $filename = shift; exists($files{$filename}) and return; $files{$filename} = 1; @lex = UniLexer::lex($filename); @lex or die "an error happened in UniLexer::lex, cannot continue\n"; my $idlname = $filename; $idlname =~ s@^.*/@@; $idlname =~ s/\.idl$//; $idlname =~ s/-/::/g; my $ending; my $mnemon = 'M'; if ($idlname =~ /\:\:/) { # Find the correct mnemon. # This code is ugly... $ending = $idlname; $ending =~ s/^.*\:\://; my $decl = `sed -n "s/^ *\\(const\\|enum\\|struct\\|union\\|valuetype\\|interface\\) ${ending} .*\$/\\1/p; s/^ *typedef .* ${ending};.*\$/typedef/p" $filename`; chomp $decl; $mnemon = decl2mnemon($decl); if ($mnemon) { push_once($ending, $mnemon); $idlname =~ s/\:\:[^:]+$//; } } push_once($idlname, 'M'); if ($mnemon && $mnemon ne 'M') { $idlname .= "::" . $ending; } else { $mnemon = 'M'; } my $outfilename = idlfile($idlname, $mnemon); $quiet or print " => $outfilename\n"; my $path = $filename; $path =~ s/\/[^\/]+$//; my $fname_without_path = $filename; $fname_without_path =~ s/^.*\///; print MAP "CU $fname_without_path $outfilename\n"; my $in_enum = 0; $lndx = -1; my $l; while (($l = nexttoken)) { # print " lex[$lndx] = $lex[$lndx]\n"; if ($l eq '#include') { my $incfile = $lex[++$lndx]; $incfile =~ s/^["<]//; $incfile =~ s/[">]$//; my @lex_sav = @lex; my $lndx_sav = $lndx; process("$path/$incfile"); @lex = @lex_sav; $lndx = $lndx_sav; } elsif ($l eq '#if' || $l eq '#ifdef' || $l eq '#ifndef' || $l eq '#define' || $l eq '#pragma') { # @todo improve UniLexer joining for #if, #define, and #pragma nexttoken; # header fence to be recomputed later } elsif ($l eq 'module') { $in_enum = 0; my $m = nexttoken; push_once($m, 'M'); nexttoken; # skip '{' } elsif ($l eq 'const') { $in_enum = 0; nexttoken; push_once(nexttoken, 'C'); skip_to(';'); } elsif ($l eq 'enum') { push_once(nexttoken, 'E'); $in_enum = 1; nexttoken; # skip '{' } elsif ($l eq 'typedef') { $in_enum = 0; my $nx = nexttoken; my $in_seq = 0; if ($nx eq 'sequence') { $in_seq = 1; nexttoken; # skip '<' unless (is_reserved_word($lex[$lndx])) { join_name; } } elsif (! is_reserved_word($lex[$lndx])) { join_name; } skip_to(';'); while ($lndx > 1 && $lex[--$lndx] eq ']') { while ($lndx > 0 && $lex[--$lndx] ne '[') { ; } } push_once($lex[$lndx], 'T'); skip_to(';'); } elsif ($l eq 'struct') { $in_enum = 0; my $t = nexttoken; push_once($t, 'S'); nexttoken; # skip '{' } elsif ($l eq 'union') { $in_enum = 0; my $u = nexttoken; push_once($u, 'U'); nexttoken; # skip "switch" nexttoken; # skip '(' unless (is_reserved_word($lex[$lndx])) { join_name; } skip_to('{'); } elsif ($l eq 'case') { unless (is_literal($lex[$lndx])) { join_name; } skip_to(':'); } elsif ($l eq 'valuetype') { $in_enum = 0; push_once(nexttoken, 'V'); skip_to('{'); } elsif ($l eq 'interface') { $in_enum = 0; push_once(nexttoken, 'I'); skip_to('{'); } elsif ($l =~ /^[\w\:]/) { if ($in_enum) { my @def_ids = ($l); my $nx; while (($nx = nexttoken)) { $nx eq ',' or last; my $def_id = nexttoken; $def_id or die "Fatal error after $l\n"; push @def_ids, $def_id; } foreach (@def_ids) { push_once($_, 'N'); } next; } if ($l eq 'public' || $l eq 'private') { # valuetype qualifiers $l = nexttoken; } if (is_reserved_word($l)) { while (is_reserved_word($l)) { $l = nexttoken; } if ($l eq '<') { skip_to('>'); $l = nexttoken; } } else { $l =~ /^[\w\:]/ or die "Expecting type name at $l\n"; join_name; $l = nexttoken; } $l =~ /^\w/ or die "Expecting member name at $l\n"; push_once($l, 'N'); skip_to(';'); } elsif ($l ne '}' && $l ne ';' && $l !~ /^#/) { $quiet or print " unexpected next token $l\n"; } } $tokens{$outfilename} = [ @lex ]; $linenums{$outfilename} = [ @UniLexer::linenum ]; } foreach my $name (sort(keys %id)) { my $mangled = $id{$name}; print MAP "ID $name $mangled\n"; } close MAP; $one_indentation = 3; # one indentation measured in spaces $right_border = 117; # column after which to wrap lines $indentlevel = 0; $prindentlevel = 0; # "pretty indent level", used for long RHS expressions # that span multiple lines $line = 1; # output line number $column = 0; # output column of next character that will be printed @linenum = (); # working buffer for current value retrieved from %linenums $at_new_line = 1; sub emit($) { my $txt = shift; if ($txt eq "\n" && $at_new_line && $lndx < scalar(@lex) - 1 && defined($linenum[$lndx + 1]) && $line >= $linenum[$lndx + 1]) { # newline was printed already # Have to print at least one space to separate consecutive # keywords/identifiers print OUT ' '; $at_new_line = 0; return; } print OUT $txt; $at_new_line = 0; if ($txt =~ /\n/) { $column = 0; if ($txt =~ /\n$/) { $at_new_line = 1; } $txt =~ s/[^\n]//g; $line += length($txt); if ($at_new_line && $lndx < scalar(@lex) - 1 && defined($linenum[$lndx + 1])) { my $nxtline = $linenum[$lndx + 1]; while ($line < $nxtline - 1) { $compress_empty_lines or print OUT "\n"; ++$line; } } } else { $column += length($txt); } } sub amount() { return $one_indentation * ($indentlevel + $prindentlevel); } sub spaces() { return ' ' x &amount; } sub dent($) { emit(&spaces . shift); } sub indent($) { dent shift; $indentlevel++; } sub decr_indentlevel($) { my $where = shift; if ($indentlevel == 0) { warn "$where (tokenindex $lndx): internal error - negative indentlevel\n"; } else { $indentlevel--; } } sub dedent($) { my $text = shift; decr_indentlevel("dedent $text"); dent $text; } sub is_reserved_word($) { my $lx = shift; return (exists $reserved_word{$lx}); } sub prettytoken() { # Leaves $lndx pointing to one past the last token printed. if ($lndx >= scalar(@lex)) { warn "internal error: illegal lndx ($lndx) at call to sub prettytoken\n"; return undef; } my $global = ""; if ($lex[$lndx] eq "::") { $global = "::"; ++$lndx; } my $accu = $lex[$lndx]; unless ($accu =~ /^\w/) { my $nx = $lex[++$lndx]; if (($accu eq ',' || $accu eq ';') && defined($linenum[$lndx]) && $line < $linenum[$lndx]) { # sub emit increments $line (also for embedded newlines) $accu .= "\n" . spaces; } elsif ($accu eq ')' && $nx ne ')' && $nx ne ',' && $nx ne ';' && $nx ne '::') { $accu .= ' '; } elsif ($accu ne "." && $accu ne "(" && $accu ne ")" && $nx ne "::" && $nx ne "'" && $nx ne ")" && $nx ne "," && $nx ne ";") { $accu .= ' '; } return $accu; } if (exists $id{$accu}) { $accu = $id{$accu}; } $accu = $global . $accu; while ($lndx < scalar(@lex) - 1 && $lex[++$lndx] eq '::') { my $lx = $lex[++$lndx]; if (exists $id{$lx}) { $lx = $id{$lx}; } $accu .= "::" . $lx; } my $nx = $lex[$lndx]; if ($nx eq "}") { $accu .= "\n"; } elsif ($nx ne "(" && $nx ne ")" && $nx ne "," && $nx ne ";") { $accu .= ' '; } return $accu; } sub prettyprint() { # Leaves $lndx pointing to one past the last token printed. if ($column >= $right_border) { warn("prettyprint: inserting newline at $lndx ($lex[$lndx]) " . "because column exceeds $right_border\n"); emit "\n"; emit spaces; } my $text = &prettytoken; emit $text; } #### Main program #### foreach my $outfilename (sort keys %tokens) { open(OUT, ">$outfilename") or die "cannot open $outfilename\n"; $quiet or print "writing $outfilename ...\n"; @lex = @{$tokens{$outfilename}}; @linenum = @{$linenums{$outfilename}}; $lndx = 0; $line = 1; while ($line < $linenum[0]) { $compress_empty_lines or print OUT "\n"; ++$line; } $column = 0; $at_new_line = 1; $indentlevel = 0; $prindentlevel = 0; my $seen_case = 0; for (; $lndx < scalar(@lex); $lndx++) { my $lx = $lex[$lndx]; if ($lx eq ';') { emit ";"; if ($lndx < scalar(@lex) - 1 && $lex[$lndx + 1] =~ /^ \/\/\@/) { # Special handling - preserve RTI DDS comment //@key emit $lex[++$lndx]; } emit "\n"; next; } if ($lx eq '}') { $lndx++; dedent "};\n"; } elsif ($lx eq '#include') { my $nm = $lex[++$lndx]; $nm =~ s/^["<]//; $nm =~ s/[">]$//; $nm =~ s/\.idl$//; $nm =~ s/-/::/g; my $incfile = idlfile($nm); emit "#include \"$incfile\"\n"; } elsif ($lx eq '#ifndef') { my $nm = $lex[++$lndx]; if ($lex[$lndx + 1] eq '#define' && $lex[$lndx + 2] eq $nm) { # Assume this is a header fence $nm = uc($outfilename); $nm =~ s/-/_/g; $nm =~ s/\./__/; emit "#ifndef $nm\n"; $lndx += 2; emit "#define $nm\n\n"; } else { emit "$lx $nm\n"; } } elsif ($lx eq '#endif') { emit "$lx\n"; } elsif ($lx =~ /^#/) { emit("$lx " . $lex[++$lndx] . "\n"); } elsif ($lx eq 'enum') { my $nm = $lex[++$lndx]; ++$lndx; # skip '{' indent("$lx " . $id{$nm} . " {\n"); dent($id{&nexttoken}); while (nexttoken eq ',') { emit($line < $linenum[$lndx] ? ",\n" . spaces : ", "); emit($id{&nexttoken}); } emit("\n"); dedent("}"); } elsif ($lx eq 'typedef') { dent("$lx "); } elsif ($lx eq 'union') { dent("$lx " . $id{&nexttoken} . " switch ("); $lndx += 2; # skip 'switch (' # emit(prettytoken . ")"); } elsif ($lx eq 'module' || $lx eq 'struct' || $lx eq 'exception' || $lx eq 'valuetype' || $lx eq 'interface') { dent("$lx " . $id{$lex[++$lndx]}); } elsif ($lx eq 'case') { if ($lex[$lndx - 1] eq ':') { decr_indentlevel("repeated \"case\""); } emit spaces; $indentlevel++; while ($lndx < scalar(@lex)) { emit prettytoken; last if ($lex[$lndx - 1] eq ':'); } $seen_case = 1; --$lndx; emit "\n"; } elsif ($lx eq '{') { emit " {\n"; $indentlevel++; } elsif ($lx =~ /^ ?\/\//) { ; # do not print single line comment } elsif ($lx =~ /^ ?\/\*/) { ; # do not print multi line comment } else { if ($at_new_line) { emit spaces; $prindentlevel = 0; } prettyprint; --$lndx; if ($seen_case) { $seen_case = 0; decr_indentlevel("case " . $lex[$lndx]); } } } if ($lndx < scalar(@lex)) { emit("// after token printing loop\n"); while ($lndx < scalar(@lex)) { emit(prettytoken . "\n"); } } emit "\n"; close OUT; } $quiet or print "all done.\n"; 1;