#!/usr/bin/env perl
# by David Conrad
# This code is licensed under GPLv2 or later; go to gnu.org to read it
#  (not that it much matters for an asm preprocessor)
# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
use strict;

# Apple's gas is ancient and doesn't support modern preprocessing features like
# .rept and has ugly macro syntax, among other things. Thus, this script
# implements the subset of the gas preprocessor used by x264 and ffmpeg
# that isn't supported by Apple's gas.

# The whole argument vector is the compiler command line we are wrapping.
my @gcc_cmd = @ARGV;
my @preprocess_c_cmd;

# The .unreq workaround defaults to on for darwin hosts; a leading
# -fix-unreq / -no-fix-unreq argument overrides it and is consumed here.
my $fix_unreq = $^O eq "darwin";

if ($gcc_cmd[0] eq "-fix-unreq" or $gcc_cmd[0] eq "-no-fix-unreq") {
    $fix_unreq = (shift(@gcc_cmd) eq "-fix-unreq") ? 1 : 0;
}

# Build the command that produces the text we will preprocess.
# NOTE: this is derived from @gcc_cmd *before* any "-o" is appended below.
if (grep { /\.c$/ } @gcc_cmd) {
    # C source (inline asm?): let the compiler emit assembly
    @preprocess_c_cmd = (@gcc_cmd, "-S");
}
elsif (grep { /\.[sS]$/ } @gcc_cmd) {
    # assembly source: only the C preprocessor is needed
    @preprocess_c_cmd = (@gcc_cmd, "-E");
}
else {
    die "Unrecognized input filetype";
}

# The assembler will read from stdin ("-"), so with -c and no explicit output
# it would write '-.o'.  Name the object after the first source file instead.
if ((grep { /^-c$/ } @gcc_cmd) && !(grep { /^-o/ } @gcc_cmd)) {
    my ($source) = grep { /\.[csS]$/ } @gcc_cmd;
    if (defined $source) {
        (my $outputfile = $source) =~ s/\.[csS]$/.o/;
        push(@gcc_cmd, "-o", $outputfile);
    }
}

# Final assembler command: every source file argument becomes "read assembler
# from stdin"; the preprocessing command writes to stdout instead of a .o file.
@gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
@preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;

# Return the assembler comment character implied by a string that names an
# architecture ('@' for arm, '#' for powerpc/ppc), or nothing if the string
# mentions neither.
sub comment_char_for_arch {
    my ($text) = @_;
    return '@' if $text =~ /arm/;
    return '#' if $text =~ /powerpc|ppc/;
    return;
}

# detect architecture from gcc binary name
my $comm = comment_char_for_arch($gcc_cmd[0]);

# look for -arch flag; a recognised value overrides the binary-name guess
for my $idx (1 .. $#gcc_cmd-1) {
    next unless $gcc_cmd[$idx] eq "-arch";
    my $detected = comment_char_for_arch($gcc_cmd[$idx+1]);
    $comm = $detected if defined $detected;
}

# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
unless ($comm) {
    my $native_arch = qx/arch/;
    $comm = comment_char_for_arch($native_arch);
}

die "Unable to identify target architecture" unless $comm;

# Special-purpose register numbers used in pass 2 to rewrite PowerPC
# mtXXX/mfXXX mnemonics into explicit mtspr/mfspr instructions.
my %ppc_spr = (ctr    => 9,
               vrsave => 256);

# Run the compiler/preprocessor and read its output through a pipe.
open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor";

# --- state shared by parse_line / expand_macros / handle_if / eval_expr ---
my $current_macro = '';     # name of the macro currently being recorded
my $macro_level = 0;        # .macro nesting depth (0 = not inside a definition)
my %macro_lines;            # macro name -> array of body lines
my %macro_args;             # macro name -> array of declared argument names
my %macro_args_default;     # macro name -> { argument name -> default value }
my $macro_count = 0;        # running counter substituted for \@
my $altmacro = 0;           # true between .altmacro and .noaltmacro

my @pass1_lines;            # output of pass 1, consumed by pass 2
my @ifstack;                # one entry per open .if block

my %symbols;                # values assigned with .set

# pass 1: parse .macro
# note that the handling of arguments is probably overly permissive vs. gas
# but it should be the same for valid cases
while (<ASMFILE>) {
    # remove all comments (to avoid interfering with evaluating directives).
    # The comment character is quoted and /x is not used: with /x an
    # interpolated '#' (the PowerPC comment character) would itself start a
    # regex comment and the substitution would silently do nothing.
    s/(?<!\\)\Q$comm\E.*//;

    # comment out unsupported directives (same order as they were listed
    # individually: the first occurrence of each is prefixed with $comm)
    for my $directive (qw(type func endfunc ltorg size fpu arch object_arch)) {
        s/\.$directive/${comm}.${directive}/;
    }

    # the syntax for these is a little different
    s/\.global/.globl/;
    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
    s/(.*)\.rodata/.const_data/;
    s/\.bss/.data/;
    s/\.int/.long/;
    s/\.float/.single/;

    # catch unknown section names that aren't mach-o style (with a comma).
    # The leading dot is escaped so that e.g. ".subsection 1" is not mistaken
    # for an unsupported ".section".
    if (/\.section ([^,]*)$/) {
        die ".section $1 unsupported; figure out the mach-o section name and add it";
    }

    # parse_line is handed the line explicitly; $_ is also still set to it
    parse_line($_);
}

# Evaluate an assembler expression and return its value.
#
# Every identifier in the expression is replaced by its value from %symbols
# (identifiers that were never .set become the empty string), then the result
# is evaluated as a Perl expression.  Returns undef if the resulting text is
# not a valid Perl expression.
#
# The lookbehind keeps the substitution from firing in the middle of a numeric
# literal: without it the "x10" of 0x10, the "b101" of 0b101 or the "e3" of
# 1e3 were treated as (undefined) symbols and deleted, turning e.g. 0x100
# into 0.
#
# NOTE(review): this is a string eval of text taken from the assembly source;
# that is only acceptable because the input is the project's own source code.
sub eval_expr {
    my $expr = $_[0];
    $expr =~ s/(?<![A-Za-z0-9._])([A-Za-z._][A-Za-z0-9._]*)/$symbols{$1}/g;
    return eval $expr;
}

# Recognise and evaluate a .if-family directive.
#
# If $line contains one, its truth value is pushed onto @ifstack and 1 is
# returned (the caller must not emit the line).  Otherwise 0 is returned and
# nothing is changed.  Dies on a variant that is not implemented or on a
# malformed .ifc.
#
# Supported: .if/.ifn, .ifb/.ifnb, .ifc/.ifnc, .ifeq, .ifne, .iflt, .ifle,
# .ifgt, .ifge.
sub handle_if {
    my $line = $_[0];
    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
        my $negate = $1 eq "n";
        my $type   = $2;
        my $expr   = $3;
        my $result = $negate;

        if ($type eq "b") {
            # blank test: true when nothing but whitespace follows
            $expr =~ s/\s//g;
            $result ^= $expr eq "";
        } elsif ($type eq "c") {
            # string comparison of the two comma separated operands
            if ($expr =~ /(.*)\s*,\s*(.*)/) {
                $result ^= $1 eq $2;
            } else {
                die "argument to .ifc not recognized";
            }
        } elsif ($type eq "") {
            $result ^= eval_expr($expr) != 0;
        } elsif ($type eq "eq") {
            $result = eval_expr($expr) == 0;
        } elsif ($type eq "lt") {
            $result = eval_expr($expr) < 0;
        } elsif ($negate and $type eq "e") {
            # ".ifne" is split by the pattern above into "n" + "e"
            $result = eval_expr($expr) != 0;
        } elsif ($type eq "le") {
            $result = eval_expr($expr) <= 0;
        } elsif ($type eq "gt") {
            $result = eval_expr($expr) > 0;
        } elsif ($type eq "ge") {
            $result = eval_expr($expr) >= 0;
        } else {
            chomp($line);
            die "unhandled .if varient. \"$line\"";
        }
        push (@ifstack, $result);
        return 1;
    } else {
        return 0;
    }
}

# Process one line of pass 1.
#
# Tracks .if/.elseif/.else/.endif nesting (dropping lines inside false
# blocks), records the bodies of .macro ... .endm definitions, and hands every
# line outside a macro definition to expand_macros.
#
# All directive tests are made against the $line argument.  They used to look
# at the global $_, which only happens to hold the same text for top-level
# calls; when called for an expanded macro body $_ is the unsubstituted line.
#
# @ifstack entries: > 0 the block is active, 0/"" the block is inactive and
# no branch has been taken yet, < 0 an earlier branch was already taken.
sub parse_line {
    my $line = $_[0];

    # evaluate .if blocks
    if (scalar(@ifstack)) {
        if ($line =~ /\.endif/) {
            pop(@ifstack);
            return;
        } elsif ($line =~ /\.elseif\s+(.*)/) {
            # must be tested before .else, which it also matches
            if ($ifstack[-1] == 0) {
                $ifstack[-1] = !!eval_expr($1);
            } elsif ($ifstack[-1] > 0) {
                # this branch was active: mark the chain as done
                $ifstack[-1] = -$ifstack[-1];
            }
            return;
        } elsif ($line =~ /\.else/) {
            $ifstack[-1] = !$ifstack[-1];
            return;
        } elsif (handle_if($line)) {
            return;
        }

        # discard lines in false .if blocks
        foreach my $state (@ifstack) {
            return if $state <= 0;
        }
    }

    if ($line =~ /\.macro/) {
        $macro_level++;
        if ($macro_level > 1 && !$current_macro) {
            die "nested macros but we don't have master macro";
        }
    } elsif ($line =~ /\.endm/) {
        $macro_level--;
        if ($macro_level < 0) {
            die "unmatched .endm";
        } elsif ($macro_level == 0) {
            # outermost definition finished; the .endm itself is not stored
            $current_macro = '';
            return;
        }
    }

    if ($macro_level > 1) {
        # inside a nested definition: keep the line verbatim in the outer macro
        push(@{$macro_lines{$current_macro}}, $line);
    } elsif ($macro_level == 0) {
        expand_macros($line);
    } else {
        if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) {
            $current_macro = $1;

            # commas in the argument list are optional, so only use whitespace as the separator
            my $arglist = $2;
            $arglist =~ s/,/ /g;

            my @args = split(/\s+/, $arglist);
            foreach my $i (0 .. $#args) {
                # "name=default"; the declared name keeps any :vararg suffix,
                # the defaults table is keyed by the bare name
                my @argpair = split(/=/, $args[$i]);
                $macro_args{$current_macro}[$i] = $argpair[0];
                $argpair[0] =~ s/:vararg$//;
                $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
            }
            # ensure %macro_lines has the macro name added as a key
            $macro_lines{$current_macro} = [];

        } elsif ($current_macro) {
            push(@{$macro_lines{$current_macro}}, $line);
        } else {
            die "macro level without a macro name";
        }
    }
}

# Handle one line that is outside any macro definition.
#
# Consumes .if-family directives, .purgem, .altmacro and .noaltmacro; records
# .set assignments in %symbols; and, if the line invokes a known macro,
# substitutes the arguments into the macro body and feeds each resulting line
# back through parse_line.  Any other line is appended to @pass1_lines.
sub expand_macros {
    my $line = $_[0];

    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if (handle_if($line)) {
        return;
    }

    # Matched against $line rather than $_: during macro expansion $_ is the
    # unsubstituted body line, so ".purgem \name" would never be recognised.
    if ($line =~ /\.purgem\s+([\d\w\.]+)/) {
        delete $macro_lines{$1};
        delete $macro_args{$1};
        delete $macro_args_default{$1};
        return;
    }

    if ($line =~ /\.altmacro/) {
        $altmacro = 1;
        return;
    }

    if ($line =~ /\.noaltmacro/) {
        $altmacro = 0;
        return;
    }

    # in altmacro mode %expr is replaced by the value of expr
    $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;

    # remember the value; the .set line itself is still emitted below
    if ($line =~ /\.set\s+(.*),\s*(.*)/) {
        $symbols{$1} = eval_expr($2);
    }

    if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
        # keep a label that precedes the macro invocation
        push(@pass1_lines, $1);
        my $macro = $2;

        # commas are optional here too, but are syntactically important because
        # parameters can be blank
        my @arglist = split(/,/, $3);
        my @args;
        my @args_seperator;

        my $comma_sep_required = 0;
        foreach my $chunk (@arglist) {
            # allow arithmetic/shift operators in macro arguments
            $chunk =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g;

            my @whitespace_split = split(/\s+/, $chunk);
            if (!@whitespace_split) {
                # nothing between two commas: an explicitly blank argument
                push(@args, '');
                push(@args_seperator, '');
            } else {
                foreach my $word (@whitespace_split) {
                    if (length($word)) {
                        push(@args, $word);
                        # only the first word after a comma was comma separated
                        my $sep = $comma_sep_required ? "," : " ";
                        push(@args_seperator, $sep);
                        $comma_sep_required = 0;
                    }
                }
            }

            $comma_sep_required = 1;
        }

        # start from the declared defaults, then override with what was passed
        my %replacements;
        if ($macro_args_default{$macro}){
            %replacements = %{$macro_args_default{$macro}};
        }

        # construct hashtable of text to replace
        foreach my $i (0 .. $#args) {
            my $argname = $macro_args{$macro}[$i];
            if ($args[$i] =~ m/=/) {
                # arg=val references the argument name
                # XXX: I'm not sure what the expected behaviour if a lot of
                # these are mixed with unnamed args
                my @named_arg = split(/=/, $args[$i]);
                $replacements{$named_arg[0]} = $named_arg[1];
            } elsif ($i > $#{$macro_args{$macro}}) {
                # more args given than the macro has named args
                # XXX: is vararg allowed on arguments before the last?
                $argname = $macro_args{$macro}[-1];
                if ($argname =~ s/:vararg$//) {
                    $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
                } else {
                    die "Too many arguments to macro $macro";
                }
            } else {
                $argname =~ s/:vararg$//;
                $replacements{$argname} = $args[$i];
            }
        }

        # value substituted for \@ in this expansion
        my $count = $macro_count++;

        # do replacements by longest first, this avoids wrong replacement
        # when argument names are subsets of each other
        my @names_longest_first =
            reverse sort {length $a <=> length $b} keys %replacements;

        # apply replacements as regex.  The loop deliberately iterates with
        # the implicit $_ (as it always has), so that anything called from
        # here that inspects $_ sees the current macro body line.
        foreach (@{$macro_lines{$macro}}) {
            my $macro_line = $_;
            foreach my $name (@names_longest_first) {
                # \Q: the name is literal text, not a pattern
                $macro_line =~ s/\\\Q$name\E/$replacements{$name}/g;
            }
            $macro_line =~ s/\\\@/$count/g;
            $macro_line =~ s/\\\(\)//g;     # remove \()
            parse_line($macro_line);
        }
    } else {
        push(@pass1_lines, $line);
    }
}

# Pass 1 is complete: the preprocessor must have exited successfully, then the
# same handle is reopened as a pipe feeding the real assembler.
exit 1 unless close(ASMFILE);
die "Error running assembler" unless open(ASMFILE, "|-", @gcc_cmd);
#open(ASMFILE, ">/tmp/a.S") or die "Error running assembler";

# --- pass 2 state ---
my @sections;               # section directives seen so far (for .previous)
my ($num_repts, $rept_lines);   # repeat count and collected body of a .rept/.irp

my %literal_labels;     # for ldr <reg>, =<expr>
my $literal_num = 0;

my $thumb = 0;          # true while assembling thumb code

my (%thumb_labels, %call_targets);

my $in_irp = 0;         # true inside .irp/.irpc
my @irp_args;
my $irp_param;

# pass 2: parse .rept and .if variants
# NOTE: since we don't implement a proper parser, using .rept with a
# variable assigned from .set is not supported
foreach my $line (@pass1_lines) {
    # handle .previous (only with regard to .section not .subsection):
    # it is replaced by the section directive seen before the current one
    if ($line =~ /\.(section|text|const_data)/) {
        push(@sections, $line);
    } elsif ($line =~ /\.previous/) {
        unless ($sections[-2]) {
            die ".previous without a previous section";
        }
        $line = $sections[-2];
        push(@sections, $line);
    }

    # track the current instruction set
    if ($line =~ /\.code\s+16|\.thumb/) { $thumb = 1; }
    if ($line =~ /\.code\s+32|\.arm/)   { $thumb = 0; }

    # handle ldr <reg>, =<expr>: load from a label in a literal pool that is
    # emitted at the next .ltorg (or at the end of the file)
    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
        my ($before, $operands, $value) = ($1, $2, $3);
        my $label = $literal_labels{$value};
        unless ($label) {
            $label = "Literal_$literal_num";
            $literal_num++;
            $literal_labels{$value} = $label;
        }
        $line = "$before ldr$operands, $label\n";
    } elsif ($line =~ /\.ltorg/) {
        $line .= ".align 2\n";
        $line .= join('', map { "$literal_labels{$_}:\n .word $_\n" }
                          keys %literal_labels);
        %literal_labels = ();
    }

    # thumb add with large immediate needs explicit add.w
    if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
        my $immediate = $1;
        if (eval_expr($immediate) > 255) {
            $line =~ s/add/add.w/;
        }
    }

    # mach-o local symbol names start with L (no dot)
    $line =~ s/(?<!\w)\.(L\w+)/$1/g;

    # remember labels defined in thumb mode ...
    if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
        $thumb_labels{$1}++;
    }

    # ... and symbols that are called or exported
    if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) {
        $call_targets{$3}++;
    }

    # @l -> lo16() @ha -> ha16()
    $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
    $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;

    # move to/from SPR: operand order differs between mtspr and mfspr
    if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
        my ($lead, $op, $spr, $reg) = ($1, $2, $ppc_spr{$3}, $4);
        $line = $op eq 'mt' ? "$lead${op}spr $spr, $reg\n"
                            : "$lead${op}spr $reg, $spr\n";
    }

    # old gas versions store upper and lower case names on .req,
    # but they remove only one on .unreq
    if ($fix_unreq and $line =~ /\.unreq\s+(.*)/) {
        my $alias = $1;
        print ASMFILE ".unreq " . uc($alias) . "\n";
        $line = ".unreq " . lc($alias) . "\n";
    }

    if ($line =~ /\.rept\s+(.*)/) {
        $num_repts = $1;
        $rept_lines = "\n";

        # handle the possibility of repeating another directive on the same line
        # .endr on the same line is not valid, I don't know if a non-directive is
        if ($num_repts =~ s/(\.\w+.*)//) {
            $rept_lines .= "$1\n";
        }
        $num_repts = eval($num_repts);
    } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # only use whitespace as the separator
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(/\s+/, $irp_arglist);
    } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # one iteration per character of the argument
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(//, $irp_arglist);
    } elsif ($line =~ /\.endr/) {
        if ($in_irp != 0) {
            # emit the collected body once per argument, substituting it
            foreach my $i (@irp_args) {
                my $body = $rept_lines;
                $body =~ s/\\$irp_param/$i/g;
                $body =~ s/\\\(\)//g;     # remove \()
                print ASMFILE $body;
            }
        } else {
            print ASMFILE $rept_lines for 1 .. $num_repts;
        }
        $rept_lines = '';
        $in_irp = 0;
        @irp_args = '';
    } elsif ($rept_lines) {
        # inside a repeat block: collect instead of emitting
        $rept_lines .= $line;
    } else {
        print ASMFILE $line;
    }
}

# Flush the literals that no .ltorg has emitted yet into the text section.
print ASMFILE ".text\n", ".align 2\n";
foreach my $literal (keys %literal_labels) {
    print ASMFILE "$literal_labels{$literal}:\n .word $literal\n";
}

# Every called/exported symbol that was defined in thumb mode is declared
# as a thumb function.
foreach my $target (keys %call_targets) {
    next unless exists $thumb_labels{$target};
    print ASMFILE ".thumb_func $target\n";
}

# closing the pipe waits for the assembler; propagate its failure
exit 1 unless close(ASMFILE);
#exit 1