3 # This code is licensed under GPLv2 or later; go to gnu.org to read it
4 # (not that it much matters for an asm preprocessor)
5 # usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
8 # Apple's gas is ancient and doesn't support modern preprocessing features like
9 # .rept and has ugly macro syntax, among other things. Thus, this script
10 # implements the subset of the gas preprocessor used by x264 and ffmpeg
11 # that isn't supported by Apple's gas.
# Command-line handling: decide on the .unreq workaround, derive the
# preprocessing command, and rewrite the assembler command to read from stdin.
# The default for $fix_unreq is true only when perl is running on darwin.
16 my $fix_unreq = $^O eq "darwin";
# NOTE(review): the first element of @gcc_cmd may be -fix-unreq or
# -no-fix-unreq; presumably these force $fix_unreq on or off - confirm.
18 if ($gcc_cmd[0] eq "-fix-unreq") {
21 } elsif ($gcc_cmd[0] eq "-no-fix-unreq") {
# pick the preprocessing mode from the extension of the input file argument
26 if (grep /\.c$/, @gcc_cmd) {
27 # C file (inline asm?) - compile
28 @preprocess_c_cmd = (@gcc_cmd, "-S");
29 } elsif (grep /\.[sS]$/, @gcc_cmd) {
30 # asm file, just do C preprocessor
31 @preprocess_c_cmd = (@gcc_cmd, "-E");
# neither a .c nor a .s/.S argument was found
33 die "Unrecognized input filetype";
36 # if compiling, avoid creating an output file named '-.o'
# applies only when -c is present and no argument starts with -o
37 if ((grep /^-c$/, @gcc_cmd) && !(grep /^-o/, @gcc_cmd)) {
38 foreach my $i (@gcc_cmd) {
39 if ($i =~ /\.[csS]$/) {
# the object name is the source name with its extension swapped for .o
41 $outputfile =~ s/\.[csS]$/.o/;
43 push(@gcc_cmd, $outputfile);
# the source file argument is replaced by "-x assembler -" in the assembler
# command, so the assembler takes its input from stdin
48 @gcc_cmd = map { /\.[csS]$/ ? qw(-x assembler -) : $_ } @gcc_cmd;
# any argument ending in .o is replaced by "-" in the preprocessing command
# (presumably so the preprocessed text goes to stdout - confirm)
49 @preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;
# Target architecture detection. Three sources are consulted: the compiler
# binary name, an "-arch <name>" argument pair, and the host's `arch` command.
# Each is matched against /arm/ and /powerpc|ppc/.
53 # detect architecture from gcc binary name
54 if ($gcc_cmd[0] =~ /arm/) {
56 } elsif ($gcc_cmd[0] =~ /powerpc|ppc/) {
# scan the arguments for "-arch"; the range stops one short of the last
# element so that $gcc_cmd[$i+1] is always a valid index
61 foreach my $i (1 .. $#gcc_cmd-1) {
62 if ($gcc_cmd[$i] eq "-arch") {
63 if ($gcc_cmd[$i+1] =~ /arm/) {
65 } elsif ($gcc_cmd[$i+1] =~ /powerpc|ppc/) {
71 # assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
# qx/arch/ captures the output of the external `arch` command
73 my $native_arch = qx/arch/;
74 if ($native_arch =~ /arm/) {
76 } elsif ($native_arch =~ /powerpc|ppc/) {
# give up when no architecture could be determined
82 die "Unable to identify target architecture";
# register-name suffix => number table; pass 2 uses it to rewrite mf<name> /
# mt<name> instructions into mfspr / mtspr with the numeric operand
85 my %ppc_spr = (ctr => 9,
# run the preprocessing command and read its output through ASMFILE
88 open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor";
# name of the macro whose body lines are currently being collected
90 my $current_macro = '';
# default argument values, keyed by macro name and then by argument name
94 my %macro_args_default;
103 # pass 1: parse .macro
104 # note that the handling of arguments is probably overly permissive vs. gas
105 # but it should be the same for valid cases
107 # remove all comments (to avoid interfering with evaluating directives)
110 # comment out unsupported directives
# each substitution operates on $_ and prefixes the first occurrence of the
# directive with $comm (presumably the assembler's comment marker - confirm)
111 s/\.type/$comm.type/x;
112 s/\.func/$comm.func/x;
113 s/\.endfunc/$comm.endfunc/x;
114 s/\.ltorg/$comm.ltorg/x;
115 s/\.size/$comm.size/x;
117 s/\.arch/$comm.arch/x;
118 s/\.object_arch/$comm.object_arch/x;
120 # the syntax for these is a little different
122 # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
# everything on the line up to and including ".rodata" becomes ".const_data"
123 s/(.*)\.rodata/.const_data/x;
128 # catch unknown section names that aren't mach-o style (with a comma)
# NOTE(review): the "." before "section" is unescaped, so it matches any
# character; the capture is the comma-free text running to the end of the line
129 if (/.section ([^,]*)$/) {
130 die ".section $1 unsupported; figure out the mach-o section name and add it";
# substitute every identifier-like token in $expr with its entry in %symbols;
# a token with no entry is replaced by an empty string
138 $expr =~ s/([A-Za-z._][A-Za-z0-9._]*)/$symbols{$1}/g;
144 # handle .if directives; apple's assembler doesn't support important non-basic ones
145 # evaluating them is also needed to handle recursive macros
# captures: optional "n" (negation), the lowercase variant suffix, the operand
146 if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
# start from true for the negated form; the branches below XOR the test in
147 my $result = $1 eq "n";
# this variant is satisfied when the operand text is empty
153 $result ^= $expr eq "";
154 } elsif ($type eq "c") {
# .ifc takes two operands separated by a comma
155 if ($expr =~ /(.*)\s*,\s*(.*)/) {
158 die "argument to .ifc not recognized";
160 } elsif ($type eq "") {
161 $result ^= eval_expr($expr) != 0;
# NOTE(review): the "eq" and "lt" branches assign $result rather than XOR
# into it, so the "n" flag computed above is discarded for them
162 } elsif ($type eq "eq") {
163 $result = eval_expr($expr) == 0;
164 } elsif ($type eq "lt") {
165 $result = eval_expr($expr) < 0;
168 die "unhandled .if varient. \"$line\"";
# record the outcome as the innermost open conditional
170 push (@ifstack, $result);
# Per-line handling for pass 1: conditional blocks first, then macro recording.
# @ifstack values as used below: positive = branch active, 0 = no branch taken
# yet, negative = an earlier branch was already taken.
180 # evaluate .if blocks
181 if (scalar(@ifstack)) {
185 } elsif ($line =~ /\.elseif\s+(.*)/) {
# nothing taken so far: this branch is active iff its expression is non-zero
186 if ($ifstack[-1] == 0) {
187 $ifstack[-1] = !!eval_expr($1);
# a branch was active: negate the entry so later branches are skipped
188 } elsif ($ifstack[-1] > 0) {
189 $ifstack[-1] = -$ifstack[-1];
# invert the innermost conditional
193 $ifstack[-1] = !$ifstack[-1];
195 } elsif (handle_if($line)) {
199 # discard lines in false .if blocks
# any enclosing level that is zero or negative suppresses the line
200 foreach my $i (0 .. $#ifstack) {
201 if ($ifstack[$i] <= 0) {
# $macro_level is the nesting depth of macro definitions
209 if ($macro_level > 1 && !$current_macro) {
210 die "nested macros but we don't have master macro";
214 if ($macro_level < 0) {
215 die "unmatched .endm";
216 } elsif ($macro_level == 0) {
# inside a nested definition the line is stored verbatim in the outer macro
222 if ($macro_level > 1) {
223 push(@{$macro_lines{$current_macro}}, $line);
224 } elsif ($macro_level == 0) {
225 expand_macros($line);
# captures: macro name, then the rest of the line as the argument list
227 if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) {
230 # commas in the argument list are optional, so only use whitespace as the separator
234 my @args = split(/\s+/, $arglist);
235 foreach my $i (0 .. $#args) {
# "name=default" splits into the name and its default value
236 my @argpair = split(/=/, $args[$i]);
# the name is stored before ":vararg" is stripped, so %macro_args keeps the
# suffix while the %macro_args_default key does not
237 $macro_args{$current_macro}[$i] = $argpair[0];
238 $argpair[0] =~ s/:vararg$//;
239 $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
241 # ensure %macro_lines has the macro name added as a key
242 $macro_lines{$current_macro} = [];
244 } elsif ($current_macro) {
245 push(@{$macro_lines{$current_macro}}, $line);
247 die "macro level without a macro name";
255 # handle .if directives; apple's assembler doesn't support important non-basic ones
256 # evaluating them is also needed to handle recursive macros
257 if (handle_if($line)) {
# .purgem drops every table entry recorded for the named macro
# NOTE(review): this test matches $_ while the tests below match $line - confirm
261 if (/\.purgem\s+([\d\w\.]+)/) {
262 delete $macro_lines{$1};
263 delete $macro_args{$1};
264 delete $macro_args_default{$1};
268 if ($line =~ /\.altmacro/) {
273 if ($line =~ /\.noaltmacro/) {
# when $altmacro is set, "%expr" (up to the next comma) is replaced by its value
278 $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;
# .set name, expr: store the evaluated expression under the symbol name
280 if ($line =~ /\.set\s+(.*),\s*(.*)/) {
281 $symbols{$1} = eval_expr($2);
# macro invocation: optional "label:", a word naming a known macro, then the
# argument text
284 if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
# the label part (possibly empty) is emitted as its own pass-1 line
285 push(@pass1_lines, $1);
288 # commas are optional here too, but are syntactically important because
289 # parameters can be blank
290 my @arglist = split(/,/, $3);
# tracks whether the next argument followed a comma rather than whitespace
294 my $comma_sep_required = 0;
296 # allow arithmetic/shift operators in macro arguments
# whitespace around an operator is removed so the whitespace split below does
# not break the expression apart
297 $_ =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g;
299 my @whitespace_split = split(/\s+/, $_);
# a comma-delimited field with no content: record an empty separator for it
300 if (!@whitespace_split) {
302 push(@args_seperator, '');
304 foreach (@whitespace_split) {
305 #print ("arglist = \"$_\"\n");
# remember which separator preceded this argument, for rebuilding varargs
308 my $sep = $comma_sep_required ? "," : " ";
309 push(@args_seperator, $sep);
310 #print ("sep = \"$sep\", arg = \"$_\"\n");
311 $comma_sep_required = 0;
316 $comma_sep_required = 1;
# start from the macro's declared default values, if there are any
320 if ($macro_args_default{$macro}){
321 %replacements = %{$macro_args_default{$macro}};
324 # construct hashtable of text to replace
325 foreach my $i (0 .. $#args) {
326 my $argname = $macro_args{$macro}[$i];
# NOTE(review): @macro_args appears only in the commented-out debug print below
327 my @macro_args = @{ $macro_args{$macro} };
328 if ($args[$i] =~ m/=/) {
329 # arg=val references the argument name
330 # XXX: I'm not sure what the expected behaviour is if a lot of
331 # these are mixed with unnamed args
332 my @named_arg = split(/=/, $args[$i]);
333 $replacements{$named_arg[0]} = $named_arg[1];
334 } elsif ($i > $#{$macro_args{$macro}}) {
335 # more args given than the macro has named args
336 # XXX: is vararg allowed on arguments before the last?
337 $argname = $macro_args{$macro}[-1];
# surplus arguments are accepted only when the last declared one is :vararg;
# they are appended to it along with their recorded separator
338 if ($argname =~ s/:vararg$//) {
339 #print "macro = $macro, args[$i] = $args[$i], args_seperator=@args_seperator, argname = $argname, arglist[$i] = $arglist[$i], arglist = @arglist, args=@args, macro_args=@macro_args\n";
340 #$replacements{$argname} .= ", $args[$i]";
341 $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
343 die "Too many arguments to macro $macro";
# positional argument: bind it to the declared name without any :vararg suffix
346 $argname =~ s/:vararg$//;
347 $replacements{$argname} = $args[$i];
# per-expansion counter value, substituted for \@ in the macro body
351 my $count = $macro_count++;
353 # apply replacements as regex
354 foreach (@{$macro_lines{$macro}}) {
356 # do replacements by longest first, this avoids wrong replacement
357 # when argument names are subsets of each other
358 foreach (reverse sort {length $a <=> length $b} keys %replacements) {
# NOTE(review): the argument name is interpolated into the pattern unquoted
359 $macro_line =~ s/\\$_/$replacements{$_}/g;
361 $macro_line =~ s/\\\@/$count/g;
362 $macro_line =~ s/\\\(\)//g; # remove \()
# the expanded line goes back through parse_line
363 parse_line($macro_line);
# the line is appended to the pass-1 output as is
366 push(@pass1_lines, $line);
# finish reading from the preprocessor; exit with status 1 if close reports failure
370 close(ASMFILE) or exit 1;
# reuse the ASMFILE handle as a write pipe into the assembler command
371 open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
372 #open(ASMFILE, ">/tmp/a.S") or die "Error running assembler";
# literal expression text => generated label name
378 my %literal_labels; # for ldr <reg>, =<expr>
390 # pass 2: parse .rept and .if variants
391 # NOTE: since we don't implement a proper parser, using .rept with a
392 # variable assigned from .set is not supported
393 foreach my $line (@pass1_lines) {
394 # handle .previous (only with regard to .section not .subsection)
# every section-selecting line is remembered in order
395 if ($line =~ /\.(section|text|const_data)/) {
396 push(@sections, $line);
397 } elsif ($line =~ /\.previous/) {
# needs a second-to-last recorded section to switch back to
398 if (!$sections[-2]) {
399 die ".previous without a previous section";
# replace .previous with that section line and record the switch
401 $line = $sections[-2];
402 push(@sections, $line);
# track the current instruction set mode from the mode directives
405 $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
406 $thumb = 0 if $line =~ /\.code\s+32|\.arm/;
408 # handle ldr <reg>, =<expr>
409 if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
# look up the label already assigned to this literal, if any
410 my $label = $literal_labels{$3};
412 $label = "Literal_$literal_num";
414 $literal_labels{$3} = $label;
# the load now references the label instead of the "=expr" form
416 $line = "$1 ldr$2, $label\n";
417 } elsif ($line =~ /\.ltorg/) {
# emit all pending literals as .word entries after the .ltorg line, then
# clear the table
418 $line .= ".align 2\n";
419 foreach my $literal (keys %literal_labels) {
420 $line .= "$literal_labels{$literal}:\n .word $literal\n";
422 %literal_labels = ();
425 # thumb add with large immediate needs explicit add.w
# the immediate is the text after "#", up to any "@"
426 if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
427 $line =~ s/add/add.w/ if eval_expr($1) > 255;
430 # mach-o local symbol names start with L (no dot)
431 $line =~ s/(?<!\w)\.(L\w+)/$1/g;
# a label defined while in thumb mode (presumably recorded in %thumb_labels)
433 if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
# the target of bl/blx or .globl (presumably recorded in %call_targets)
437 if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) {
441 # @l -> lo16() @ha -> ha16()
442 $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
443 $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;
# mf<name>/mt<name> with a name listed in %ppc_spr becomes mfspr/mtspr with
# the numeric register; the two forms below differ only in operand order
446 if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
448 $line = "$1${2}spr $ppc_spr{$3}, $4\n";
450 $line = "$1${2}spr $4, $ppc_spr{$3}\n";
454 # old gas versions store upper and lower case names on .req,
455 # but they remove only one on .unreq
# so both are removed: the upper-case .unreq is written out immediately and
# the line itself becomes the lower-case one
457 if ($line =~ /\.unreq\s+(.*)/) {
458 $line = ".unreq " . lc($1) . "\n";
459 print ASMFILE ".unreq " . uc($1) . "\n";
# .rept / .irp / .irpc blocks are buffered in $rept_lines and replayed at .endr
463 if ($line =~ /\.rept\s+(.*)/) {
467 # handle the possibility of repeating another directive on the same line
468 # .endr on the same line is not valid, I don't know if a non-directive is
# a directive trailing the count is cut out and becomes the first buffered line
469 if ($num_repts =~ s/(\.\w+.*)//) {
470 $rept_lines .= "$1\n";
# NOTE(review): string eval of text taken from the assembly input
472 $num_repts = eval($num_repts);
473 } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
479 # only use whitespace as the separator
480 my $irp_arglist = $2;
481 $irp_arglist =~ s/,/ /g;
482 $irp_arglist =~ s/^\s+//;
483 @irp_args = split(/\s+/, $irp_arglist);
484 } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
490 my $irp_arglist = $2;
491 $irp_arglist =~ s/,/ /g;
492 $irp_arglist =~ s/^\s+//;
# one argument per character
# NOTE(review): interior spaces, including those substituted for commas above,
# become arguments too - confirm
493 @irp_args = split(//, $irp_arglist);
494 } elsif ($line =~ /\.endr/) {
# .irp/.irpc: substitute each argument for \<param> in a copy of the body
496 foreach my $i (@irp_args) {
497 my $line = $rept_lines;
498 $line =~ s/\\$irp_param/$i/g;
499 $line =~ s/\\\(\)//g; # remove \()
# .rept: write the buffered body out $num_repts times
503 for (1 .. $num_repts) {
504 print ASMFILE $rept_lines;
# while the buffer is non-empty, the line is appended to it
510 } elsif ($rept_lines) {
511 $rept_lines .= $line;
# epilogue: emit the literals still pending in %literal_labels as .word
# entries in the text section
517 print ASMFILE ".text\n";
518 print ASMFILE ".align 2\n";
519 foreach my $literal (keys %literal_labels) {
520 print ASMFILE "$literal_labels{$literal}:\n .word $literal\n";
# emit .thumb_func for every call target that is also in %thumb_labels
523 map print(ASMFILE ".thumb_func $_\n"),
524 grep exists $thumb_labels{$_}, keys %call_targets;
# wait for the assembler to finish; exit with status 1 if close reports failure
526 close(ASMFILE) or exit 1;