#!/usr/bin/env perl
# by David Conrad
# This code is licensed under GPLv2 or later; go to gnu.org to read it
#  (not that it much matters for an asm preprocessor)
# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
6 | use strict; |
7 | |
# Apple's gas is ancient: it doesn't support modern preprocessing features
# such as .rept, and it has ugly macro syntax, among other things. This script
# therefore implements the subset of the gas preprocessor that x264 and ffmpeg
# use and that Apple's gas does not support.
12 | |
# The wrapped compiler invocation is taken verbatim from the command line;
# two derived commands are built from it below:
#   @preprocess_c_cmd - produces assembly text on stdout (gcc -S / gcc -E)
#   @gcc_cmd          - assembles the text this script feeds it on stdin
my @gcc_cmd = @ARGV;
my @preprocess_c_cmd;

# By default the .unreq workaround is applied only when running on darwin.
my $fix_unreq = $^O eq "darwin";

# An explicit switch in first position overrides that default and is consumed.
my %unreq_switch = ("-fix-unreq" => 1, "-no-fix-unreq" => 0);
if (exists $unreq_switch{$gcc_cmd[0]}) {
    $fix_unreq = $unreq_switch{$gcc_cmd[0]};
    shift @gcc_cmd;
}

# C file (inline asm?) - compile with -S
# asm file - just run the C preprocessor with -E
my $preprocess_flag = (grep { /\.c$/ } @gcc_cmd)     ? "-S"
                    : (grep { /\.[sS]$/ } @gcc_cmd)  ? "-E"
                    :                                  undef;
die "Unrecognized input filetype" unless defined $preprocess_flag;
@preprocess_c_cmd = (@gcc_cmd, $preprocess_flag);

# if compiling, avoid creating an output file named '-.o'
if ((grep { /^-c$/ } @gcc_cmd) && !(grep { /^-o/ } @gcc_cmd)) {
    my ($first_source) = grep { /\.[csS]$/ } @gcc_cmd;
    if (defined $first_source) {
        (my $outputfile = $first_source) =~ s/\.[csS]$/.o/;
        push @gcc_cmd, "-o", $outputfile;
    }
}

# The assembler reads its input from stdin instead of the source file ...
@gcc_cmd = map { /\.[csS]$/ ? ("-x", "assembler", "-") : $_ } @gcc_cmd;

# ... and the preprocessing step writes to stdout instead of the object file.
for my $arg (@preprocess_c_cmd) {
    $arg = "-" if $arg =~ /\.o$/;
}
50 | |
# Translate an architecture hint (compiler binary name, -arch value or the
# output of `arch`) into the assembler comment character for that target:
# '@' for arm, '#' for powerpc/ppc.  Returns nothing for any other hint.
my $comm_for_arch = sub {
    my ($hint) = @_;
    return '@' if $hint =~ /arm/;
    return '#' if $hint =~ /powerpc|ppc/;
    return;
};

# detect architecture from gcc binary name
my $comm = $comm_for_arch->($gcc_cmd[0]);

# look for -arch flag; a recognised value replaces the binary-name guess
for my $pos (1 .. $#gcc_cmd - 1) {
    next if $gcc_cmd[$pos] ne "-arch";
    my $from_flag = $comm_for_arch->($gcc_cmd[$pos + 1]);
    $comm = $from_flag if $from_flag;
}

# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
unless ($comm) {
    my $native_arch = qx/arch/;
    $comm = $comm_for_arch->($native_arch);
}

die "Unable to identify target architecture" unless $comm;
84 | |
# PowerPC special purpose register numbers, used in pass 2 to rewrite the
# mt<spr>/mf<spr> mnemonics into mtspr/mfspr.
my %ppc_spr = (ctr    => 9,
               vrsave => 256);

# Run the compiler/preprocessor and read the resulting assembly from its stdout.
open(ASMFILE, "-|", @preprocess_c_cmd) || die "Error running preprocessor";

# Macro bookkeeping shared by parse_line() and expand_macros().
my $current_macro = '';         # name of the macro currently being recorded
my $macro_level = 0;            # nesting depth of .macro/.endm
my %macro_lines;                # macro name => array of body lines
my %macro_args;                 # macro name => array of argument names
my %macro_args_default;         # macro name => { argument name => default }
my $macro_count = 0;            # number of expansions so far (value of \@)
my $altmacro = 0;               # true between .altmacro and .noaltmacro

my @pass1_lines;                # output of pass 1, input of pass 2
my @ifstack;                    # truth values of the enclosing .if blocks

my %symbols;                    # values assigned with .set

# pass 1: parse .macro
# note that the handling of arguments is probably overly permissive vs. gas
# but it should be the same for valid cases
while (<ASMFILE>) {
    # remove all comments (to avoid interfering with evaluating directives)
    # The comment character is quoted and /x is not used: under /x an
    # interpolated '#' (the PowerPC comment character) would itself start a
    # regex comment and the substitution would silently remove nothing.
    s/(?<!\\)\Q$comm\E.*//;

    # comment out unsupported directives
    s/\.type/$comm.type/;
    s/\.func/$comm.func/;
    s/\.endfunc/$comm.endfunc/;
    s/\.ltorg/$comm.ltorg/;
    s/\.size/$comm.size/;
    s/\.fpu/$comm.fpu/;
    s/\.arch/$comm.arch/;
    s/\.object_arch/$comm.object_arch/;

    # the syntax for these is a little different
    s/\.global/.globl/;
    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
    s/(.*)\.rodata/.const_data/;
    s/\.int/.long/;
    s/\.float/.single/;

    # catch unknown section names that aren't mach-o style (with a comma)
    # The dot is escaped so that only a real .section directive triggers this
    # (an unescaped dot also matched e.g. ".subsection 1").
    if (/\.section ([^,]*)$/) {
        die ".section $1 unsupported; figure out the mach-o section name and add it";
    }

    parse_line($_);
}
134 | |
# Evaluate an assembler expression and return its value.
#
# Every identifier in the expression is replaced by the value recorded for it
# in %symbols (symbols without a recorded value are replaced by the empty
# string), and the resulting text is evaluated as a Perl expression.  The
# result is undef when that evaluation fails.
#
# The lookbehind keeps the substitution from starting in the middle of a
# token: without it the tail of a numeric literal such as "0x100" or "1.5"
# ("x100", ".5") was treated as a symbol name and wiped out, turning the
# literal into "0" or "1".
sub eval_expr {
    my ($expr) = @_;
    $expr =~ s/(?<![A-Za-z0-9._])([A-Za-z._][A-Za-z0-9._]*)/defined $symbols{$1} ? $symbols{$1} : ''/ge;
    return eval $expr;
}
140 | |
# Recognise a .if-family directive in the given line and record its outcome.
#
# Returns 1 after pushing the truth value of the condition onto @ifstack when
# the line contains a .if directive, and 0 (leaving @ifstack untouched) when
# it does not.  Dies for .if variants that are not implemented and for a
# .ifc whose argument has no comma.
sub handle_if {
    my ($line) = @_;

    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    return 0 unless $line =~ /\.if(n?)([a-z]*)\s+(.*)/;

    # Start from the negation flag; the b, c and plain forms XOR their test
    # into it, while eq and lt assign their test result directly.
    my $result = $1 eq "n";
    my ($kind, $operand) = ($2, $3);

    if ($kind eq "b") {
        # blank test: true when nothing but whitespace follows
        $operand =~ s/\s//g;
        $result ^= $operand eq "";
    } elsif ($kind eq "c") {
        # string comparison of the two comma separated operands
        die "argument to .ifc not recognized"
            unless $operand =~ /(.*)\s*,\s*(.*)/;
        $result ^= $1 eq $2;
    } elsif ($kind eq "") {
        $result ^= eval_expr($operand) != 0;
    } elsif ($kind eq "eq") {
        $result = eval_expr($operand) == 0;
    } elsif ($kind eq "lt") {
        $result = eval_expr($operand) < 0;
    } else {
        chomp($line);
        die "unhandled .if varient. \"$line\"";
    }

    push(@ifstack, $result);
    return 1;
}
175 | |
# Process one line of pass-1 input.
#
# The line is first run through conditional assembly: .endif/.elseif/.else
# and nested .if directives update @ifstack, and lines inside a false block
# are dropped.  Surviving lines are then either recorded as part of the macro
# being defined (between .macro and .endm) or handed to expand_macros().
#
# All directive tests are made against the $line argument.  They used to be
# made against the global $_, which only worked as long as every caller
# happened to leave the same text in $_; during macro expansion $_ holds the
# macro body line *before* parameter substitution.
#
# Dies on an unmatched .endm and on inconsistent macro nesting.
sub parse_line {
    my ($line) = @_;

    # evaluate .if blocks
    if (scalar(@ifstack)) {
        if ($line =~ /\.endif/) {
            pop(@ifstack);
            return;
        } elsif ($line =~ /\.elseif\s+(.*)/) {
            if ($ifstack[-1] == 0) {
                # no branch taken so far: this one decides
                $ifstack[-1] = !!eval_expr($1);
            } elsif ($ifstack[-1] > 0) {
                # a branch was already taken: a negative value keeps the
                # remaining branches (including .else) disabled
                $ifstack[-1] = -$ifstack[-1];
            }
            return;
        } elsif ($line =~ /\.else/) {
            $ifstack[-1] = !$ifstack[-1];
            return;
        } elsif (handle_if($line)) {
            return;
        }

        # discard lines in false .if blocks
        foreach my $i (0 .. $#ifstack) {
            if ($ifstack[$i] <= 0) {
                return;
            }
        }
    }

    if ($line =~ /\.macro/) {
        $macro_level++;
        if ($macro_level > 1 && !$current_macro) {
            die "nested macros but we don't have master macro";
        }
    } elsif ($line =~ /\.endm/) {
        $macro_level--;
        if ($macro_level < 0) {
            die "unmatched .endm";
        } elsif ($macro_level == 0) {
            $current_macro = '';
            return;
        }
    }

    if ($macro_level > 1) {
        # inside a nested macro definition: stored verbatim in the outer macro
        push(@{$macro_lines{$current_macro}}, $line);
    } elsif ($macro_level == 0) {
        expand_macros($line);
    } else {
        if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) {
            $current_macro = $1;

            # commas in the argument list are optional, so only use whitespace as the separator
            my $arglist = $2;
            $arglist =~ s/,/ /g;

            my @args = split(/\s+/, $arglist);
            foreach my $i (0 .. $#args) {
                my @argpair = split(/=/, $args[$i]);
                # the name is stored with its :vararg suffix, the default without it
                $macro_args{$current_macro}[$i] = $argpair[0];
                $argpair[0] =~ s/:vararg$//;
                $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
            }
            # ensure %macro_lines has the macro name added as a key
            $macro_lines{$current_macro} = [];

        } elsif ($current_macro) {
            push(@{$macro_lines{$current_macro}}, $line);
        } else {
            die "macro level without a macro name";
        }
    }
}
250 | |
# Handle one line that is outside any macro definition.
#
# Directives interpreted here (.if variants, .purgem, .altmacro, .noaltmacro)
# are consumed.  A .set is recorded in %symbols and still passed on.  If the
# line invokes a known macro, the macro body is instantiated with the given
# arguments and every resulting line is fed back through parse_line();
# otherwise the line is appended to @pass1_lines.
#
# Dies when a macro is called with more arguments than it declares and its
# last argument is not :vararg.
#
# Changes from the previous revision: .purgem is matched against the $line
# argument rather than the global $_, argument names are quoted when they are
# used as a pattern, and an unused copy of the argument list was removed.
sub expand_macros {
    my ($line) = @_;

    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if (handle_if($line)) {
        return;
    }

    if ($line =~ /\.purgem\s+([\d\w\.]+)/) {
        delete $macro_lines{$1};
        delete $macro_args{$1};
        delete $macro_args_default{$1};
        return;
    }

    if ($line =~ /\.altmacro/) {
        $altmacro = 1;
        return;
    }

    if ($line =~ /\.noaltmacro/) {
        $altmacro = 0;
        return;
    }

    # in altmacro mode %expr is replaced by the value of expr
    $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;

    if ($line =~ /\.set\s+(.*),\s*(.*)/) {
        my ($symbol, $value) = ($1, $2);
        $symbols{$symbol} = eval_expr($value);
    }

    if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
        # a label in front of the macro call is kept
        push(@pass1_lines, $1);
        my $macro = $2;

        # commas are optional here too, but are syntactically important because
        # parameters can be blank
        my @arglist = split(/,/, $3);
        my @args;
        my @args_seperator;

        my $comma_sep_required = 0;
        foreach my $chunk (@arglist) {
            # allow arithmetic/shift operators in macro arguments
            $chunk =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g;

            my @whitespace_split = split(/\s+/, $chunk);
            if (!@whitespace_split) {
                # nothing between two commas: an explicitly blank argument
                push(@args, '');
                push(@args_seperator, '');
            } else {
                foreach my $word (@whitespace_split) {
                    if (length($word)) {
                        push(@args, $word);
                        # remember whether this argument followed a comma or
                        # only whitespace; used when rebuilding a vararg
                        my $sep = $comma_sep_required ? "," : " ";
                        push(@args_seperator, $sep);
                        $comma_sep_required = 0;
                    }
                }
            }

            $comma_sep_required = 1;
        }

        # start from the declared defaults
        my %replacements;
        if ($macro_args_default{$macro}){
            %replacements = %{$macro_args_default{$macro}};
        }

        # construct hashtable of text to replace
        foreach my $i (0 .. $#args) {
            my $argname = $macro_args{$macro}[$i];
            if ($args[$i] =~ m/=/) {
                # arg=val references the argument name
                # XXX: I'm not sure what the expected behaviour if a lot of
                # these are mixed with unnamed args
                my @named_arg = split(/=/, $args[$i]);
                $replacements{$named_arg[0]} = $named_arg[1];
            } elsif ($i > $#{$macro_args{$macro}}) {
                # more args given than the macro has named args
                # XXX: is vararg allowed on arguments before the last?
                $argname = $macro_args{$macro}[-1];
                if ($argname =~ s/:vararg$//) {
                    $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
                } else {
                    die "Too many arguments to macro $macro";
                }
            } else {
                $argname =~ s/:vararg$//;
                $replacements{$argname} = $args[$i];
            }
        }

        # value substituted for \@ in this expansion
        my $count = $macro_count++;

        # apply replacements as regex
        # NOTE: this loop deliberately keeps the implicit $_ as its variable,
        # so that $_ holds the stored macro line while parse_line() runs, as
        # it always has.
        foreach (@{$macro_lines{$macro}}) {
            my $macro_line = $_;
            # do replacements by longest first, this avoids wrong replacement
            # when argument names are subsets of each other
            foreach my $name (reverse sort {length $a <=> length $b} keys %replacements) {
                $macro_line =~ s/\\\Q$name\E/$replacements{$name}/g;
            }
            $macro_line =~ s/\\\@/$count/g;
            $macro_line =~ s/\\\(\)//g;     # remove \()
            parse_line($macro_line);
        }
    } else {
        push(@pass1_lines, $line);
    }
}
368 | |
# Pass 1 is complete: make sure the preprocessor succeeded, then start the
# real assembler and feed it the rewritten source on its stdin.
close(ASMFILE) or exit 1;
open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
#open(ASMFILE, ">/tmp/a.S") or die "Error running assembler";

my @sections;           # section directives seen so far, for .previous
my $num_repts;          # repeat count of the open .rept block
my $rept_lines;         # collected body of the open .rept/.irp/.irpc block

my %literal_labels;     # for ldr <reg>, =<expr>
my $literal_num = 0;

my $thumb = 0;          # true while assembling thumb code

my %thumb_labels;       # labels defined while in thumb mode
my %call_targets;       # symbols used with bl/blx or declared .globl

my $in_irp = 0;         # true inside .irp/.irpc
my @irp_args;           # values iterated over by the open .irp/.irpc
my $irp_param;          # name of the .irp/.irpc parameter

# pass 2: parse .rept and .if variants
# NOTE: since we don't implement a proper parser, using .rept with a
# variable assigned from .set is not supported
foreach my $line (@pass1_lines) {
    # handle .previous (only with regard to .section not .subsection)
    if ($line =~ /\.(section|text|const_data)/) {
        push(@sections, $line);
    } elsif ($line =~ /\.previous/) {
        if (!$sections[-2]) {
            die ".previous without a previous section";
        }
        $line = $sections[-2];
        push(@sections, $line);
    }

    $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
    $thumb = 0 if $line =~ /\.code\s+32|\.arm/;

    # handle ldr <reg>, =<expr>
    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
        # one label per distinct expression; the pool is emitted at .ltorg
        my $label = $literal_labels{$3};
        if (!$label) {
            $label = "Literal_$literal_num";
            $literal_num++;
            $literal_labels{$3} = $label;
        }
        $line = "$1 ldr$2, $label\n";
    } elsif ($line =~ /\.ltorg/) {
        $line .= ".align 2\n";
        foreach my $literal (keys %literal_labels) {
            $line .= "$literal_labels{$literal}:\n .word $literal\n";
        }
        %literal_labels = ();
    }

    # thumb add with large immediate needs explicit add.w
    if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
        $line =~ s/add/add.w/ if eval_expr($1) > 255;
    }

    # mach-o local symbol names start with L (no dot)
    $line =~ s/(?<!\w)\.(L\w+)/$1/g;

    if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
        $thumb_labels{$1}++;
    }

    if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) {
        $call_targets{$3}++;
    }

    # @l -> lo16()  @ha -> ha16()
    $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
    $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;

    # move to/from SPR
    if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
        if ($2 eq 'mt') {
            $line = "$1${2}spr $ppc_spr{$3}, $4\n";
        } else {
            $line = "$1${2}spr $4, $ppc_spr{$3}\n";
        }
    }

    # old gas versions store upper and lower case names on .req,
    # but they remove only one on .unreq
    if ($fix_unreq) {
        if ($line =~ /\.unreq\s+(.*)/) {
            my $alias = $1;
            $line = ".unreq " . lc($alias) . "\n";
            print ASMFILE ".unreq " . uc($alias) . "\n";
        }
    }

    if ($line =~ /\.rept\s+(.*)/) {
        $num_repts = $1;
        $rept_lines = "\n";

        # handle the possibility of repeating another directive on the same line
        # .endr on the same line is not valid, I don't know if a non-directive is
        if ($num_repts =~ s/(\.\w+.*)//) {
            $rept_lines .= "$1\n";
        }
        $num_repts = eval($num_repts);
    } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # only use whitespace as the separator
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(/\s+/, $irp_arglist);
    } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # .irpc iterates over the individual characters of its argument
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(//, $irp_arglist);
    } elsif ($line =~ /\.endr/) {
        if ($in_irp != 0) {
            foreach my $i (@irp_args) {
                # instantiate the collected body once per value; the
                # parameter name is quoted so it is matched literally
                my $expanded = $rept_lines;
                $expanded =~ s/\\\Q$irp_param\E/$i/g;
                $expanded =~ s/\\\(\)//g;     # remove \()
                print ASMFILE $expanded;
            }
        } else {
            for (1 .. $num_repts) {
                print ASMFILE $rept_lines;
            }
        }
        $rept_lines = '';
        $in_irp = 0;
        # really empty the list (assigning '' left one empty element behind)
        @irp_args = ();
    } elsif ($rept_lines) {
        # inside a repeat block: collect instead of emitting
        $rept_lines .= $line;
    } else {
        print ASMFILE $line;
    }
}
515 | |
# Emit, in the text section, every literal that was requested with
# "ldr <reg>, =<expr>" and has not been written out yet.
print ASMFILE ".text\n", ".align 2\n";
for my $expr (keys %literal_labels) {
    my $label = $literal_labels{$expr};
    print ASMFILE "$label:\n .word $expr\n";
}

# Mark every call target / global symbol that was defined in thumb mode.
for my $target (keys %call_targets) {
    next unless exists $thumb_labels{$target};
    print ASMFILE ".thumb_func $target\n";
}

# A failing assembler makes this script fail too.
close(ASMFILE) or exit 1;
#exit 1
526 | #exit 1 |