use gas-preprocessor for ios
[pcsx_rearmed.git] / tools / gas-preprocessor.pl
... / ...
CommitLineData
1#!/usr/bin/env perl
2# by David Conrad
3# This code is licensed under GPLv2 or later; go to gnu.org to read it
4# (not that it much matters for an asm preprocessor)
5# usage: set your assembler to be something like "perl gas-preprocessor.pl gcc"
6use strict;
7
8# Apple's gas is ancient and doesn't support modern preprocessing features like
9# .rept and has ugly macro syntax, among other things. Thus, this script
10# implements the subset of the gas preprocessor used by x264 and ffmpeg
11# that isn't supported by Apple's gas.
12
# The wrapped compiler/assembler invocation; element 0 is the program to run.
my @gcc_cmd = @ARGV;
my @preprocess_c_cmd;

# Whether pass 2 applies the .unreq workaround.  Defaults to on when running
# on Darwin; a leading -fix-unreq / -no-fix-unreq argument overrides the
# default and is removed from the command line.
my $fix_unreq = $^O eq "darwin";

if ($gcc_cmd[0] eq "-fix-unreq" or $gcc_cmd[0] eq "-no-fix-unreq") {
    $fix_unreq = (shift(@gcc_cmd) eq "-fix-unreq") ? 1 : 0;
}

# Build the command that produces the assembly text we will rewrite:
# a C input is compiled to assembly (-S), an asm input is only run through
# the C preprocessor (-E).
my $has_c_input   = grep { /\.c$/ } @gcc_cmd;
my $has_asm_input = grep { /\.[sS]$/ } @gcc_cmd;

die "Unrecognized input filetype" unless $has_c_input || $has_asm_input;

@preprocess_c_cmd = (@gcc_cmd, $has_c_input ? "-S" : "-E");

# if compiling, avoid creating an output file named '-.o'
my $compile_only = grep { /^-c$/ } @gcc_cmd;
my $has_output   = grep { /^-o/ } @gcc_cmd;

if ($compile_only && !$has_output) {
    # name the object after the first source file on the command line
    my ($source) = grep { /\.[csS]$/ } @gcc_cmd;
    if (defined $source) {
        (my $outputfile = $source) =~ s/\.[csS]$/.o/;
        push(@gcc_cmd, "-o", $outputfile);
    }
}

# The final assembler reads our rewritten text from stdin instead of the
# source file; the preprocessing step writes to stdout instead of the object.
@gcc_cmd          = map { /\.[csS]$/ ? ("-x", "assembler", "-") : $_ } @gcc_cmd;
@preprocess_c_cmd = map { /\.o$/ ? "-" : $_ } @preprocess_c_cmd;

# Comment character of the target assembler syntax.
my $comm;

# Architecture name pattern => comment character, checked in order.
my @arch_comment_chars = (
    [ qr/arm/,         '@' ],
    [ qr/powerpc|ppc/, '#' ],
);

# Return the comment character for an architecture-bearing string, or undef
# when the string names no architecture we know.
my $comment_char_for = sub {
    my ($name) = @_;
    foreach my $entry (@arch_comment_chars) {
        my ($pattern, $char) = @$entry;
        return $char if $name =~ $pattern;
    }
    return;
};

# detect architecture from gcc binary name
$comm = $comment_char_for->($gcc_cmd[0]);

# look for -arch flag; a recognised value overrides the binary name
foreach my $i (1 .. $#gcc_cmd-1) {
    next unless $gcc_cmd[$i] eq "-arch";
    my $char = $comment_char_for->($gcc_cmd[$i+1]);
    $comm = $char if defined $char;
}

# assume we're not cross-compiling if no -arch or the binary doesn't have the arch name
if (!$comm) {
    my $native_arch = qx/arch/;
    $comm = $comment_char_for->($native_arch);
}

die "Unable to identify target architecture" if !$comm;

# PowerPC special purpose register names and their SPR numbers, used in
# pass 2 to rewrite mt<spr>/mf<spr> into mtspr/mfspr.
my %ppc_spr = (
    ctr    => 9,
    vrsave => 256,
);
87
# Run the preprocessing command and read its output through a pipe.
open(ASMFILE, "-|", @preprocess_c_cmd) or die "Error running preprocessor";

# Macro bookkeeping shared by parse_line/expand_macros.
my $current_macro = '';     # name of the macro currently being defined
my $macro_level = 0;        # nesting depth of .macro/.endm
my %macro_lines;            # macro name => array ref of body lines
my %macro_args;             # macro name => array ref of declared argument names
my %macro_args_default;     # macro name => { argument name => default value }
my $macro_count = 0;        # number of expansions so far (value of \@)
my $altmacro = 0;           # true between .altmacro and .noaltmacro

my @pass1_lines;            # output of pass 1, input of pass 2
my @ifstack;                # state of the open .if blocks

my %symbols;                # values assigned with .set

# pass 1: parse .macro
# note that the handling of arguments is probably overly permissive vs. gas
# but it should be the same for valid cases
while (<ASMFILE>) {
    # remove all comments (to avoid interfering with evaluating directives)
    # The comment character is quoted and /x is not used: under /x a '#'
    # arriving through an interpolated variable starts a regex comment, which
    # turned this into a no-op whenever $comm was '#'.
    s/(?<!\\)\Q$comm\E.*//;

    # comment out unsupported directives (order matters: each is tried once)
    foreach my $directive (qw(type func endfunc ltorg size fpu arch object_arch)) {
        s/\.$directive/$comm.$directive/;
    }

    # the syntax for these is a little different
    s/\.global/.globl/;
    # also catch .section .rodata since the equivalent to .const_data is .section __DATA,__const
    s/(.*)\.rodata/.const_data/;
    s/\.int/.long/;
    s/\.float/.single/;

    # catch unknown section names that aren't mach-o style (with a comma)
    if (/.section ([^,]*)$/) {
        die ".section $1 unsupported; figure out the mach-o section name and add it";
    }

    # the rewritten line stays in $_ and is also passed explicitly
    parse_line($_);
}
134
# Evaluate an assembler expression and return its value.
#
# Argument: the expression text.  Identifiers are replaced by the values
# recorded in %symbols (filled in by .set); identifiers without a usable
# value are removed, as before.  The result is then evaluated as Perl code,
# so the return value is undef when that evaluation fails.
#
# NOTE(review): this is a string eval of text taken from the input file;
# only run the script on trusted sources.
sub eval_expr {
    my $expr = $_[0];

    # The look-behind keeps the pattern from matching the tail of a numeric
    # literal: without it the "x10" of 0x10 or the ".5" of 1.5 was treated as
    # an (undefined) symbol and deleted, silently changing the number.
    # Values are parenthesized so that a negative value does not fuse with a
    # preceding operator (2-x with x = -1 must not become 2--1).
    $expr =~ s{(?<![A-Za-z0-9._])([A-Za-z._][A-Za-z0-9._]*)}{
        my $value = $symbols{$1};
        (defined $value && length $value) ? "($value)" : '';
    }gex;

    return eval $expr;
}
140
# Recognise an .if-family directive and push its outcome onto @ifstack.
#
# Argument: one source line.
# Returns 1 when the line was an .if directive (a true/false entry has been
# pushed onto @ifstack), 0 when it was not.
# Dies on .if variants that are not implemented and on malformed .ifc.
#
# handle .if directives; apple's assembler doesn't support important non-basic ones
# evaluating them is also needed to handle recursive macros
sub handle_if {
    my $line = $_[0];

    if ($line =~ /\.if(n?)([a-z]*)\s+(.*)/) {
        # ".ifnb"/".ifnc"/".ifne" parse as a leading "n" plus a type suffix
        my $negate = $1 eq "n";
        my $type = $2;
        my $expr = $3;
        my $result = $negate;

        if ($type eq "b") {
            # blank test: true when nothing but whitespace follows
            $expr =~ s/\s//g;
            $result ^= $expr eq "";
        } elsif ($type eq "c") {
            # string comparison; the operands are split at the last comma and
            # compared without the whitespace around them, so "a , a" and a
            # trailing blank no longer make equal strings compare unequal
            if ($expr =~ /^\s*(.*?)\s*,\s*([^,]*?)\s*$/) {
                $result ^= $1 eq $2;
            } else {
                die "argument to .ifc not recognized";
            }
        } elsif ($type eq "") {
            $result ^= eval_expr($expr) != 0;
        } elsif ($type eq "eq") {
            $result = eval_expr($expr) == 0;
        } elsif ($type eq "lt") {
            $result = eval_expr($expr) < 0;
        } elsif ($type eq "e" && $negate) {
            # .ifne
            $result = eval_expr($expr) != 0;
        } elsif ($type eq "ge" && !$negate) {
            $result = eval_expr($expr) >= 0;
        } elsif ($type eq "gt" && !$negate) {
            $result = eval_expr($expr) > 0;
        } elsif ($type eq "le" && !$negate) {
            $result = eval_expr($expr) <= 0;
        } else {
            chomp($line);
            die "unhandled .if varient. \"$line\"";
        }
        push (@ifstack, $result);
        return 1;
    } else {
        return 0;
    }
}
175
# Process one line of pass 1: track .if blocks, collect macro definitions,
# and hand everything else to expand_macros.
#
# Argument: the line to process.  All tests are made against this argument;
# the previous code tested $_ in several places, which during a macro
# expansion is the macro body line *before* argument substitution.
sub parse_line {
    my $line = $_[0];

    # evaluate .if blocks
    if (scalar(@ifstack)) {
        # Conditionals inside a macro body being defined must be stored with
        # the body and evaluated when the macro is expanded, not now.
        if ($macro_level == 0) {
            if ($line =~ /\.endif/) {
                pop(@ifstack);
                return;
            } elsif ($line =~ /\.elseif\s+(.*)/) {
                if ($ifstack[-1] == 0) {
                    # no branch taken yet: this one decides
                    $ifstack[-1] = !!eval_expr($1);
                } elsif ($ifstack[-1] > 0) {
                    # a branch was already taken: mark the block as finished
                    $ifstack[-1] = -$ifstack[-1];
                }
                return;
            } elsif ($line =~ /\.else/) {
                $ifstack[-1] = !$ifstack[-1];
                return;
            } elsif (handle_if($line)) {
                return;
            }
        }

        # discard lines in false .if blocks
        foreach my $i (0 .. $#ifstack) {
            if ($ifstack[$i] <= 0) {
                return;
            }
        }
    }

    if ($line =~ /\.macro/) {
        $macro_level++;
        if ($macro_level > 1 && !$current_macro) {
            die "nested macros but we don't have master macro";
        }
    } elsif ($line =~ /\.endm/) {
        $macro_level--;
        if ($macro_level < 0) {
            die "unmatched .endm";
        } elsif ($macro_level == 0) {
            $current_macro = '';
            return;
        }
    }

    if ($macro_level > 1) {
        # inside a nested definition: keep the line as part of the outer body
        push(@{$macro_lines{$current_macro}}, $line);
    } elsif ($macro_level == 0) {
        expand_macros($line);
    } else {
        if ($line =~ /\.macro\s+([\d\w\.]+)\s*(.*)/) {
            $current_macro = $1;

            # commas in the argument list are optional, so only use whitespace as the separator
            my $arglist = $2;
            $arglist =~ s/,/ /g;

            my @args = split(/\s+/, $arglist);
            foreach my $i (0 .. $#args) {
                # "name=default"; the declared name keeps a :vararg suffix,
                # the defaults table is keyed by the bare name
                my @argpair = split(/=/, $args[$i]);
                $macro_args{$current_macro}[$i] = $argpair[0];
                $argpair[0] =~ s/:vararg$//;
                $macro_args_default{$current_macro}{$argpair[0]} = $argpair[1];
            }
            # ensure %macro_lines has the macro name added as a key
            $macro_lines{$current_macro} = [];

        } elsif ($current_macro) {
            push(@{$macro_lines{$current_macro}}, $line);
        } else {
            die "macro level without a macro name";
        }
    }
}
250
# Handle one line outside any macro definition: process the directives
# implemented here (.if*, .purgem, .altmacro/.noaltmacro, .set) and expand
# macro invocations; any other line is appended to @pass1_lines.
#
# Argument: the line to process.  Expanded macro bodies are fed back through
# parse_line line by line, so expansion is recursive.
# Dies when a macro is invoked with more arguments than it declares and its
# last argument is not :vararg.
sub expand_macros {
    my $line = $_[0];

    # handle .if directives; apple's assembler doesn't support important non-basic ones
    # evaluating them is also needed to handle recursive macros
    if (handle_if($line)) {
        return;
    }

    # matched against $line (not $_) so that a .purgem produced by a macro
    # expansion is seen after argument substitution
    if ($line =~ /\.purgem\s+([\d\w\.]+)/) {
        delete $macro_lines{$1};
        delete $macro_args{$1};
        delete $macro_args_default{$1};
        return;
    }

    if ($line =~ /\.altmacro/) {
        $altmacro = 1;
        return;
    }

    if ($line =~ /\.noaltmacro/) {
        $altmacro = 0;
        return;
    }

    # in altmacro mode %expr is replaced by the value of expr
    $line =~ s/\%([^,]*)/eval_expr($1)/eg if $altmacro;

    # record .set values; the line itself is still passed on below
    if ($line =~ /\.set\s+(.*),\s*(.*)/) {
        $symbols{$1} = eval_expr($2);
    }

    if ($line =~ /(\S+:|)\s*([\w\d\.]+)\s*(.*)/ && exists $macro_lines{$2}) {
        # keep a label that precedes the invocation
        push(@pass1_lines, $1);
        my $macro = $2;

        # commas are optional here too, but are syntactically important because
        # parameters can be blank
        my @arglist = split(/,/, $3);
        my @args;
        my @args_seperator;

        my $comma_sep_required = 0;
        foreach my $chunk (@arglist) {
            # allow arithmetic/shift operators in macro arguments
            $chunk =~ s/\s*(\+|-|\*|\/|<<|>>)\s*/$1/g;

            my @whitespace_split = split(/\s+/, $chunk);
            if (!@whitespace_split) {
                # nothing between two commas: an explicitly blank argument
                push(@args, '');
                push(@args_seperator, '');
            } else {
                foreach my $word (@whitespace_split) {
                    if (length($word)) {
                        push(@args, $word);
                        # remember whether a comma or only whitespace came
                        # before this argument (needed to rebuild varargs)
                        my $sep = $comma_sep_required ? "," : " ";
                        push(@args_seperator, $sep);
                        $comma_sep_required = 0;
                    }
                }
            }

            $comma_sep_required = 1;
        }

        # start from the declared defaults
        my %replacements;
        if ($macro_args_default{$macro}){
            %replacements = %{$macro_args_default{$macro}};
        }

        # construct hashtable of text to replace
        foreach my $i (0 .. $#args) {
            my $argname = $macro_args{$macro}[$i];
            if ($args[$i] =~ m/=/) {
                # arg=val references the argument name
                # XXX: I'm not sure what the expected behaviour if a lot of
                # these are mixed with unnamed args
                my @named_arg = split(/=/, $args[$i]);
                $replacements{$named_arg[0]} = $named_arg[1];
            } elsif ($i > $#{$macro_args{$macro}}) {
                # more args given than the macro has named args
                # XXX: is vararg allowed on arguments before the last?
                $argname = $macro_args{$macro}[-1];
                if ($argname =~ s/:vararg$//) {
                    $replacements{$argname} .= "$args_seperator[$i] $args[$i]";
                } else {
                    die "Too many arguments to macro $macro";
                }
            } else {
                $argname =~ s/:vararg$//;
                $replacements{$argname} = $args[$i];
            }
        }

        # value substituted for \@ in this expansion
        my $count = $macro_count++;

        # apply replacements as regex
        foreach my $body_line (@{$macro_lines{$macro}}) {
            my $macro_line = $body_line;
            # do replacements by longest first, this avoids wrong replacement
            # when argument names are subsets of each other
            foreach my $name (reverse sort {length $a <=> length $b} keys %replacements) {
                # the name is quoted so that characters such as '.' in an
                # argument name are matched literally
                $macro_line =~ s/\\\Q$name\E/$replacements{$name}/g;
            }
            $macro_line =~ s/\\\@/$count/g;
            $macro_line =~ s/\\\(\)//g;     # remove \()
            parse_line($macro_line);
        }
    } else {
        push(@pass1_lines, $line);
    }
}
368
# The preprocessing command has been read to the end; give up if it failed.
close(ASMFILE) or exit 1;
# From here on ASMFILE is a pipe into the real assembler.
open(ASMFILE, "|-", @gcc_cmd) or die "Error running assembler";
#open(ASMFILE, ">/tmp/a.S") or die "Error running assembler";

my @sections;           # every section directive seen, for .previous
my $num_repts;          # repeat count of the open .rept
my $rept_lines;         # body collected for the open .rept/.irp/.irpc

my %literal_labels;     # for ldr <reg>, =<expr>
my $literal_num = 0;

my $thumb = 0;          # true while in thumb mode

my %thumb_labels;       # labels defined while in thumb mode
my %call_targets;       # symbols named by bl/blx or .globl

my $in_irp = 0;         # true while collecting an .irp/.irpc body
my @irp_args;           # values the .irp/.irpc parameter takes
my $irp_param;          # name of the .irp/.irpc parameter

# pass 2: parse .rept and .if variants
# NOTE: since we don't implement a proper parser, using .rept with a
# variable assigned from .set is not supported
foreach my $line (@pass1_lines) {
    # handle .previous (only with regard to .section not .subsection)
    if ($line =~ /\.(section|text|const_data)/) {
        push(@sections, $line);
    } elsif ($line =~ /\.previous/) {
        if (!$sections[-2]) {
            die ".previous without a previous section";
        }
        $line = $sections[-2];
        push(@sections, $line);
    }

    $thumb = 1 if $line =~ /\.code\s+16|\.thumb/;
    $thumb = 0 if $line =~ /\.code\s+32|\.arm/;

    # handle ldr <reg>, =<expr>
    if ($line =~ /(.*)\s*ldr([\w\s\d]+)\s*,\s*=(.*)/) {
        my ($prefix, $operands, $literal) = ($1, $2, $3);
        # one pool entry per distinct expression
        my $label = $literal_labels{$literal};
        if (!$label) {
            $label = "Literal_$literal_num";
            $literal_num++;
            $literal_labels{$literal} = $label;
        }
        $line = "$prefix ldr$operands, $label\n";
    } elsif ($line =~ /\.ltorg/) {
        # dump the literals collected so far and start a new pool
        $line .= ".align 2\n";
        foreach my $literal (keys %literal_labels) {
            $line .= "$literal_labels{$literal}:\n .word $literal\n";
        }
        %literal_labels = ();
    }

    # thumb add with large immediate needs explicit add.w
    if ($thumb and $line =~ /add\s+.*#([^@]+)/) {
        # only rewrite the mnemonic itself ("add" followed by whitespace),
        # not an "add" that happens to occur earlier, e.g. inside a label
        $line =~ s/add(?=\s)/add.w/ if eval_expr($1) > 255;
    }

    # mach-o local symbol names start with L (no dot)
    $line =~ s/(?<!\w)\.(L\w+)/$1/g;

    if ($thumb and $line =~ /^\s*(\w+)\s*:/) {
        $thumb_labels{$1}++;
    }

    if ($line =~ /^\s*((\w+:)?blx?|\.globl)\s+(\w+)/) {
        $call_targets{$3}++;
    }

    # @l -> lo16()  @ha -> ha16()
    $line =~ s/,\s+([^,]+)\@l\b/, lo16($1)/g;
    $line =~ s/,\s+([^,]+)\@ha\b/, ha16($1)/g;

    # move to/from SPR
    if ($line =~ /(\s+)(m[ft])([a-z]+)\s+(\w+)/ and exists $ppc_spr{$3}) {
        if ($2 eq 'mt') {
            $line = "$1${2}spr $ppc_spr{$3}, $4\n";
        } else {
            $line = "$1${2}spr $4, $ppc_spr{$3}\n";
        }
    }

    # old gas versions store upper and lower case names on .req,
    # but they remove only one on .unreq
    if ($fix_unreq) {
        if ($line =~ /\.unreq\s+(.*)/) {
            # Both directives are kept in $line so that they follow the same
            # path as any other line; printing the upper-case one directly
            # emitted it outside (and only once for) an enclosing .rept/.irp.
            $line = ".unreq " . uc($1) . "\n" . ".unreq " . lc($1) . "\n";
        }
    }

    if ($line =~ /\.rept\s+(.*)/) {
        $num_repts = $1;
        # a non-empty value marks "currently collecting a body"
        $rept_lines = "\n";

        # handle the possibility of repeating another directive on the same line
        # .endr on the same line is not valid, I don't know if a non-directive is
        if ($num_repts =~ s/(\.\w+.*)//) {
            $rept_lines .= "$1\n";
        }
        $num_repts = eval($num_repts);
    } elsif ($line =~ /\.irp\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # only use whitespace as the separator
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(/\s+/, $irp_arglist);
    } elsif ($line =~ /\.irpc\s+([\d\w\.]+)\s*(.*)/) {
        $in_irp = 1;
        $num_repts = 1;
        $rept_lines = "\n";
        $irp_param = $1;

        # one iteration per character of the argument
        my $irp_arglist = $2;
        $irp_arglist =~ s/,/ /g;
        $irp_arglist =~ s/^\s+//;
        @irp_args = split(//, $irp_arglist);
    } elsif ($line =~ /\.endr/) {
        if ($in_irp != 0) {
            foreach my $i (@irp_args) {
                my $expanded = $rept_lines;
                # parameter name quoted so it is matched literally
                $expanded =~ s/\\\Q$irp_param\E/$i/g;
                $expanded =~ s/\\\(\)//g;       # remove \()
                print ASMFILE $expanded;
            }
        } else {
            for (1 .. $num_repts) {
                print ASMFILE $rept_lines;
            }
        }
        $rept_lines = '';
        $in_irp = 0;
        @irp_args = ();
    } elsif ($rept_lines) {
        $rept_lines .= $line;
    } else {
        print ASMFILE $line;
    }
}

# emit the literals that no .ltorg has flushed
print ASMFILE ".text\n";
print ASMFILE ".align 2\n";
foreach my $literal (keys %literal_labels) {
    print ASMFILE "$literal_labels{$literal}:\n .word $literal\n";
}

# mark every call target / global that was defined in thumb mode
foreach my $target (keys %call_targets) {
    print ASMFILE ".thumb_func $target\n" if exists $thumb_labels{$target};
}

# the assembler's exit status becomes ours
close(ASMFILE) or exit 1;
526#exit 1