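/*
 * [ia32rtools.git] / tools / cvt_data.c
 * Commit subject: partial ARM support
 * NOTE(review): this text looks like it was scraped from a blame-style web
 * view with the columns Commit | Line | Data -- confirm against the repo.
 */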
054f95b2 1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
/* Number of elements of a true array (do not pass a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when the strings w and y are equal. */
#define IS(w, y) (strcmp((w), (y)) == 0)
/* Non-zero when string w begins with string y; y is evaluated twice. */
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
12
13#include "protoparse.h"
14
// name of the .asm input and the count of lines read from it so far;
// both form the "file:line:" prefix of anote()/awarn()/aerr() messages
static const char *asmfn;
static int asmln;

// prototype of the last data label that was a function pointer;
// set by check_var(), reset by main() when a new prototyped label or
// a "sctclrtype" comment is seen
static const struct parsed_proto *g_func_sym_pp;
// comment text to append to the current output line; main() prints it
// at its "fin:" label and then clears it
static char g_comment[256];
// number of warnings issued; awarn() terminates the program at 10
static int g_warn_cnt;
// set by the -i option: suppresses the "unhandled reg call" warning
static int g_cconv_novalidate;
// set by the -a option: ARM output (changes comment char and .align)
static int g_arm_mode;
b545ba7c 23
// Element type selected by a data directive (db/dw/dd/dq/dt).
// note: must be in ascending order (main() tests 'type >= DXT_DWORD')
enum dx_type {
  DXT_UNSPEC,
  DXT_BYTE,
  DXT_WORD,
  DXT_DWORD,
  DXT_QUAD,
  DXT_TEN,
};
33
// Diagnostics, all prefixed with "<asmfn>:<asmln>: " and written to stdout.
// anote: informational message only.
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// awarn: warning; the 10th warning closes all streams and exits with 1.
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// aerr: fatal error; closes all streams and exits with 1.
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
48
49#include "masm_tools.h"
50
// Copy the next word of 's' into 'w' (capacity 'wsize', always terminated).
// Like a plain word reader, except that text between single quotes is kept
// together: blanks and commas inside '...' do not end the word.
// Outside quotes a word ends at a blank, a comma or the end of the string.
// Prints a warning when the word did not fit into 'w'.
// Returns the position in 's' just after the copied characters.
// (sskip() and my_isblank() come from my_str.h; sskip() presumably skips
// leading blanks - verify there.)
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i = 0;

  s = sskip(s);

  // 'wsize - 1' below would wrap around for an empty buffer
  if (wsize == 0)
    return s;

  for (; i < wsize - 1; i++) {
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  // stopped because 'w' is full, not because the word ended
  if (s[i] != 0 && !my_isblank(s[i]) && s[i] != ',')
    printf("warning: '%s' truncated\n", w);

  return s + i;
}
72
73static void next_section(FILE *fasm, char *name)
74{
75 char words[2][256];
76 char line[256];
77 int wordc;
78 char *p;
79
80 name[0] = 0;
81
82 while (fgets(line, sizeof(line), fasm))
83 {
84 wordc = 0;
85 asmln++;
86
87 p = sskip(line);
88 if (*p == 0)
89 continue;
90
91 if (*p == ';') {
92 while (strlen(line) == sizeof(line) - 1) {
93 // one of those long comment lines..
94 if (!fgets(line, sizeof(line), fasm))
95 break;
96 }
97 continue;
98 }
99
100 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
101 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
102 if (*p == 0 || *p == ';') {
103 wordc++;
104 break;
105 }
106 }
107
108 if (wordc < 2)
109 continue;
110
111 if (!IS(words[1], "segment"))
112 continue;
113
114 strcpy(name, words[0]);
115 break;
116 }
117}
118
119static enum dx_type parse_dx_directive(const char *name)
120{
121 if (IS(name, "dd"))
122 return DXT_DWORD;
123 if (IS(name, "dw"))
124 return DXT_WORD;
125 if (IS(name, "db"))
126 return DXT_BYTE;
127 if (IS(name, "dq"))
128 return DXT_QUAD;
129 if (IS(name, "dt"))
130 return DXT_TEN;
131
132 return DXT_UNSPEC;
133}
134
135static const char *type_name(enum dx_type type)
136{
137 switch (type) {
138 case DXT_BYTE:
139 return ".byte";
140 case DXT_WORD:
141 return ".word";
142 case DXT_DWORD:
143 return ".long";
144 case DXT_QUAD:
145 return ".quad";
146 case DXT_TEN:
147 return ".tfloat";
148 case DXT_UNSPEC:
149 break;
150 }
151 return "<bad>";
152}
153
36595fd2 154static const char *type_name_float(enum dx_type type)
155{
156 switch (type) {
157 case DXT_DWORD:
158 return ".float";
159 case DXT_QUAD:
160 return ".double";
161 case DXT_TEN:
162 return ".tfloat";
163 default:
164 break;
165 }
166 return "<bad_float>";
167}
168
054f95b2 169static int type_size(enum dx_type type)
170{
171 switch (type) {
172 case DXT_BYTE:
173 return 1;
174 case DXT_WORD:
175 return 2;
176 case DXT_DWORD:
177 return 4;
178 case DXT_QUAD:
179 return 8;
180 case DXT_TEN:
181 return 10;
182 case DXT_UNSPEC:
183 break;
184 }
185 return -1;
186}
187
// Escape 's' for use inside a double-quoted assembler string:
// '"' becomes \" and '\' becomes \\, everything else is copied as is.
//
// Returns a pointer to an internal static buffer that is overwritten by
// the next call; 's' itself is not modified. The buffer holds the worst
// case (every character escaped) of a 255 character input; longer results
// are cut off at a character boundary instead of overflowing.
//
// Previous problems fixed here:
// - the result was built in a 256 byte stack buffer with no bound check,
// - it was then copied to the END of the caller's string ('s' had already
//   been advanced to the terminator), writing past the caller's data,
// - '"' was written as \22, which the assembler reads as octal 022
//   (byte 0x12) and not as a quote character.
static char *escape_string(char *s)
{
  static char buf[512];
  size_t o = 0;

  for (; *s != 0; s++) {
    // room needed: up to 2 output characters plus the terminator
    if (o + 2 >= sizeof(buf))
      break;
    if (*s == '"' || *s == '\\')
      buf[o++] = '\\';
    buf[o++] = *s;
  }
  buf[o] = 0;

  return buf;
}
209
b545ba7c 210static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
211 size_t buf_size)
212{
213 char *p = buf;
214 size_t l;
215 int i;
216
217 if (pp->ret_type.is_ptr)
218 *p++ = 'p';
219 else if (IS(pp->ret_type.name, "void"))
220 *p++ = 'v';
221 else
222 *p++ = 'i';
223 *p++ = '(';
224 l = 2;
225
226 for (i = 0; i < pp->argc; i++) {
227 if (pp->arg[i].reg != NULL)
228 snprintf(buf + l, buf_size - l, "%s%s",
229 i == 0 ? "" : ",", pp->arg[i].reg);
230 else
231 snprintf(buf + l, buf_size - l, "%sa%d",
232 i == 0 ? "" : ",", i + 1);
233 l = strlen(buf);
234 }
235 snprintf(buf + l, buf_size - l, ")");
236}
237
c0050df6 238static const struct parsed_proto *check_var(FILE *fhdr,
239 const char *sym, const char *varname)
b545ba7c 240{
241 const struct parsed_proto *pp, *pp_sym;
242 char fp_sym[256], fp_var[256];
243 int i, bad = 0;
244
245 pp = proto_parse(fhdr, varname, 1);
36595fd2 246 if (pp == NULL) {
b545ba7c 247 if (IS_START(varname, "sub_"))
248 awarn("sub_ sym missing proto: '%s'\n", varname);
c0050df6 249 return NULL;
36595fd2 250 }
251
252 if (!pp->is_func && !pp->is_fptr)
c0050df6 253 return NULL;
b545ba7c 254
b74c31e3 255 pp_print(fp_var, sizeof(fp_var), pp);
b545ba7c 256
36595fd2 257 if (pp->argc_reg == 0)
b545ba7c 258 goto check_sym;
36595fd2 259 if (pp->argc_reg == 1 && pp->argc_stack == 0
260 && IS(pp->arg[0].reg, "ecx"))
261 {
b545ba7c 262 goto check_sym;
36595fd2 263 }
77f3a833 264 if (!g_cconv_novalidate
265 && (pp->argc_reg != 2
266 || !IS(pp->arg[0].reg, "ecx")
267 || !IS(pp->arg[1].reg, "edx")))
36595fd2 268 {
b545ba7c 269 awarn("unhandled reg call: %s\n", fp_var);
36595fd2 270 }
36595fd2 271
b545ba7c 272check_sym:
273 sprint_pp_short(pp, g_comment, sizeof(g_comment));
274
275 if (sym != NULL) {
276 g_func_sym_pp = NULL;
277 pp_sym = proto_parse(fhdr, sym, 1);
278 if (pp_sym == NULL)
c0050df6 279 return pp;
b545ba7c 280 if (!pp_sym->is_fptr)
281 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
282 g_func_sym_pp = pp_sym;
36595fd2 283 }
b545ba7c 284 else {
285 pp_sym = g_func_sym_pp;
286 if (pp_sym == NULL)
c0050df6 287 return pp;
b545ba7c 288 }
289
290 if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg)
291 bad = 1;
292 else {
293 for (i = 0; i < pp->argc; i++) {
294 if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) {
295 bad = 1;
296 break;
297 }
298 if ((pp->arg[i].reg != NULL)
299 && !IS(pp->arg[i].reg, pp_sym->arg[i].reg))
300 {
301 bad = 1;
302 break;
303 }
304 }
305 }
306
307 if (bad) {
b74c31e3 308 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
b545ba7c 309 anote("var: %s\n", fp_var);
310 anote("sym: %s\n", fp_sym);
311 awarn("^ mismatch\n");
36595fd2 312 }
c0050df6 313
314 return pp;
36595fd2 315}
316
aa1aa2c2 317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
f0be238a 327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
// qsort()/bsearch() comparison callback for arrays of C strings:
// p1 and p2 point to the array elements (char pointers), which are
// compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *const *s1 = p1;
  const char *const *s2 = p2;

  return strcmp(*s1, *s2);
}
343
/* XXX: maybe move to external file? */
/*
 * Labels of strings that main() replaces with zero fill (".skip").
 * The table is looked up with bsearch(), so it is kept in strcmp() order
 * here; keep it that way when adding entries.
 * The element pointers are deliberately not const: main() runs qsort()
 * over this array.
 */
static const char *unwanted_syms[] = {
  "aAbnormalProgra",
  "aDomainError",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
  "aMicrosoftVisua",
  "aR6002FloatingP",
  "aR6008NotEnough",
  "aR6009NotEnough",
  "aR6016NotEnough",
  "aR6017Unexpecte",
  "aR6018Unexpecte",
  "aR6019UnableToO",
  "aR6024NotEnough",
  "aR6025PureVirtu",
  "aR6026NotEnough",
  "aR6027NotEnough",
  "aR6028UnableToI",
  "aR6029ThisAppli",
  "aRuntimeError",
  "aRuntimeErrorPr",
  "aSingError",
  "aThisApplicatio",
  "aTlossError",
};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
054f95b2 377int main(int argc, char *argv[])
378{
36595fd2 379 FILE *fout, *fasm, *fhdr, *frlist;
b545ba7c 380 const struct parsed_proto *pp;
aa1aa2c2 381 int no_decorations = 0;
f0be238a 382 char comment_char = '#';
054f95b2 383 char words[20][256];
054f95b2 384 char word[256];
385 char line[256];
c87eb470 386 char last_sym[32];
054f95b2 387 unsigned long val;
388 unsigned long cnt;
389 const char *sym;
390 enum dx_type type;
36595fd2 391 char **pub_syms;
392 int pub_sym_cnt = 0;
393 int pub_sym_alloc;
394 char **rlist;
395 int rlist_cnt = 0;
396 int rlist_alloc;
054f95b2 397 int is_label;
36595fd2 398 int is_bss;
054f95b2 399 int wordc;
400 int first;
401 int arg_out;
402 int arg = 1;
403 int len;
36595fd2 404 int w, i;
054f95b2 405 char *p;
406 char *p2;
407
36595fd2 408 if (argc < 4) {
aa1aa2c2 409 // -nd: no symbol decorations
f0be238a 410 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 411 argv[0]);
412 return 1;
413 }
414
aa1aa2c2 415 for (arg = 1; arg < argc; arg++) {
416 if (IS(argv[arg], "-nd"))
417 no_decorations = 1;
77f3a833 418 else if (IS(argv[arg], "-i"))
419 g_cconv_novalidate = 1;
f0be238a 420 else if (IS(argv[arg], "-a")) {
421 comment_char = '@';
422 g_arm_mode = 1;
423 }
aa1aa2c2 424 else
425 break;
426 }
427
054f95b2 428 arg_out = arg++;
429
430 asmfn = argv[arg++];
431 fasm = fopen(asmfn, "r");
432 my_assert_not(fasm, NULL);
433
434 hdrfn = argv[arg++];
36595fd2 435 fhdr = fopen(hdrfn, "r");
436 my_assert_not(fhdr, NULL);
054f95b2 437
438 fout = fopen(argv[arg_out], "w");
439 my_assert_not(fout, NULL);
440
36595fd2 441 pub_sym_alloc = 64;
442 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
443 my_assert_not(pub_syms, NULL);
444
445 rlist_alloc = 64;
446 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448
449 for (; arg < argc; arg++) {
450 frlist = fopen(argv[arg], "r");
451 my_assert_not(frlist, NULL);
452
453 while (fgets(line, sizeof(line), frlist)) {
454 p = sskip(line);
455 if (*p == 0 || *p == ';')
456 continue;
457
458 p = next_word(words[0], sizeof(words[0]), p);
459 if (words[0][0] == 0)
460 continue;
461
462 if (rlist_cnt >= rlist_alloc) {
463 rlist_alloc = rlist_alloc * 2 + 64;
464 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
465 my_assert_not(rlist, NULL);
466 }
467 rlist[rlist_cnt++] = strdup(words[0]);
468 }
469
470 fclose(frlist);
471 frlist = NULL;
472 }
473
474 if (rlist_cnt > 0)
475 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
476
c87eb470 477 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
478 sizeof(unwanted_syms[0]), cmpstringp);
479
480 last_sym[0] = 0;
481
36595fd2 482 while (1) {
054f95b2 483 next_section(fasm, line);
36595fd2 484 if (feof(fasm))
485 break;
054f95b2 486 if (IS(line + 1, "text"))
487 continue;
488
489 if (IS(line + 1, "rdata"))
490 fprintf(fout, "\n.section .rodata\n");
491 else if (IS(line + 1, "data"))
492 fprintf(fout, "\n.data\n");
493 else
494 aerr("unhandled section: '%s'\n", line);
495
f0be238a 496 fprintf(fout, ".align %d\n", align_value(4));
054f95b2 497
498 while (fgets(line, sizeof(line), fasm))
499 {
500 sym = NULL;
501 asmln++;
502
503 p = sskip(line);
504 if (*p == 0 || *p == ';')
505 continue;
506
507 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 508 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
509 if (*p == 0 || *p == ';') {
510 wordc++;
511 break;
512 }
513 if (*p == ',') {
054f95b2 514 p = sskip(p + 1);
515 }
516 }
517
b545ba7c 518 if (*p == ';') {
519 p = sskip(p + 1);
520 if (IS_START(p, "sctclrtype"))
521 g_func_sym_pp = NULL;
522 }
523
054f95b2 524 if (wordc == 2 && IS(words[1], "ends"))
525 break;
36595fd2 526 if (wordc <= 2 && IS(words[0], "end"))
527 break;
054f95b2 528 if (wordc < 2)
529 aerr("unhandled: '%s'\n", words[0]);
530
531 // don't cares
532 if (IS(words[0], "assume"))
533 continue;
534
535 if (IS(words[0], "align")) {
536 val = parse_number(words[1]);
f0be238a 537 fprintf(fout, "\t\t .align %d", align_value(val));
054f95b2 538 goto fin;
539 }
540
541 w = 1;
542 type = parse_dx_directive(words[0]);
543 if (type == DXT_UNSPEC) {
544 type = parse_dx_directive(words[1]);
545 sym = words[0];
546 w = 2;
547 }
548 if (type == DXT_UNSPEC)
549 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
550
551 if (sym != NULL) {
c87eb470 552 snprintf(last_sym, sizeof(last_sym), "%s", sym);
36595fd2 553
b545ba7c 554 pp = proto_parse(fhdr, sym, 1);
c87eb470 555 if (pp != NULL) {
b545ba7c 556 g_func_sym_pp = NULL;
557
c87eb470 558 // public/global name
559 if (pub_sym_cnt >= pub_sym_alloc) {
560 pub_sym_alloc *= 2;
561 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
562 my_assert_not(pub_syms, NULL);
563 }
564 pub_syms[pub_sym_cnt++] = strdup(sym);
565 }
566
054f95b2 567 len = strlen(sym);
aa1aa2c2 568 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
054f95b2 569
570 len += 2;
571 if (len < 8)
572 fprintf(fout, "\t");
573 if (len < 16)
574 fprintf(fout, "\t");
575 if (len <= 16)
576 fprintf(fout, " ");
577 else
578 fprintf(fout, " ");
579 }
580 else {
581 fprintf(fout, "\t\t ");
582 }
583
c87eb470 584 // fill out some unwanted strings with zeroes..
585 if (type == DXT_BYTE && words[w][0] == '\''
586 && is_unwanted_sym(last_sym))
587 {
588 len = 0;
589 for (; w < wordc; w++) {
590 if (words[w][0] == '\'') {
591 p = words[w] + 1;
592 for (; *p && *p != '\''; p++)
593 len++;
594 }
595 else {
596 // assume encoded byte
597 len++;
598 }
599 }
600 fprintf(fout, ".skip %d", len);
601 goto fin;
602 }
603 else if (type == DXT_BYTE
efea2951 604 && (words[w][0] == '\''
605 || (w + 1 < wordc && words[w + 1][0] == '\'')))
606 {
054f95b2 607 // string; use asciz for most common case
608 if (w == wordc - 2 && IS(words[w + 1], "0")) {
609 fprintf(fout, ".asciz \"");
610 wordc--;
611 }
612 else
613 fprintf(fout, ".ascii \"");
614
615 for (; w < wordc; w++) {
616 if (words[w][0] == '\'') {
617 p = words[w] + 1;
618 p2 = strchr(p, '\'');
619 if (p2 == NULL)
620 aerr("unterminated string? '%s'\n", p);
621 memcpy(word, p, p2 - p);
622 word[p2 - p] = 0;
623 fprintf(fout, "%s", escape_string(word));
624 }
625 else {
626 val = parse_number(words[w]);
627 if (val & ~0xff)
628 aerr("bad string trailing byte?\n");
629 fprintf(fout, "\\x%02lx", val);
630 }
631 }
632 fprintf(fout, "\"");
633 goto fin;
634 }
635
636 if (w == wordc - 2) {
637 if (IS_START(words[w + 1], "dup(")) {
638 cnt = parse_number(words[w]);
639 p = words[w + 1] + 4;
640 p2 = strchr(p, ')');
641 if (p2 == NULL)
642 aerr("bad dup?\n");
643 memmove(word, p, p2 - p);
644 word[p2 - p] = 0;
36595fd2 645
646 val = 0;
647 if (!IS(word, "?"))
648 val = parse_number(word);
054f95b2 649
650 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
651 cnt, type_size(type), val);
652 goto fin;
653 }
654 }
655
656 if (type == DXT_DWORD && words[w][0] == '\''
657 && words[w][5] == '\'' && strlen(words[w]) == 6)
658 {
659 if (w != wordc - 1)
660 aerr("TODO\n");
661
662 p = words[w];
663 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
664 fprintf(fout, ".long 0x%lx", val);
b545ba7c 665 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
054f95b2 666 goto fin;
667 }
668
36595fd2 669 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 670 {
671 if (w != wordc - 1)
672 aerr("TODO\n");
673
f0be238a 674 if (g_arm_mode && type == DXT_TEN) {
675 fprintf(fout, ".fill 10");
676 snprintf(g_comment, sizeof(g_comment), "%s %s",
677 type_name_float(type), words[w]);
678 }
679 else
680 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 681 goto fin;
682 }
683
684 first = 1;
685 fprintf(fout, "%s ", type_name(type));
686 for (; w < wordc; w++)
687 {
688 if (!first)
689 fprintf(fout, ", ");
690
36595fd2 691 is_label = is_bss = 0;
692 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 693 is_label = 1;
694 w++;
695 }
36595fd2 696 else if (IS(words[w], "?")) {
697 is_bss = 1;
698 }
054f95b2 699 else if (type == DXT_DWORD
700 && !('0' <= words[w][0] && words[w][0] <= '9'))
701 {
702 // assume label
703 is_label = 1;
704 }
705
36595fd2 706 if (is_bss) {
707 fprintf(fout, "0");
708 }
709 else if (is_label) {
054f95b2 710 p = words[w];
ddaf8bd7 711 if (IS_START(p, "loc_") || IS_START(p, "__imp")
712 || strchr(p, '?') || strchr(p, '@')
36595fd2 713 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
714 cmpstringp))
054f95b2 715 {
716 fprintf(fout, "0");
b545ba7c 717 snprintf(g_comment, sizeof(g_comment), "%s", p);
36595fd2 718 }
719 else {
c0050df6 720 pp = check_var(fhdr, sym, p);
aa1aa2c2 721 if (pp == NULL) {
722 fprintf(fout, "%s%s",
723 (no_decorations || p[0] == '_') ? "" : "_", p);
724 }
725 else {
726 if (no_decorations)
727 fprintf(fout, "%s", pp->name);
728 else
729 output_decorated_pp(fout, pp);
730 }
054f95b2 731 }
054f95b2 732 }
733 else {
734 val = parse_number(words[w]);
735 if (val < 10)
736 fprintf(fout, "%ld", val);
737 else
738 fprintf(fout, "0x%lx", val);
739 }
740
741 first = 0;
742 }
743
744fin:
b545ba7c 745 if (g_comment[0] != 0) {
f0be238a 746 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
b545ba7c 747 g_comment[0] = 0;
054f95b2 748 }
749 fprintf(fout, "\n");
054f95b2 750 }
751 }
752
36595fd2 753 fprintf(fout, "\n");
754
755 // dump public syms
756 for (i = 0; i < pub_sym_cnt; i++)
aa1aa2c2 757 fprintf(fout, ".global %s%s\n",
758 no_decorations ? "" : "_", pub_syms[i]);
36595fd2 759
054f95b2 760 fclose(fout);
761 fclose(fasm);
36595fd2 762 fclose(fhdr);
054f95b2 763
764 return 0;
765}
766
// vim:ts=2:shiftwidth=2:expandtab