translate: some vararg improvements
[ia32rtools.git] / tools / cvt_data.c
CommitLineData
/*
 * ia32rtools
 * (C) notaz, 2013,2014
 *
 * This work is licensed under the terms of 3-clause BSD license.
 * See COPYING file in the top-level directory.
 */
8
054f95b2 9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16
// Number of elements in a true array (not a pointer).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Non-zero when strings w and y are equal.
#define IS(w, y) (!strcmp((w), (y)))
// Non-zero when string w begins with string y (y is evaluated twice).
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
20
21#include "protoparse.h"
22
// name of the asm input file and the line currently being processed;
// both are printed by anote()/awarn()/aerr()
static const char *asmfn;
static int asmln;

// prototype of the last data label that was found to be a function pointer;
// check_var() compares following entries against it
static const struct parsed_proto *g_func_sym_pp;
// pending comment text, appended to the end of the current output line
static char g_comment[256];
// warnings printed so far, awarn() gives up at 10
static int g_warn_cnt;
// -i: don't warn about unhandled register calling conventions
static int g_cconv_novalidate;
// -a: ARM output flavour (see align_value() and the DXT_TEN handling)
static int g_arm_mode;
b545ba7c 31
// Element size selected by a data directive (db/dw/dd/dq/dt).
// note: must be in ascending order (code compares with 'type >= DXT_DWORD')
enum dx_type {
  DXT_UNSPEC,   // not a data directive
  DXT_BYTE,     // db
  DXT_WORD,     // dw
  DXT_DWORD,    // dd
  DXT_QUAD,     // dq
  DXT_TEN,      // dt
};
41
// Diagnostics prefixed with the current asm file name and line number.
// All of them print to stdout.

// informational note, never terminates
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// warning; on the 10th warning all open streams are closed and we exit(1)
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// fatal error: print, close all open streams and exit(1)
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
56
57#include "masm_tools.h"
58
/*
 * Quote-aware word extraction.
 *
 * After sskip() (from my_str.h, presumably skips leading blanks), copies
 * characters from 's' into 'w' until a NUL, or a blank/comma that is not
 * inside single quotes. At most wsize - 1 characters are copied and 'w' is
 * always NUL-terminated; wsize must be at least 1.
 *
 * Prints a warning when copying stopped on anything other than NUL, blank
 * or comma, i.e. when the word did not fit.
 *
 * Returns a pointer to the first character that was not copied.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i;

  s = sskip(s);

  for (i = 0; i < wsize - 1; i++) {
    // a single quote toggles "inside string" state and is itself copied
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  if (s[i] != 0 && !my_isblank(s[i]) && s[i] != ',')
    printf("warning: '%s' truncated\n", w);

  return s + i;
}
80
81static void next_section(FILE *fasm, char *name)
82{
83 char words[2][256];
84 char line[256];
85 int wordc;
86 char *p;
87
88 name[0] = 0;
89
90 while (fgets(line, sizeof(line), fasm))
91 {
92 wordc = 0;
93 asmln++;
94
95 p = sskip(line);
96 if (*p == 0)
97 continue;
98
99 if (*p == ';') {
100 while (strlen(line) == sizeof(line) - 1) {
101 // one of those long comment lines..
102 if (!fgets(line, sizeof(line), fasm))
103 break;
104 }
105 continue;
106 }
107
108 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
109 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
110 if (*p == 0 || *p == ';') {
111 wordc++;
112 break;
113 }
114 }
115
116 if (wordc < 2)
117 continue;
118
119 if (!IS(words[1], "segment"))
120 continue;
121
122 strcpy(name, words[0]);
123 break;
124 }
125}
126
127static enum dx_type parse_dx_directive(const char *name)
128{
129 if (IS(name, "dd"))
130 return DXT_DWORD;
131 if (IS(name, "dw"))
132 return DXT_WORD;
133 if (IS(name, "db"))
134 return DXT_BYTE;
135 if (IS(name, "dq"))
136 return DXT_QUAD;
137 if (IS(name, "dt"))
138 return DXT_TEN;
139
140 return DXT_UNSPEC;
141}
142
143static const char *type_name(enum dx_type type)
144{
145 switch (type) {
146 case DXT_BYTE:
147 return ".byte";
148 case DXT_WORD:
b0d802b2 149 return ".hword";
054f95b2 150 case DXT_DWORD:
151 return ".long";
152 case DXT_QUAD:
153 return ".quad";
154 case DXT_TEN:
155 return ".tfloat";
156 case DXT_UNSPEC:
157 break;
158 }
159 return "<bad>";
160}
161
36595fd2 162static const char *type_name_float(enum dx_type type)
163{
164 switch (type) {
165 case DXT_DWORD:
166 return ".float";
167 case DXT_QUAD:
168 return ".double";
169 case DXT_TEN:
170 return ".tfloat";
171 default:
172 break;
173 }
174 return "<bad_float>";
175}
176
054f95b2 177static int type_size(enum dx_type type)
178{
179 switch (type) {
180 case DXT_BYTE:
181 return 1;
182 case DXT_WORD:
183 return 2;
184 case DXT_DWORD:
185 return 4;
186 case DXT_QUAD:
187 return 8;
188 case DXT_TEN:
189 return 10;
190 case DXT_UNSPEC:
191 break;
192 }
193 return -1;
194}
195
/*
 * Escape 's' in place so it can be put between double quotes of an
 * assembler string directive: '"' becomes \" and '\' becomes \\ .
 *
 * The text grows by one byte per escaped character, so the buffer behind
 * 's' must have room for strlen(s) + <number of '"' and '\'> + 1 bytes.
 *
 * Returns 's'.
 */
static char *escape_string(char *s)
{
  size_t len = 0, extra = 0;
  char *src, *dst;

  for (src = s; *src != 0; src++) {
    if (*src == '"' || *src == '\\')
      extra++;
    len++;
  }
  if (extra == 0)
    return s;

  // expand from the end towards the start: dst stays ahead of src,
  // so nothing is overwritten before it has been read
  src = s + len;
  dst = s + len + extra;
  *dst = 0;
  while (src > s) {
    char c = *--src;

    *--dst = c;
    if (c == '"' || c == '\\')
      *--dst = '\\';
  }

  return s;
}
217
b545ba7c 218static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
219 size_t buf_size)
220{
221 char *p = buf;
222 size_t l;
223 int i;
224
225 if (pp->ret_type.is_ptr)
226 *p++ = 'p';
227 else if (IS(pp->ret_type.name, "void"))
228 *p++ = 'v';
229 else
230 *p++ = 'i';
231 *p++ = '(';
232 l = 2;
233
234 for (i = 0; i < pp->argc; i++) {
235 if (pp->arg[i].reg != NULL)
236 snprintf(buf + l, buf_size - l, "%s%s",
237 i == 0 ? "" : ",", pp->arg[i].reg);
238 else
239 snprintf(buf + l, buf_size - l, "%sa%d",
240 i == 0 ? "" : ",", i + 1);
241 l = strlen(buf);
242 }
243 snprintf(buf + l, buf_size - l, ")");
244}
245
c0050df6 246static const struct parsed_proto *check_var(FILE *fhdr,
247 const char *sym, const char *varname)
b545ba7c 248{
249 const struct parsed_proto *pp, *pp_sym;
250 char fp_sym[256], fp_var[256];
b545ba7c 251
252 pp = proto_parse(fhdr, varname, 1);
36595fd2 253 if (pp == NULL) {
b545ba7c 254 if (IS_START(varname, "sub_"))
255 awarn("sub_ sym missing proto: '%s'\n", varname);
c0050df6 256 return NULL;
36595fd2 257 }
258
259 if (!pp->is_func && !pp->is_fptr)
c0050df6 260 return NULL;
b545ba7c 261
b74c31e3 262 pp_print(fp_var, sizeof(fp_var), pp);
b545ba7c 263
36595fd2 264 if (pp->argc_reg == 0)
b545ba7c 265 goto check_sym;
36595fd2 266 if (pp->argc_reg == 1 && pp->argc_stack == 0
267 && IS(pp->arg[0].reg, "ecx"))
268 {
b545ba7c 269 goto check_sym;
36595fd2 270 }
77f3a833 271 if (!g_cconv_novalidate
272 && (pp->argc_reg != 2
273 || !IS(pp->arg[0].reg, "ecx")
274 || !IS(pp->arg[1].reg, "edx")))
36595fd2 275 {
b545ba7c 276 awarn("unhandled reg call: %s\n", fp_var);
36595fd2 277 }
36595fd2 278
b545ba7c 279check_sym:
280 sprint_pp_short(pp, g_comment, sizeof(g_comment));
281
282 if (sym != NULL) {
283 g_func_sym_pp = NULL;
284 pp_sym = proto_parse(fhdr, sym, 1);
285 if (pp_sym == NULL)
c0050df6 286 return pp;
b545ba7c 287 if (!pp_sym->is_fptr)
288 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
289 g_func_sym_pp = pp_sym;
36595fd2 290 }
b545ba7c 291 else {
292 pp_sym = g_func_sym_pp;
293 if (pp_sym == NULL)
c0050df6 294 return pp;
b545ba7c 295 }
296
27ebfaed 297 if (pp_cmp_func(pp, pp_sym)) {
b74c31e3 298 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
b545ba7c 299 anote("var: %s\n", fp_var);
300 anote("sym: %s\n", fp_sym);
301 awarn("^ mismatch\n");
36595fd2 302 }
c0050df6 303
304 return pp;
36595fd2 305}
306
aa1aa2c2 307static void output_decorated_pp(FILE *fout,
308 const struct parsed_proto *pp)
309{
310 if (pp->name[0] != '_')
311 fprintf(fout, pp->is_fastcall ? "@" : "_");
312 fprintf(fout, "%s", pp->name);
313 if (pp->is_stdcall && pp->argc > 0)
314 fprintf(fout, "@%d", pp->argc * 4);
315}
316
f0be238a 317static int align_value(int src_val)
318{
319 if (src_val <= 0) {
320 awarn("bad align: %d\n", src_val);
321 src_val = 1;
322 }
323 if (!g_arm_mode)
324 return src_val;
325
326 return __builtin_ffs(src_val) - 1;
327}
328
// qsort()/bsearch() comparator for arrays of C string pointers:
// both arguments point to a 'char *', the strings are compared with strcmp.
static int cmpstringp(const void *p1, const void *p2)
{
  const char *s1 = *(const char * const *)p1;
  const char *s2 = *(const char * const *)p2;

  return strcmp(s1, s2);
}
333
/* XXX: maybe move to external file? */
// Labels of strings that are replaced by zero fill in the output.
// Not sorted here: main() qsorts this table before is_unwanted_sym()
// does its binary search on it.
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
360
361static int is_unwanted_sym(const char *sym)
362{
363 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
364 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
365}
366
054f95b2 367int main(int argc, char *argv[])
368{
36595fd2 369 FILE *fout, *fasm, *fhdr, *frlist;
b545ba7c 370 const struct parsed_proto *pp;
aa1aa2c2 371 int no_decorations = 0;
f0be238a 372 char comment_char = '#';
054f95b2 373 char words[20][256];
054f95b2 374 char word[256];
375 char line[256];
c87eb470 376 char last_sym[32];
054f95b2 377 unsigned long val;
378 unsigned long cnt;
379 const char *sym;
380 enum dx_type type;
36595fd2 381 char **pub_syms;
382 int pub_sym_cnt = 0;
383 int pub_sym_alloc;
384 char **rlist;
385 int rlist_cnt = 0;
386 int rlist_alloc;
054f95b2 387 int is_label;
36595fd2 388 int is_bss;
054f95b2 389 int wordc;
390 int first;
391 int arg_out;
392 int arg = 1;
393 int len;
36595fd2 394 int w, i;
054f95b2 395 char *p;
396 char *p2;
397
36595fd2 398 if (argc < 4) {
aa1aa2c2 399 // -nd: no symbol decorations
f0be238a 400 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 401 argv[0]);
402 return 1;
403 }
404
aa1aa2c2 405 for (arg = 1; arg < argc; arg++) {
406 if (IS(argv[arg], "-nd"))
407 no_decorations = 1;
77f3a833 408 else if (IS(argv[arg], "-i"))
409 g_cconv_novalidate = 1;
f0be238a 410 else if (IS(argv[arg], "-a")) {
411 comment_char = '@';
412 g_arm_mode = 1;
413 }
aa1aa2c2 414 else
415 break;
416 }
417
054f95b2 418 arg_out = arg++;
419
420 asmfn = argv[arg++];
421 fasm = fopen(asmfn, "r");
422 my_assert_not(fasm, NULL);
423
424 hdrfn = argv[arg++];
36595fd2 425 fhdr = fopen(hdrfn, "r");
426 my_assert_not(fhdr, NULL);
054f95b2 427
428 fout = fopen(argv[arg_out], "w");
429 my_assert_not(fout, NULL);
430
36595fd2 431 pub_sym_alloc = 64;
432 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
433 my_assert_not(pub_syms, NULL);
434
435 rlist_alloc = 64;
436 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
437 my_assert_not(rlist, NULL);
438
439 for (; arg < argc; arg++) {
440 frlist = fopen(argv[arg], "r");
441 my_assert_not(frlist, NULL);
442
443 while (fgets(line, sizeof(line), frlist)) {
444 p = sskip(line);
445 if (*p == 0 || *p == ';')
446 continue;
447
448 p = next_word(words[0], sizeof(words[0]), p);
449 if (words[0][0] == 0)
450 continue;
451
452 if (rlist_cnt >= rlist_alloc) {
453 rlist_alloc = rlist_alloc * 2 + 64;
454 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
455 my_assert_not(rlist, NULL);
456 }
457 rlist[rlist_cnt++] = strdup(words[0]);
458 }
459
460 fclose(frlist);
461 frlist = NULL;
462 }
463
464 if (rlist_cnt > 0)
465 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
466
c87eb470 467 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
468 sizeof(unwanted_syms[0]), cmpstringp);
469
470 last_sym[0] = 0;
471
36595fd2 472 while (1) {
054f95b2 473 next_section(fasm, line);
36595fd2 474 if (feof(fasm))
475 break;
054f95b2 476 if (IS(line + 1, "text"))
477 continue;
478
479 if (IS(line + 1, "rdata"))
480 fprintf(fout, "\n.section .rodata\n");
481 else if (IS(line + 1, "data"))
482 fprintf(fout, "\n.data\n");
483 else
484 aerr("unhandled section: '%s'\n", line);
485
f0be238a 486 fprintf(fout, ".align %d\n", align_value(4));
054f95b2 487
488 while (fgets(line, sizeof(line), fasm))
489 {
490 sym = NULL;
491 asmln++;
492
493 p = sskip(line);
b0d802b2 494 if (*p == 0)
495 continue;
496
497 if (*p == ';') {
498 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
499 // ;org is only seen at section start, so assume . addr 0
500 i &= 0xfff;
501 if (i != 0)
502 fprintf(fout, "\t\t .skip 0x%x\n", i);
503 }
054f95b2 504 continue;
b0d802b2 505 }
054f95b2 506
507 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 508 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
509 if (*p == 0 || *p == ';') {
510 wordc++;
511 break;
512 }
513 if (*p == ',') {
054f95b2 514 p = sskip(p + 1);
515 }
516 }
517
b545ba7c 518 if (*p == ';') {
519 p = sskip(p + 1);
520 if (IS_START(p, "sctclrtype"))
521 g_func_sym_pp = NULL;
522 }
523
054f95b2 524 if (wordc == 2 && IS(words[1], "ends"))
525 break;
36595fd2 526 if (wordc <= 2 && IS(words[0], "end"))
527 break;
054f95b2 528 if (wordc < 2)
529 aerr("unhandled: '%s'\n", words[0]);
530
531 // don't cares
532 if (IS(words[0], "assume"))
533 continue;
534
535 if (IS(words[0], "align")) {
536 val = parse_number(words[1]);
f0be238a 537 fprintf(fout, "\t\t .align %d", align_value(val));
054f95b2 538 goto fin;
539 }
540
541 w = 1;
542 type = parse_dx_directive(words[0]);
543 if (type == DXT_UNSPEC) {
544 type = parse_dx_directive(words[1]);
545 sym = words[0];
546 w = 2;
547 }
548 if (type == DXT_UNSPEC)
549 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
550
551 if (sym != NULL) {
c87eb470 552 snprintf(last_sym, sizeof(last_sym), "%s", sym);
36595fd2 553
b545ba7c 554 pp = proto_parse(fhdr, sym, 1);
c87eb470 555 if (pp != NULL) {
b545ba7c 556 g_func_sym_pp = NULL;
557
c87eb470 558 // public/global name
559 if (pub_sym_cnt >= pub_sym_alloc) {
560 pub_sym_alloc *= 2;
561 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
562 my_assert_not(pub_syms, NULL);
563 }
564 pub_syms[pub_sym_cnt++] = strdup(sym);
565 }
566
054f95b2 567 len = strlen(sym);
aa1aa2c2 568 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
054f95b2 569
570 len += 2;
571 if (len < 8)
572 fprintf(fout, "\t");
573 if (len < 16)
574 fprintf(fout, "\t");
575 if (len <= 16)
576 fprintf(fout, " ");
577 else
578 fprintf(fout, " ");
579 }
580 else {
581 fprintf(fout, "\t\t ");
582 }
583
c87eb470 584 // fill out some unwanted strings with zeroes..
585 if (type == DXT_BYTE && words[w][0] == '\''
586 && is_unwanted_sym(last_sym))
587 {
588 len = 0;
589 for (; w < wordc; w++) {
590 if (words[w][0] == '\'') {
591 p = words[w] + 1;
592 for (; *p && *p != '\''; p++)
593 len++;
594 }
595 else {
596 // assume encoded byte
597 len++;
598 }
599 }
600 fprintf(fout, ".skip %d", len);
601 goto fin;
602 }
603 else if (type == DXT_BYTE
efea2951 604 && (words[w][0] == '\''
605 || (w + 1 < wordc && words[w + 1][0] == '\'')))
606 {
054f95b2 607 // string; use asciz for most common case
608 if (w == wordc - 2 && IS(words[w + 1], "0")) {
609 fprintf(fout, ".asciz \"");
610 wordc--;
611 }
612 else
613 fprintf(fout, ".ascii \"");
614
615 for (; w < wordc; w++) {
616 if (words[w][0] == '\'') {
617 p = words[w] + 1;
618 p2 = strchr(p, '\'');
619 if (p2 == NULL)
620 aerr("unterminated string? '%s'\n", p);
621 memcpy(word, p, p2 - p);
622 word[p2 - p] = 0;
623 fprintf(fout, "%s", escape_string(word));
624 }
625 else {
626 val = parse_number(words[w]);
627 if (val & ~0xff)
628 aerr("bad string trailing byte?\n");
629 fprintf(fout, "\\x%02lx", val);
630 }
631 }
632 fprintf(fout, "\"");
633 goto fin;
634 }
635
636 if (w == wordc - 2) {
637 if (IS_START(words[w + 1], "dup(")) {
638 cnt = parse_number(words[w]);
639 p = words[w + 1] + 4;
640 p2 = strchr(p, ')');
641 if (p2 == NULL)
642 aerr("bad dup?\n");
643 memmove(word, p, p2 - p);
644 word[p2 - p] = 0;
36595fd2 645
646 val = 0;
647 if (!IS(word, "?"))
648 val = parse_number(word);
054f95b2 649
650 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
651 cnt, type_size(type), val);
652 goto fin;
653 }
654 }
655
656 if (type == DXT_DWORD && words[w][0] == '\''
657 && words[w][5] == '\'' && strlen(words[w]) == 6)
658 {
659 if (w != wordc - 1)
660 aerr("TODO\n");
661
662 p = words[w];
663 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
664 fprintf(fout, ".long 0x%lx", val);
b545ba7c 665 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
054f95b2 666 goto fin;
667 }
668
36595fd2 669 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 670 {
671 if (w != wordc - 1)
672 aerr("TODO\n");
673
f0be238a 674 if (g_arm_mode && type == DXT_TEN) {
675 fprintf(fout, ".fill 10");
676 snprintf(g_comment, sizeof(g_comment), "%s %s",
677 type_name_float(type), words[w]);
678 }
679 else
680 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 681 goto fin;
682 }
683
684 first = 1;
685 fprintf(fout, "%s ", type_name(type));
686 for (; w < wordc; w++)
687 {
688 if (!first)
689 fprintf(fout, ", ");
690
36595fd2 691 is_label = is_bss = 0;
692 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 693 is_label = 1;
694 w++;
695 }
36595fd2 696 else if (IS(words[w], "?")) {
697 is_bss = 1;
698 }
054f95b2 699 else if (type == DXT_DWORD
700 && !('0' <= words[w][0] && words[w][0] <= '9'))
701 {
702 // assume label
703 is_label = 1;
704 }
705
36595fd2 706 if (is_bss) {
707 fprintf(fout, "0");
708 }
709 else if (is_label) {
054f95b2 710 p = words[w];
ddaf8bd7 711 if (IS_START(p, "loc_") || IS_START(p, "__imp")
712 || strchr(p, '?') || strchr(p, '@')
36595fd2 713 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
714 cmpstringp))
054f95b2 715 {
716 fprintf(fout, "0");
b545ba7c 717 snprintf(g_comment, sizeof(g_comment), "%s", p);
36595fd2 718 }
719 else {
c0050df6 720 pp = check_var(fhdr, sym, p);
aa1aa2c2 721 if (pp == NULL) {
722 fprintf(fout, "%s%s",
723 (no_decorations || p[0] == '_') ? "" : "_", p);
724 }
725 else {
726 if (no_decorations)
727 fprintf(fout, "%s", pp->name);
728 else
729 output_decorated_pp(fout, pp);
730 }
054f95b2 731 }
054f95b2 732 }
733 else {
734 val = parse_number(words[w]);
735 if (val < 10)
736 fprintf(fout, "%ld", val);
737 else
738 fprintf(fout, "0x%lx", val);
739 }
740
741 first = 0;
742 }
743
744fin:
b545ba7c 745 if (g_comment[0] != 0) {
f0be238a 746 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
b545ba7c 747 g_comment[0] = 0;
054f95b2 748 }
749 fprintf(fout, "\n");
054f95b2 750 }
751 }
752
36595fd2 753 fprintf(fout, "\n");
754
755 // dump public syms
756 for (i = 0; i < pub_sym_cnt; i++)
aa1aa2c2 757 fprintf(fout, ".global %s%s\n",
758 no_decorations ? "" : "_", pub_syms[i]);
36595fd2 759
054f95b2 760 fclose(fout);
761 fclose(fasm);
36595fd2 762 fclose(fhdr);
054f95b2 763
764 return 0;
765}
766
// vim:ts=2:shiftwidth=2:expandtab