minor makefile update
[ia32rtools.git] / tools / cvt_data.c
CommitLineData
054f95b2 1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
// element count of a real array (do not use on pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// non-zero when strings w and y are equal
#define IS(w, y) (!strcmp((w), (y)))
// non-zero when string w starts with string y
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
12
13#include "protoparse.h"
14
// name of the .asm file being converted and the line last read from it;
// both are used by the anote/awarn/aerr diagnostics
static const char *asmfn;
static int asmln;

// prototype of the most recent func ptr data label (see check_var)
static const struct parsed_proto *g_func_sym_pp;
// comment text to append to the output line currently being written
static char g_comment[256];
// number of warnings printed so far
static int g_warn_cnt;
// set by -i: don't warn about unhandled register calling conventions
static int g_cconv_novalidate;
// set by -a: ARM output mode
static int g_arm_mode;
b545ba7c 23
// Data directive kinds (db/dw/dd/dq/dt).
// note: must be in ascending order, code compares these with >=
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
33
// Diagnostics, all prefixed with "<asm file>:<line>:" and sent to stdout.

// informational message
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

// warning; the 10th warning closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
48
49#include "masm_tools.h"
50
// Copy the next word of 's' into 'w' (at most wsize - 1 chars + NUL).
// Leading blanks are skipped first. A word ends at a blank, a ',' or the
// end of the string, except that blanks and commas between single quotes
// belong to the word. Prints a warning when the word did not fit.
// Returns a pointer to the first char of 's' that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    // the quote char itself is part of the word
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n] = c;
    n++;
  }
  w[n] = 0;

  // stopped on something that is not a word delimiter: 'w' was too small
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
72
73static void next_section(FILE *fasm, char *name)
74{
75 char words[2][256];
76 char line[256];
77 int wordc;
78 char *p;
79
80 name[0] = 0;
81
82 while (fgets(line, sizeof(line), fasm))
83 {
84 wordc = 0;
85 asmln++;
86
87 p = sskip(line);
88 if (*p == 0)
89 continue;
90
91 if (*p == ';') {
92 while (strlen(line) == sizeof(line) - 1) {
93 // one of those long comment lines..
94 if (!fgets(line, sizeof(line), fasm))
95 break;
96 }
97 continue;
98 }
99
100 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
101 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
102 if (*p == 0 || *p == ';') {
103 wordc++;
104 break;
105 }
106 }
107
108 if (wordc < 2)
109 continue;
110
111 if (!IS(words[1], "segment"))
112 continue;
113
114 strcpy(name, words[0]);
115 break;
116 }
117}
118
119static enum dx_type parse_dx_directive(const char *name)
120{
121 if (IS(name, "dd"))
122 return DXT_DWORD;
123 if (IS(name, "dw"))
124 return DXT_WORD;
125 if (IS(name, "db"))
126 return DXT_BYTE;
127 if (IS(name, "dq"))
128 return DXT_QUAD;
129 if (IS(name, "dt"))
130 return DXT_TEN;
131
132 return DXT_UNSPEC;
133}
134
135static const char *type_name(enum dx_type type)
136{
137 switch (type) {
138 case DXT_BYTE:
139 return ".byte";
140 case DXT_WORD:
b0d802b2 141 return ".hword";
054f95b2 142 case DXT_DWORD:
143 return ".long";
144 case DXT_QUAD:
145 return ".quad";
146 case DXT_TEN:
147 return ".tfloat";
148 case DXT_UNSPEC:
149 break;
150 }
151 return "<bad>";
152}
153
36595fd2 154static const char *type_name_float(enum dx_type type)
155{
156 switch (type) {
157 case DXT_DWORD:
158 return ".float";
159 case DXT_QUAD:
160 return ".double";
161 case DXT_TEN:
162 return ".tfloat";
163 default:
164 break;
165 }
166 return "<bad_float>";
167}
168
054f95b2 169static int type_size(enum dx_type type)
170{
171 switch (type) {
172 case DXT_BYTE:
173 return 1;
174 case DXT_WORD:
175 return 2;
176 case DXT_DWORD:
177 return 4;
178 case DXT_QUAD:
179 return 8;
180 case DXT_TEN:
181 return 10;
182 case DXT_UNSPEC:
183 break;
184 }
185 return -1;
186}
187
// Escape 's' in place so it can be emitted inside a double quoted
// assembler string: '"' becomes \" and '\' becomes \\.
//
// The result is written back to the start of 's' and 's' is returned.
// Since escaping can grow the text, the caller's buffer must have room
// for the escaped form; the result is capped at 255 chars + NUL and a
// warning is printed if the input had to be cut short (an escape
// sequence is never split).
static char *escape_string(char *s)
{
  char buf[256];
  const char *src;
  const char *piece;
  size_t piece_len;
  size_t n = 0;

  for (src = s; *src != 0; src++) {
    if (*src == '"') {
      // not "\22": gas would read that as an octal code (0x12)
      piece = "\\\"";
      piece_len = 2;
    }
    else if (*src == '\\') {
      piece = "\\\\";
      piece_len = 2;
    }
    else {
      piece = src;
      piece_len = 1;
    }

    if (n + piece_len > sizeof(buf) - 1) {
      printf("warning: escaped string truncated\n");
      break;
    }
    memcpy(buf + n, piece, piece_len);
    n += piece_len;
  }
  buf[n] = 0;

  memcpy(s, buf, n + 1);
  return s;
}
209
b545ba7c 210static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
211 size_t buf_size)
212{
213 char *p = buf;
214 size_t l;
215 int i;
216
217 if (pp->ret_type.is_ptr)
218 *p++ = 'p';
219 else if (IS(pp->ret_type.name, "void"))
220 *p++ = 'v';
221 else
222 *p++ = 'i';
223 *p++ = '(';
224 l = 2;
225
226 for (i = 0; i < pp->argc; i++) {
227 if (pp->arg[i].reg != NULL)
228 snprintf(buf + l, buf_size - l, "%s%s",
229 i == 0 ? "" : ",", pp->arg[i].reg);
230 else
231 snprintf(buf + l, buf_size - l, "%sa%d",
232 i == 0 ? "" : ",", i + 1);
233 l = strlen(buf);
234 }
235 snprintf(buf + l, buf_size - l, ")");
236}
237
c0050df6 238static const struct parsed_proto *check_var(FILE *fhdr,
239 const char *sym, const char *varname)
b545ba7c 240{
241 const struct parsed_proto *pp, *pp_sym;
242 char fp_sym[256], fp_var[256];
b545ba7c 243
244 pp = proto_parse(fhdr, varname, 1);
36595fd2 245 if (pp == NULL) {
b545ba7c 246 if (IS_START(varname, "sub_"))
247 awarn("sub_ sym missing proto: '%s'\n", varname);
c0050df6 248 return NULL;
36595fd2 249 }
250
251 if (!pp->is_func && !pp->is_fptr)
c0050df6 252 return NULL;
b545ba7c 253
b74c31e3 254 pp_print(fp_var, sizeof(fp_var), pp);
b545ba7c 255
36595fd2 256 if (pp->argc_reg == 0)
b545ba7c 257 goto check_sym;
36595fd2 258 if (pp->argc_reg == 1 && pp->argc_stack == 0
259 && IS(pp->arg[0].reg, "ecx"))
260 {
b545ba7c 261 goto check_sym;
36595fd2 262 }
77f3a833 263 if (!g_cconv_novalidate
264 && (pp->argc_reg != 2
265 || !IS(pp->arg[0].reg, "ecx")
266 || !IS(pp->arg[1].reg, "edx")))
36595fd2 267 {
b545ba7c 268 awarn("unhandled reg call: %s\n", fp_var);
36595fd2 269 }
36595fd2 270
b545ba7c 271check_sym:
272 sprint_pp_short(pp, g_comment, sizeof(g_comment));
273
274 if (sym != NULL) {
275 g_func_sym_pp = NULL;
276 pp_sym = proto_parse(fhdr, sym, 1);
277 if (pp_sym == NULL)
c0050df6 278 return pp;
b545ba7c 279 if (!pp_sym->is_fptr)
280 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
281 g_func_sym_pp = pp_sym;
36595fd2 282 }
b545ba7c 283 else {
284 pp_sym = g_func_sym_pp;
285 if (pp_sym == NULL)
c0050df6 286 return pp;
b545ba7c 287 }
288
27ebfaed 289 if (pp_cmp_func(pp, pp_sym)) {
b74c31e3 290 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
b545ba7c 291 anote("var: %s\n", fp_var);
292 anote("sym: %s\n", fp_sym);
293 awarn("^ mismatch\n");
36595fd2 294 }
c0050df6 295
296 return pp;
36595fd2 297}
298
aa1aa2c2 299static void output_decorated_pp(FILE *fout,
300 const struct parsed_proto *pp)
301{
302 if (pp->name[0] != '_')
303 fprintf(fout, pp->is_fastcall ? "@" : "_");
304 fprintf(fout, "%s", pp->name);
305 if (pp->is_stdcall && pp->argc > 0)
306 fprintf(fout, "@%d", pp->argc * 4);
307}
308
f0be238a 309static int align_value(int src_val)
310{
311 if (src_val <= 0) {
312 awarn("bad align: %d\n", src_val);
313 src_val = 1;
314 }
315 if (!g_arm_mode)
316 return src_val;
317
318 return __builtin_ffs(src_val) - 1;
319}
320
// qsort()/bsearch() comparator for arrays of C strings: both arguments
// point at 'char *' elements, which are compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(char * const *)p1;
  const char *rhs = *(char * const *)p2;

  return strcmp(lhs, rhs);
}
325
/* XXX: maybe move to external file? */
// Labels of byte strings that are not wanted in the output; main()
// emits a .skip of the same length for them instead of the text.
// The list is not sorted here: main() qsort()s it before
// is_unwanted_sym() binary searches it.
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
352
353static int is_unwanted_sym(const char *sym)
354{
355 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
356 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
357}
358
054f95b2 359int main(int argc, char *argv[])
360{
36595fd2 361 FILE *fout, *fasm, *fhdr, *frlist;
b545ba7c 362 const struct parsed_proto *pp;
aa1aa2c2 363 int no_decorations = 0;
f0be238a 364 char comment_char = '#';
054f95b2 365 char words[20][256];
054f95b2 366 char word[256];
367 char line[256];
c87eb470 368 char last_sym[32];
054f95b2 369 unsigned long val;
370 unsigned long cnt;
371 const char *sym;
372 enum dx_type type;
36595fd2 373 char **pub_syms;
374 int pub_sym_cnt = 0;
375 int pub_sym_alloc;
376 char **rlist;
377 int rlist_cnt = 0;
378 int rlist_alloc;
054f95b2 379 int is_label;
36595fd2 380 int is_bss;
054f95b2 381 int wordc;
382 int first;
383 int arg_out;
384 int arg = 1;
385 int len;
36595fd2 386 int w, i;
054f95b2 387 char *p;
388 char *p2;
389
36595fd2 390 if (argc < 4) {
aa1aa2c2 391 // -nd: no symbol decorations
f0be238a 392 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 393 argv[0]);
394 return 1;
395 }
396
aa1aa2c2 397 for (arg = 1; arg < argc; arg++) {
398 if (IS(argv[arg], "-nd"))
399 no_decorations = 1;
77f3a833 400 else if (IS(argv[arg], "-i"))
401 g_cconv_novalidate = 1;
f0be238a 402 else if (IS(argv[arg], "-a")) {
403 comment_char = '@';
404 g_arm_mode = 1;
405 }
aa1aa2c2 406 else
407 break;
408 }
409
054f95b2 410 arg_out = arg++;
411
412 asmfn = argv[arg++];
413 fasm = fopen(asmfn, "r");
414 my_assert_not(fasm, NULL);
415
416 hdrfn = argv[arg++];
36595fd2 417 fhdr = fopen(hdrfn, "r");
418 my_assert_not(fhdr, NULL);
054f95b2 419
420 fout = fopen(argv[arg_out], "w");
421 my_assert_not(fout, NULL);
422
36595fd2 423 pub_sym_alloc = 64;
424 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
425 my_assert_not(pub_syms, NULL);
426
427 rlist_alloc = 64;
428 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
429 my_assert_not(rlist, NULL);
430
431 for (; arg < argc; arg++) {
432 frlist = fopen(argv[arg], "r");
433 my_assert_not(frlist, NULL);
434
435 while (fgets(line, sizeof(line), frlist)) {
436 p = sskip(line);
437 if (*p == 0 || *p == ';')
438 continue;
439
440 p = next_word(words[0], sizeof(words[0]), p);
441 if (words[0][0] == 0)
442 continue;
443
444 if (rlist_cnt >= rlist_alloc) {
445 rlist_alloc = rlist_alloc * 2 + 64;
446 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448 }
449 rlist[rlist_cnt++] = strdup(words[0]);
450 }
451
452 fclose(frlist);
453 frlist = NULL;
454 }
455
456 if (rlist_cnt > 0)
457 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
458
c87eb470 459 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
460 sizeof(unwanted_syms[0]), cmpstringp);
461
462 last_sym[0] = 0;
463
36595fd2 464 while (1) {
054f95b2 465 next_section(fasm, line);
36595fd2 466 if (feof(fasm))
467 break;
054f95b2 468 if (IS(line + 1, "text"))
469 continue;
470
471 if (IS(line + 1, "rdata"))
472 fprintf(fout, "\n.section .rodata\n");
473 else if (IS(line + 1, "data"))
474 fprintf(fout, "\n.data\n");
475 else
476 aerr("unhandled section: '%s'\n", line);
477
f0be238a 478 fprintf(fout, ".align %d\n", align_value(4));
054f95b2 479
480 while (fgets(line, sizeof(line), fasm))
481 {
482 sym = NULL;
483 asmln++;
484
485 p = sskip(line);
b0d802b2 486 if (*p == 0)
487 continue;
488
489 if (*p == ';') {
490 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
491 // ;org is only seen at section start, so assume . addr 0
492 i &= 0xfff;
493 if (i != 0)
494 fprintf(fout, "\t\t .skip 0x%x\n", i);
495 }
054f95b2 496 continue;
b0d802b2 497 }
054f95b2 498
499 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 500 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
501 if (*p == 0 || *p == ';') {
502 wordc++;
503 break;
504 }
505 if (*p == ',') {
054f95b2 506 p = sskip(p + 1);
507 }
508 }
509
b545ba7c 510 if (*p == ';') {
511 p = sskip(p + 1);
512 if (IS_START(p, "sctclrtype"))
513 g_func_sym_pp = NULL;
514 }
515
054f95b2 516 if (wordc == 2 && IS(words[1], "ends"))
517 break;
36595fd2 518 if (wordc <= 2 && IS(words[0], "end"))
519 break;
054f95b2 520 if (wordc < 2)
521 aerr("unhandled: '%s'\n", words[0]);
522
523 // don't cares
524 if (IS(words[0], "assume"))
525 continue;
526
527 if (IS(words[0], "align")) {
528 val = parse_number(words[1]);
f0be238a 529 fprintf(fout, "\t\t .align %d", align_value(val));
054f95b2 530 goto fin;
531 }
532
533 w = 1;
534 type = parse_dx_directive(words[0]);
535 if (type == DXT_UNSPEC) {
536 type = parse_dx_directive(words[1]);
537 sym = words[0];
538 w = 2;
539 }
540 if (type == DXT_UNSPEC)
541 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
542
543 if (sym != NULL) {
c87eb470 544 snprintf(last_sym, sizeof(last_sym), "%s", sym);
36595fd2 545
b545ba7c 546 pp = proto_parse(fhdr, sym, 1);
c87eb470 547 if (pp != NULL) {
b545ba7c 548 g_func_sym_pp = NULL;
549
c87eb470 550 // public/global name
551 if (pub_sym_cnt >= pub_sym_alloc) {
552 pub_sym_alloc *= 2;
553 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
554 my_assert_not(pub_syms, NULL);
555 }
556 pub_syms[pub_sym_cnt++] = strdup(sym);
557 }
558
054f95b2 559 len = strlen(sym);
aa1aa2c2 560 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
054f95b2 561
562 len += 2;
563 if (len < 8)
564 fprintf(fout, "\t");
565 if (len < 16)
566 fprintf(fout, "\t");
567 if (len <= 16)
568 fprintf(fout, " ");
569 else
570 fprintf(fout, " ");
571 }
572 else {
573 fprintf(fout, "\t\t ");
574 }
575
c87eb470 576 // fill out some unwanted strings with zeroes..
577 if (type == DXT_BYTE && words[w][0] == '\''
578 && is_unwanted_sym(last_sym))
579 {
580 len = 0;
581 for (; w < wordc; w++) {
582 if (words[w][0] == '\'') {
583 p = words[w] + 1;
584 for (; *p && *p != '\''; p++)
585 len++;
586 }
587 else {
588 // assume encoded byte
589 len++;
590 }
591 }
592 fprintf(fout, ".skip %d", len);
593 goto fin;
594 }
595 else if (type == DXT_BYTE
efea2951 596 && (words[w][0] == '\''
597 || (w + 1 < wordc && words[w + 1][0] == '\'')))
598 {
054f95b2 599 // string; use asciz for most common case
600 if (w == wordc - 2 && IS(words[w + 1], "0")) {
601 fprintf(fout, ".asciz \"");
602 wordc--;
603 }
604 else
605 fprintf(fout, ".ascii \"");
606
607 for (; w < wordc; w++) {
608 if (words[w][0] == '\'') {
609 p = words[w] + 1;
610 p2 = strchr(p, '\'');
611 if (p2 == NULL)
612 aerr("unterminated string? '%s'\n", p);
613 memcpy(word, p, p2 - p);
614 word[p2 - p] = 0;
615 fprintf(fout, "%s", escape_string(word));
616 }
617 else {
618 val = parse_number(words[w]);
619 if (val & ~0xff)
620 aerr("bad string trailing byte?\n");
621 fprintf(fout, "\\x%02lx", val);
622 }
623 }
624 fprintf(fout, "\"");
625 goto fin;
626 }
627
628 if (w == wordc - 2) {
629 if (IS_START(words[w + 1], "dup(")) {
630 cnt = parse_number(words[w]);
631 p = words[w + 1] + 4;
632 p2 = strchr(p, ')');
633 if (p2 == NULL)
634 aerr("bad dup?\n");
635 memmove(word, p, p2 - p);
636 word[p2 - p] = 0;
36595fd2 637
638 val = 0;
639 if (!IS(word, "?"))
640 val = parse_number(word);
054f95b2 641
642 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
643 cnt, type_size(type), val);
644 goto fin;
645 }
646 }
647
648 if (type == DXT_DWORD && words[w][0] == '\''
649 && words[w][5] == '\'' && strlen(words[w]) == 6)
650 {
651 if (w != wordc - 1)
652 aerr("TODO\n");
653
654 p = words[w];
655 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
656 fprintf(fout, ".long 0x%lx", val);
b545ba7c 657 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
054f95b2 658 goto fin;
659 }
660
36595fd2 661 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 662 {
663 if (w != wordc - 1)
664 aerr("TODO\n");
665
f0be238a 666 if (g_arm_mode && type == DXT_TEN) {
667 fprintf(fout, ".fill 10");
668 snprintf(g_comment, sizeof(g_comment), "%s %s",
669 type_name_float(type), words[w]);
670 }
671 else
672 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 673 goto fin;
674 }
675
676 first = 1;
677 fprintf(fout, "%s ", type_name(type));
678 for (; w < wordc; w++)
679 {
680 if (!first)
681 fprintf(fout, ", ");
682
36595fd2 683 is_label = is_bss = 0;
684 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 685 is_label = 1;
686 w++;
687 }
36595fd2 688 else if (IS(words[w], "?")) {
689 is_bss = 1;
690 }
054f95b2 691 else if (type == DXT_DWORD
692 && !('0' <= words[w][0] && words[w][0] <= '9'))
693 {
694 // assume label
695 is_label = 1;
696 }
697
36595fd2 698 if (is_bss) {
699 fprintf(fout, "0");
700 }
701 else if (is_label) {
054f95b2 702 p = words[w];
ddaf8bd7 703 if (IS_START(p, "loc_") || IS_START(p, "__imp")
704 || strchr(p, '?') || strchr(p, '@')
36595fd2 705 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
706 cmpstringp))
054f95b2 707 {
708 fprintf(fout, "0");
b545ba7c 709 snprintf(g_comment, sizeof(g_comment), "%s", p);
36595fd2 710 }
711 else {
c0050df6 712 pp = check_var(fhdr, sym, p);
aa1aa2c2 713 if (pp == NULL) {
714 fprintf(fout, "%s%s",
715 (no_decorations || p[0] == '_') ? "" : "_", p);
716 }
717 else {
718 if (no_decorations)
719 fprintf(fout, "%s", pp->name);
720 else
721 output_decorated_pp(fout, pp);
722 }
054f95b2 723 }
054f95b2 724 }
725 else {
726 val = parse_number(words[w]);
727 if (val < 10)
728 fprintf(fout, "%ld", val);
729 else
730 fprintf(fout, "0x%lx", val);
731 }
732
733 first = 0;
734 }
735
736fin:
b545ba7c 737 if (g_comment[0] != 0) {
f0be238a 738 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
b545ba7c 739 g_comment[0] = 0;
054f95b2 740 }
741 fprintf(fout, "\n");
054f95b2 742 }
743 }
744
36595fd2 745 fprintf(fout, "\n");
746
747 // dump public syms
748 for (i = 0; i < pub_sym_cnt; i++)
aa1aa2c2 749 fprintf(fout, ".global %s%s\n",
750 no_decorations ? "" : "_", pub_syms[i]);
36595fd2 751
054f95b2 752 fclose(fout);
753 fclose(fasm);
36595fd2 754 fclose(fhdr);
054f95b2 755
756 return 0;
757}
758
759// vim:ts=2:shiftwidth=2:expandtab