fix stupid lods/cmps/scas bugs
[ia32rtools.git] / tools / cvt_data.c
CommitLineData
054f95b2 1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
// element count of a true array (not valid for pointers / array parameters)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// non-zero when strings w and y are equal
#define IS(w, y) (!strcmp((w), (y)))
// non-zero when string w begins with the prefix y
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
12
13#include "protoparse.h"
14
// name of the .asm input and the number of lines read from it so far;
// both are printed as the location prefix by anote/awarn/aerr
static const char *asmfn = NULL;
static int asmln = 0;

// prototype of the most recent labelled function-pointer symbol,
// remembered by check_var() for the unlabelled data lines that follow
static const struct parsed_proto *g_func_sym_pp = NULL;
// text appended as a "# ..." comment to the current output line
static char g_comment[256] = "";
// number of warnings printed so far (awarn gives up at 10)
static int g_warn_cnt = 0;
// set by the -i option: skip the register-call validation in check_var()
static int g_cconv_novalidate = 0;
b545ba7c 22
// Kinds of MASM data directives (db/dw/dd/dq/dt).
// note: values must stay in ascending order, main() relies on
// ordered comparisons such as (type >= DXT_DWORD)
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
32
// shared printer for all diagnostics:
// "<asm file>:<asm line>: <kind>: <message>" on stdout
#define adiag(kind, fmt, ...) \
  printf("%s:%d: " kind ": " fmt, asmfn, asmln, ##__VA_ARGS__)

// informational message, never terminates
#define anote(fmt, ...) \
  adiag("note", fmt, ##__VA_ARGS__)

// warning; the 10th warning closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  adiag("warning", fmt, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error: closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  adiag("error", fmt, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
47
48#include "masm_tools.h"
49
// Copy the next word of s into w (at most wsize - 1 chars + NUL).
// Leading blanks are skipped with sskip(); the word ends at a blank or
// a comma, except inside a '...' quoted part where both are kept.
// Prints a warning when the word did not fit into w.
// Returns a pointer to the first character of s that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopping anywhere but on a terminator/separator means w was too small
  c = s[n];
  if (!(c == 0 || my_isblank(c) || c == ','))
    printf("warning: '%s' truncated\n", w);

  return &s[n];
}
71
72static void next_section(FILE *fasm, char *name)
73{
74 char words[2][256];
75 char line[256];
76 int wordc;
77 char *p;
78
79 name[0] = 0;
80
81 while (fgets(line, sizeof(line), fasm))
82 {
83 wordc = 0;
84 asmln++;
85
86 p = sskip(line);
87 if (*p == 0)
88 continue;
89
90 if (*p == ';') {
91 while (strlen(line) == sizeof(line) - 1) {
92 // one of those long comment lines..
93 if (!fgets(line, sizeof(line), fasm))
94 break;
95 }
96 continue;
97 }
98
99 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
100 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
101 if (*p == 0 || *p == ';') {
102 wordc++;
103 break;
104 }
105 }
106
107 if (wordc < 2)
108 continue;
109
110 if (!IS(words[1], "segment"))
111 continue;
112
113 strcpy(name, words[0]);
114 break;
115 }
116}
117
118static enum dx_type parse_dx_directive(const char *name)
119{
120 if (IS(name, "dd"))
121 return DXT_DWORD;
122 if (IS(name, "dw"))
123 return DXT_WORD;
124 if (IS(name, "db"))
125 return DXT_BYTE;
126 if (IS(name, "dq"))
127 return DXT_QUAD;
128 if (IS(name, "dt"))
129 return DXT_TEN;
130
131 return DXT_UNSPEC;
132}
133
134static const char *type_name(enum dx_type type)
135{
136 switch (type) {
137 case DXT_BYTE:
138 return ".byte";
139 case DXT_WORD:
140 return ".word";
141 case DXT_DWORD:
142 return ".long";
143 case DXT_QUAD:
144 return ".quad";
145 case DXT_TEN:
146 return ".tfloat";
147 case DXT_UNSPEC:
148 break;
149 }
150 return "<bad>";
151}
152
36595fd2 153static const char *type_name_float(enum dx_type type)
154{
155 switch (type) {
156 case DXT_DWORD:
157 return ".float";
158 case DXT_QUAD:
159 return ".double";
160 case DXT_TEN:
161 return ".tfloat";
162 default:
163 break;
164 }
165 return "<bad_float>";
166}
167
054f95b2 168static int type_size(enum dx_type type)
169{
170 switch (type) {
171 case DXT_BYTE:
172 return 1;
173 case DXT_WORD:
174 return 2;
175 case DXT_DWORD:
176 return 4;
177 case DXT_QUAD:
178 return 8;
179 case DXT_TEN:
180 return 10;
181 case DXT_UNSPEC:
182 break;
183 }
184 return -1;
185}
186
// Escape s in place for use inside a double-quoted assembler string:
// '"' becomes \" and '\' becomes \\ (each grows by one character).
//
// The buffer behind s must have room for
//   strlen(s) + <number of '"' and '\' characters> + 1
// bytes, i.e. at most 2 * strlen(s) + 1.
// Returns s.
static char *escape_string(char *s)
{
  size_t len = strlen(s);
  size_t extra = 0;
  size_t i;
  char *src, *dst;
  char c;

  for (i = 0; i < len; i++) {
    if (s[i] == '"' || s[i] == '\\')
      extra++;
  }

  // expand back to front so that no unread input gets overwritten
  // and no temporary buffer (with its own size limit) is needed
  src = s + len;
  dst = s + len + extra;
  *dst = 0;
  while (src != s) {
    c = *--src;
    *--dst = c;
    if (c == '"' || c == '\\')
      *--dst = '\\';
  }

  return s;
}
208
b545ba7c 209static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
210 size_t buf_size)
211{
212 char *p = buf;
213 size_t l;
214 int i;
215
216 if (pp->ret_type.is_ptr)
217 *p++ = 'p';
218 else if (IS(pp->ret_type.name, "void"))
219 *p++ = 'v';
220 else
221 *p++ = 'i';
222 *p++ = '(';
223 l = 2;
224
225 for (i = 0; i < pp->argc; i++) {
226 if (pp->arg[i].reg != NULL)
227 snprintf(buf + l, buf_size - l, "%s%s",
228 i == 0 ? "" : ",", pp->arg[i].reg);
229 else
230 snprintf(buf + l, buf_size - l, "%sa%d",
231 i == 0 ? "" : ",", i + 1);
232 l = strlen(buf);
233 }
234 snprintf(buf + l, buf_size - l, ")");
235}
236
c0050df6 237static const struct parsed_proto *check_var(FILE *fhdr,
238 const char *sym, const char *varname)
b545ba7c 239{
240 const struct parsed_proto *pp, *pp_sym;
241 char fp_sym[256], fp_var[256];
242 int i, bad = 0;
243
244 pp = proto_parse(fhdr, varname, 1);
36595fd2 245 if (pp == NULL) {
b545ba7c 246 if (IS_START(varname, "sub_"))
247 awarn("sub_ sym missing proto: '%s'\n", varname);
c0050df6 248 return NULL;
36595fd2 249 }
250
251 if (!pp->is_func && !pp->is_fptr)
c0050df6 252 return NULL;
b545ba7c 253
b74c31e3 254 pp_print(fp_var, sizeof(fp_var), pp);
b545ba7c 255
36595fd2 256 if (pp->argc_reg == 0)
b545ba7c 257 goto check_sym;
36595fd2 258 if (pp->argc_reg == 1 && pp->argc_stack == 0
259 && IS(pp->arg[0].reg, "ecx"))
260 {
b545ba7c 261 goto check_sym;
36595fd2 262 }
77f3a833 263 if (!g_cconv_novalidate
264 && (pp->argc_reg != 2
265 || !IS(pp->arg[0].reg, "ecx")
266 || !IS(pp->arg[1].reg, "edx")))
36595fd2 267 {
b545ba7c 268 awarn("unhandled reg call: %s\n", fp_var);
36595fd2 269 }
36595fd2 270
b545ba7c 271check_sym:
272 sprint_pp_short(pp, g_comment, sizeof(g_comment));
273
274 if (sym != NULL) {
275 g_func_sym_pp = NULL;
276 pp_sym = proto_parse(fhdr, sym, 1);
277 if (pp_sym == NULL)
c0050df6 278 return pp;
b545ba7c 279 if (!pp_sym->is_fptr)
280 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
281 g_func_sym_pp = pp_sym;
36595fd2 282 }
b545ba7c 283 else {
284 pp_sym = g_func_sym_pp;
285 if (pp_sym == NULL)
c0050df6 286 return pp;
b545ba7c 287 }
288
289 if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg)
290 bad = 1;
291 else {
292 for (i = 0; i < pp->argc; i++) {
293 if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) {
294 bad = 1;
295 break;
296 }
297 if ((pp->arg[i].reg != NULL)
298 && !IS(pp->arg[i].reg, pp_sym->arg[i].reg))
299 {
300 bad = 1;
301 break;
302 }
303 }
304 }
305
306 if (bad) {
b74c31e3 307 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
b545ba7c 308 anote("var: %s\n", fp_var);
309 anote("sym: %s\n", fp_sym);
310 awarn("^ mismatch\n");
36595fd2 311 }
c0050df6 312
313 return pp;
36595fd2 314}
315
aa1aa2c2 316static void output_decorated_pp(FILE *fout,
317 const struct parsed_proto *pp)
318{
319 if (pp->name[0] != '_')
320 fprintf(fout, pp->is_fastcall ? "@" : "_");
321 fprintf(fout, "%s", pp->name);
322 if (pp->is_stdcall && pp->argc > 0)
323 fprintf(fout, "@%d", pp->argc * 4);
324}
325
// qsort()/bsearch() comparator for arrays of C strings: both arguments
// point at a string pointer, the strings are ordered with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
330
/* XXX: maybe move to external file? */
/*
 * Labels of strings that are replaced by zero-filled space in the
 * output. main() qsort()s this table once at startup so that
 * is_unwanted_sym() can bsearch() it.
 */
static const char *unwanted_syms[] = {
  "aRuntimeError",   "aTlossError",
  "aSingError",      "aDomainError",
  "aR6029ThisAppli", "aR6028UnableToI",
  "aR6027NotEnough", "aR6026NotEnough",
  "aR6025PureVirtu", "aR6024NotEnough",
  "aR6019UnableToO", "aR6018Unexpecte",
  "aR6017Unexpecte", "aR6016NotEnough",
  "aAbnormalProgra", "aR6009NotEnough",
  "aR6008NotEnough", "aR6002FloatingP",
  "aMicrosoftVisua", "aRuntimeErrorPr",
  "aThisApplicatio", "aMicrosoftFindF",
  "aMicrosoftOffic",
};
357
358static int is_unwanted_sym(const char *sym)
359{
360 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
361 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
362}
363
054f95b2 364int main(int argc, char *argv[])
365{
36595fd2 366 FILE *fout, *fasm, *fhdr, *frlist;
b545ba7c 367 const struct parsed_proto *pp;
aa1aa2c2 368 int no_decorations = 0;
054f95b2 369 char words[20][256];
054f95b2 370 char word[256];
371 char line[256];
c87eb470 372 char last_sym[32];
054f95b2 373 unsigned long val;
374 unsigned long cnt;
375 const char *sym;
376 enum dx_type type;
36595fd2 377 char **pub_syms;
378 int pub_sym_cnt = 0;
379 int pub_sym_alloc;
380 char **rlist;
381 int rlist_cnt = 0;
382 int rlist_alloc;
054f95b2 383 int is_label;
36595fd2 384 int is_bss;
054f95b2 385 int wordc;
386 int first;
387 int arg_out;
388 int arg = 1;
389 int len;
36595fd2 390 int w, i;
054f95b2 391 char *p;
392 char *p2;
393
36595fd2 394 if (argc < 4) {
aa1aa2c2 395 // -nd: no symbol decorations
77f3a833 396 printf("usage:\n%s [-nd] [-i] <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 397 argv[0]);
398 return 1;
399 }
400
aa1aa2c2 401 for (arg = 1; arg < argc; arg++) {
402 if (IS(argv[arg], "-nd"))
403 no_decorations = 1;
77f3a833 404 else if (IS(argv[arg], "-i"))
405 g_cconv_novalidate = 1;
aa1aa2c2 406 else
407 break;
408 }
409
054f95b2 410 arg_out = arg++;
411
412 asmfn = argv[arg++];
413 fasm = fopen(asmfn, "r");
414 my_assert_not(fasm, NULL);
415
416 hdrfn = argv[arg++];
36595fd2 417 fhdr = fopen(hdrfn, "r");
418 my_assert_not(fhdr, NULL);
054f95b2 419
420 fout = fopen(argv[arg_out], "w");
421 my_assert_not(fout, NULL);
422
36595fd2 423 pub_sym_alloc = 64;
424 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
425 my_assert_not(pub_syms, NULL);
426
427 rlist_alloc = 64;
428 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
429 my_assert_not(rlist, NULL);
430
431 for (; arg < argc; arg++) {
432 frlist = fopen(argv[arg], "r");
433 my_assert_not(frlist, NULL);
434
435 while (fgets(line, sizeof(line), frlist)) {
436 p = sskip(line);
437 if (*p == 0 || *p == ';')
438 continue;
439
440 p = next_word(words[0], sizeof(words[0]), p);
441 if (words[0][0] == 0)
442 continue;
443
444 if (rlist_cnt >= rlist_alloc) {
445 rlist_alloc = rlist_alloc * 2 + 64;
446 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448 }
449 rlist[rlist_cnt++] = strdup(words[0]);
450 }
451
452 fclose(frlist);
453 frlist = NULL;
454 }
455
456 if (rlist_cnt > 0)
457 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
458
c87eb470 459 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
460 sizeof(unwanted_syms[0]), cmpstringp);
461
462 last_sym[0] = 0;
463
36595fd2 464 while (1) {
054f95b2 465 next_section(fasm, line);
36595fd2 466 if (feof(fasm))
467 break;
054f95b2 468 if (IS(line + 1, "text"))
469 continue;
470
471 if (IS(line + 1, "rdata"))
472 fprintf(fout, "\n.section .rodata\n");
473 else if (IS(line + 1, "data"))
474 fprintf(fout, "\n.data\n");
475 else
476 aerr("unhandled section: '%s'\n", line);
477
478 fprintf(fout, ".align 4\n");
479
480 while (fgets(line, sizeof(line), fasm))
481 {
482 sym = NULL;
483 asmln++;
484
485 p = sskip(line);
486 if (*p == 0 || *p == ';')
487 continue;
488
489 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 490 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
491 if (*p == 0 || *p == ';') {
492 wordc++;
493 break;
494 }
495 if (*p == ',') {
054f95b2 496 p = sskip(p + 1);
497 }
498 }
499
b545ba7c 500 if (*p == ';') {
501 p = sskip(p + 1);
502 if (IS_START(p, "sctclrtype"))
503 g_func_sym_pp = NULL;
504 }
505
054f95b2 506 if (wordc == 2 && IS(words[1], "ends"))
507 break;
36595fd2 508 if (wordc <= 2 && IS(words[0], "end"))
509 break;
054f95b2 510 if (wordc < 2)
511 aerr("unhandled: '%s'\n", words[0]);
512
513 // don't cares
514 if (IS(words[0], "assume"))
515 continue;
516
517 if (IS(words[0], "align")) {
518 val = parse_number(words[1]);
519 fprintf(fout, "\t\t .align %ld", val);
520 goto fin;
521 }
522
523 w = 1;
524 type = parse_dx_directive(words[0]);
525 if (type == DXT_UNSPEC) {
526 type = parse_dx_directive(words[1]);
527 sym = words[0];
528 w = 2;
529 }
530 if (type == DXT_UNSPEC)
531 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
532
533 if (sym != NULL) {
c87eb470 534 snprintf(last_sym, sizeof(last_sym), "%s", sym);
36595fd2 535
b545ba7c 536 pp = proto_parse(fhdr, sym, 1);
c87eb470 537 if (pp != NULL) {
b545ba7c 538 g_func_sym_pp = NULL;
539
c87eb470 540 // public/global name
541 if (pub_sym_cnt >= pub_sym_alloc) {
542 pub_sym_alloc *= 2;
543 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
544 my_assert_not(pub_syms, NULL);
545 }
546 pub_syms[pub_sym_cnt++] = strdup(sym);
547 }
548
054f95b2 549 len = strlen(sym);
aa1aa2c2 550 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
054f95b2 551
552 len += 2;
553 if (len < 8)
554 fprintf(fout, "\t");
555 if (len < 16)
556 fprintf(fout, "\t");
557 if (len <= 16)
558 fprintf(fout, " ");
559 else
560 fprintf(fout, " ");
561 }
562 else {
563 fprintf(fout, "\t\t ");
564 }
565
c87eb470 566 // fill out some unwanted strings with zeroes..
567 if (type == DXT_BYTE && words[w][0] == '\''
568 && is_unwanted_sym(last_sym))
569 {
570 len = 0;
571 for (; w < wordc; w++) {
572 if (words[w][0] == '\'') {
573 p = words[w] + 1;
574 for (; *p && *p != '\''; p++)
575 len++;
576 }
577 else {
578 // assume encoded byte
579 len++;
580 }
581 }
582 fprintf(fout, ".skip %d", len);
583 goto fin;
584 }
585 else if (type == DXT_BYTE
efea2951 586 && (words[w][0] == '\''
587 || (w + 1 < wordc && words[w + 1][0] == '\'')))
588 {
054f95b2 589 // string; use asciz for most common case
590 if (w == wordc - 2 && IS(words[w + 1], "0")) {
591 fprintf(fout, ".asciz \"");
592 wordc--;
593 }
594 else
595 fprintf(fout, ".ascii \"");
596
597 for (; w < wordc; w++) {
598 if (words[w][0] == '\'') {
599 p = words[w] + 1;
600 p2 = strchr(p, '\'');
601 if (p2 == NULL)
602 aerr("unterminated string? '%s'\n", p);
603 memcpy(word, p, p2 - p);
604 word[p2 - p] = 0;
605 fprintf(fout, "%s", escape_string(word));
606 }
607 else {
608 val = parse_number(words[w]);
609 if (val & ~0xff)
610 aerr("bad string trailing byte?\n");
611 fprintf(fout, "\\x%02lx", val);
612 }
613 }
614 fprintf(fout, "\"");
615 goto fin;
616 }
617
618 if (w == wordc - 2) {
619 if (IS_START(words[w + 1], "dup(")) {
620 cnt = parse_number(words[w]);
621 p = words[w + 1] + 4;
622 p2 = strchr(p, ')');
623 if (p2 == NULL)
624 aerr("bad dup?\n");
625 memmove(word, p, p2 - p);
626 word[p2 - p] = 0;
36595fd2 627
628 val = 0;
629 if (!IS(word, "?"))
630 val = parse_number(word);
054f95b2 631
632 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
633 cnt, type_size(type), val);
634 goto fin;
635 }
636 }
637
638 if (type == DXT_DWORD && words[w][0] == '\''
639 && words[w][5] == '\'' && strlen(words[w]) == 6)
640 {
641 if (w != wordc - 1)
642 aerr("TODO\n");
643
644 p = words[w];
645 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
646 fprintf(fout, ".long 0x%lx", val);
b545ba7c 647 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
054f95b2 648 goto fin;
649 }
650
36595fd2 651 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 652 {
653 if (w != wordc - 1)
654 aerr("TODO\n");
655
36595fd2 656 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 657 goto fin;
658 }
659
660 first = 1;
661 fprintf(fout, "%s ", type_name(type));
662 for (; w < wordc; w++)
663 {
664 if (!first)
665 fprintf(fout, ", ");
666
36595fd2 667 is_label = is_bss = 0;
668 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 669 is_label = 1;
670 w++;
671 }
36595fd2 672 else if (IS(words[w], "?")) {
673 is_bss = 1;
674 }
054f95b2 675 else if (type == DXT_DWORD
676 && !('0' <= words[w][0] && words[w][0] <= '9'))
677 {
678 // assume label
679 is_label = 1;
680 }
681
36595fd2 682 if (is_bss) {
683 fprintf(fout, "0");
684 }
685 else if (is_label) {
054f95b2 686 p = words[w];
ddaf8bd7 687 if (IS_START(p, "loc_") || IS_START(p, "__imp")
688 || strchr(p, '?') || strchr(p, '@')
36595fd2 689 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
690 cmpstringp))
054f95b2 691 {
692 fprintf(fout, "0");
b545ba7c 693 snprintf(g_comment, sizeof(g_comment), "%s", p);
36595fd2 694 }
695 else {
c0050df6 696 pp = check_var(fhdr, sym, p);
aa1aa2c2 697 if (pp == NULL) {
698 fprintf(fout, "%s%s",
699 (no_decorations || p[0] == '_') ? "" : "_", p);
700 }
701 else {
702 if (no_decorations)
703 fprintf(fout, "%s", pp->name);
704 else
705 output_decorated_pp(fout, pp);
706 }
054f95b2 707 }
054f95b2 708 }
709 else {
710 val = parse_number(words[w]);
711 if (val < 10)
712 fprintf(fout, "%ld", val);
713 else
714 fprintf(fout, "0x%lx", val);
715 }
716
717 first = 0;
718 }
719
720fin:
b545ba7c 721 if (g_comment[0] != 0) {
722 fprintf(fout, "\t\t# %s", g_comment);
723 g_comment[0] = 0;
054f95b2 724 }
725 fprintf(fout, "\n");
054f95b2 726 }
727 }
728
36595fd2 729 fprintf(fout, "\n");
730
731 // dump public syms
732 for (i = 0; i < pub_sym_cnt; i++)
aa1aa2c2 733 fprintf(fout, ".global %s%s\n",
734 no_decorations ? "" : "_", pub_syms[i]);
36595fd2 735
054f95b2 736 fclose(fout);
737 fclose(fasm);
36595fd2 738 fclose(fhdr);
054f95b2 739
740 return 0;
741}
742
743// vim:ts=2:shiftwidth=2:expandtab