translate: start outputting after analysis, use define for userstack
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
// Element count of a true array (must not be used on a pointer).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Nonzero when strings w and y are equal.
#define IS(w, y) (!strcmp((w), (y)))
// Nonzero when w begins with y. Note: y is evaluated twice.
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
12
13#include "protoparse.h"
14
// name of the .asm input and the number of lines read from it so far;
// both are used as the prefix of anote/awarn/aerr messages
static const char *asmfn = NULL;
static int asmln = 0;

// prototype of the most recent data label that parsed as a func ptr;
// check_var() compares following entries against it
static const struct parsed_proto *g_func_sym_pp = NULL;
// pending comment text, appended to the current output line by main()
static char g_comment[256] = "";
// number of warnings issued so far (awarn gives up at 10)
static int g_warn_cnt = 0;
// set by -i: don't warn about unhandled register calling conventions
static int g_cconv_novalidate = 0;
// set by -a: ARM flavour of the output (comment char, .align, .tfloat)
static int g_arm_mode = 0;
23
// Data directive kinds (db/dw/dd/dq/dt).
// note: values must stay in ascending order, main() relies on
// comparisons like 'type >= DXT_DWORD'
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
33
// Diagnostics go to stdout and are prefixed with "<asmfn>:<asmln>: ".
// amsg_ and abail_ are building blocks for the three macros below.
#define amsg_(kind, fmt, ...) \
  printf("%s:%d: " kind ": " fmt, asmfn, asmln, ##__VA_ARGS__)
#define abail_() do { \
  fcloseall(); \
  exit(1); \
} while (0)

// informational message, never terminates
#define anote(fmt, ...) \
  amsg_("note", fmt, ##__VA_ARGS__)
// warning; the 10th warning closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  amsg_("warning", fmt, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) \
    abail_(); \
} while (0)
// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  amsg_("error", fmt, ##__VA_ARGS__); \
  abail_(); \
} while (0)
48
49#include "masm_tools.h"
50
// Copy the next word of 's' into 'w' (capacity 'wsize', always
// NUL-terminated). Leading characters are skipped with sskip(); the
// word ends at a blank or ',' that is not inside single quotes, or at
// the end of the string. A warning is printed when the word did not fit.
// Returns a pointer to the first character after the copied word.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopping anywhere but on a terminator means we ran out of room
  c = s[n];
  if (!(c == 0 || my_isblank(c) || c == ','))
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
72
73static void next_section(FILE *fasm, char *name)
74{
75 char words[2][256];
76 char line[256];
77 int wordc;
78 char *p;
79
80 name[0] = 0;
81
82 while (fgets(line, sizeof(line), fasm))
83 {
84 wordc = 0;
85 asmln++;
86
87 p = sskip(line);
88 if (*p == 0)
89 continue;
90
91 if (*p == ';') {
92 while (strlen(line) == sizeof(line) - 1) {
93 // one of those long comment lines..
94 if (!fgets(line, sizeof(line), fasm))
95 break;
96 }
97 continue;
98 }
99
100 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
101 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
102 if (*p == 0 || *p == ';') {
103 wordc++;
104 break;
105 }
106 }
107
108 if (wordc < 2)
109 continue;
110
111 if (!IS(words[1], "segment"))
112 continue;
113
114 strcpy(name, words[0]);
115 break;
116 }
117}
118
119static enum dx_type parse_dx_directive(const char *name)
120{
121 if (IS(name, "dd"))
122 return DXT_DWORD;
123 if (IS(name, "dw"))
124 return DXT_WORD;
125 if (IS(name, "db"))
126 return DXT_BYTE;
127 if (IS(name, "dq"))
128 return DXT_QUAD;
129 if (IS(name, "dt"))
130 return DXT_TEN;
131
132 return DXT_UNSPEC;
133}
134
135static const char *type_name(enum dx_type type)
136{
137 switch (type) {
138 case DXT_BYTE:
139 return ".byte";
140 case DXT_WORD:
141 return ".word";
142 case DXT_DWORD:
143 return ".long";
144 case DXT_QUAD:
145 return ".quad";
146 case DXT_TEN:
147 return ".tfloat";
148 case DXT_UNSPEC:
149 break;
150 }
151 return "<bad>";
152}
153
154static const char *type_name_float(enum dx_type type)
155{
156 switch (type) {
157 case DXT_DWORD:
158 return ".float";
159 case DXT_QUAD:
160 return ".double";
161 case DXT_TEN:
162 return ".tfloat";
163 default:
164 break;
165 }
166 return "<bad_float>";
167}
168
169static int type_size(enum dx_type type)
170{
171 switch (type) {
172 case DXT_BYTE:
173 return 1;
174 case DXT_WORD:
175 return 2;
176 case DXT_DWORD:
177 return 4;
178 case DXT_QUAD:
179 return 8;
180 case DXT_TEN:
181 return 10;
182 case DXT_UNSPEC:
183 break;
184 }
185 return -1;
186}
187
// Escape 's' so it can be placed inside a double-quoted assembler
// string: '"' becomes \" and '\' becomes \\.
//
// Returns a pointer to an internal static buffer holding the escaped
// text; it stays valid until the next call. 's' itself is not modified.
// Input that would not fit (more than 255 characters that all need
// escaping) is truncated with a warning.
//
// The result no longer lives in the caller's buffer: the escaped text
// can be up to twice as long as the input, so writing it back there
// could overflow.
static char *escape_string(char *s)
{
  static char buf[512];
  size_t o = 0;

  for (; *s != 0; s++) {
    // keep room for a two character escape plus the terminator
    if (o + 3 > sizeof(buf)) {
      printf("warning: escaped string truncated\n");
      break;
    }
    // \" rather than a numeric escape: numeric escapes are octal
    if (*s == '"' || *s == '\\')
      buf[o++] = '\\';
    buf[o++] = *s;
  }
  buf[o] = 0;

  return buf;
}
209
210static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
211 size_t buf_size)
212{
213 char *p = buf;
214 size_t l;
215 int i;
216
217 if (pp->ret_type.is_ptr)
218 *p++ = 'p';
219 else if (IS(pp->ret_type.name, "void"))
220 *p++ = 'v';
221 else
222 *p++ = 'i';
223 *p++ = '(';
224 l = 2;
225
226 for (i = 0; i < pp->argc; i++) {
227 if (pp->arg[i].reg != NULL)
228 snprintf(buf + l, buf_size - l, "%s%s",
229 i == 0 ? "" : ",", pp->arg[i].reg);
230 else
231 snprintf(buf + l, buf_size - l, "%sa%d",
232 i == 0 ? "" : ",", i + 1);
233 l = strlen(buf);
234 }
235 snprintf(buf + l, buf_size - l, ")");
236}
237
238static const struct parsed_proto *check_var(FILE *fhdr,
239 const char *sym, const char *varname)
240{
241 const struct parsed_proto *pp, *pp_sym;
242 char fp_sym[256], fp_var[256];
243 int i, bad = 0;
244
245 pp = proto_parse(fhdr, varname, 1);
246 if (pp == NULL) {
247 if (IS_START(varname, "sub_"))
248 awarn("sub_ sym missing proto: '%s'\n", varname);
249 return NULL;
250 }
251
252 if (!pp->is_func && !pp->is_fptr)
253 return NULL;
254
255 pp_print(fp_var, sizeof(fp_var), pp);
256
257 if (pp->argc_reg == 0)
258 goto check_sym;
259 if (pp->argc_reg == 1 && pp->argc_stack == 0
260 && IS(pp->arg[0].reg, "ecx"))
261 {
262 goto check_sym;
263 }
264 if (!g_cconv_novalidate
265 && (pp->argc_reg != 2
266 || !IS(pp->arg[0].reg, "ecx")
267 || !IS(pp->arg[1].reg, "edx")))
268 {
269 awarn("unhandled reg call: %s\n", fp_var);
270 }
271
272check_sym:
273 sprint_pp_short(pp, g_comment, sizeof(g_comment));
274
275 if (sym != NULL) {
276 g_func_sym_pp = NULL;
277 pp_sym = proto_parse(fhdr, sym, 1);
278 if (pp_sym == NULL)
279 return pp;
280 if (!pp_sym->is_fptr)
281 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
282 g_func_sym_pp = pp_sym;
283 }
284 else {
285 pp_sym = g_func_sym_pp;
286 if (pp_sym == NULL)
287 return pp;
288 }
289
290 if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg)
291 bad = 1;
292 else {
293 for (i = 0; i < pp->argc; i++) {
294 if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) {
295 bad = 1;
296 break;
297 }
298 if ((pp->arg[i].reg != NULL)
299 && !IS(pp->arg[i].reg, pp_sym->arg[i].reg))
300 {
301 bad = 1;
302 break;
303 }
304 }
305 }
306
307 if (bad) {
308 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
309 anote("var: %s\n", fp_var);
310 anote("sym: %s\n", fp_sym);
311 awarn("^ mismatch\n");
312 }
313
314 return pp;
315}
316
317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
// qsort()/bsearch() comparator for arrays of 'char *': both arguments
// point at string pointers, which are compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *const *lhs = p1;
  const char *const *rhs = p2;

  return strcmp(*lhs, *rhs);
}
343
344/* XXX: maybe move to external file? */
345static const char *unwanted_syms[] = {
346 "aRuntimeError",
347 "aTlossError",
348 "aSingError",
349 "aDomainError",
350 "aR6029ThisAppli",
351 "aR6028UnableToI",
352 "aR6027NotEnough",
353 "aR6026NotEnough",
354 "aR6025PureVirtu",
355 "aR6024NotEnough",
356 "aR6019UnableToO",
357 "aR6018Unexpecte",
358 "aR6017Unexpecte",
359 "aR6016NotEnough",
360 "aAbnormalProgra",
361 "aR6009NotEnough",
362 "aR6008NotEnough",
363 "aR6002FloatingP",
364 "aMicrosoftVisua",
365 "aRuntimeErrorPr",
366 "aThisApplicatio",
367 "aMicrosoftFindF",
368 "aMicrosoftOffic",
369};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
377int main(int argc, char *argv[])
378{
379 FILE *fout, *fasm, *fhdr, *frlist;
380 const struct parsed_proto *pp;
381 int no_decorations = 0;
382 char comment_char = '#';
383 char words[20][256];
384 char word[256];
385 char line[256];
386 char last_sym[32];
387 unsigned long val;
388 unsigned long cnt;
389 const char *sym;
390 enum dx_type type;
391 char **pub_syms;
392 int pub_sym_cnt = 0;
393 int pub_sym_alloc;
394 char **rlist;
395 int rlist_cnt = 0;
396 int rlist_alloc;
397 int is_label;
398 int is_bss;
399 int wordc;
400 int first;
401 int arg_out;
402 int arg = 1;
403 int len;
404 int w, i;
405 char *p;
406 char *p2;
407
408 if (argc < 4) {
409 // -nd: no symbol decorations
410 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
411 argv[0]);
412 return 1;
413 }
414
415 for (arg = 1; arg < argc; arg++) {
416 if (IS(argv[arg], "-nd"))
417 no_decorations = 1;
418 else if (IS(argv[arg], "-i"))
419 g_cconv_novalidate = 1;
420 else if (IS(argv[arg], "-a")) {
421 comment_char = '@';
422 g_arm_mode = 1;
423 }
424 else
425 break;
426 }
427
428 arg_out = arg++;
429
430 asmfn = argv[arg++];
431 fasm = fopen(asmfn, "r");
432 my_assert_not(fasm, NULL);
433
434 hdrfn = argv[arg++];
435 fhdr = fopen(hdrfn, "r");
436 my_assert_not(fhdr, NULL);
437
438 fout = fopen(argv[arg_out], "w");
439 my_assert_not(fout, NULL);
440
441 pub_sym_alloc = 64;
442 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
443 my_assert_not(pub_syms, NULL);
444
445 rlist_alloc = 64;
446 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448
449 for (; arg < argc; arg++) {
450 frlist = fopen(argv[arg], "r");
451 my_assert_not(frlist, NULL);
452
453 while (fgets(line, sizeof(line), frlist)) {
454 p = sskip(line);
455 if (*p == 0 || *p == ';')
456 continue;
457
458 p = next_word(words[0], sizeof(words[0]), p);
459 if (words[0][0] == 0)
460 continue;
461
462 if (rlist_cnt >= rlist_alloc) {
463 rlist_alloc = rlist_alloc * 2 + 64;
464 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
465 my_assert_not(rlist, NULL);
466 }
467 rlist[rlist_cnt++] = strdup(words[0]);
468 }
469
470 fclose(frlist);
471 frlist = NULL;
472 }
473
474 if (rlist_cnt > 0)
475 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
476
477 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
478 sizeof(unwanted_syms[0]), cmpstringp);
479
480 last_sym[0] = 0;
481
482 while (1) {
483 next_section(fasm, line);
484 if (feof(fasm))
485 break;
486 if (IS(line + 1, "text"))
487 continue;
488
489 if (IS(line + 1, "rdata"))
490 fprintf(fout, "\n.section .rodata\n");
491 else if (IS(line + 1, "data"))
492 fprintf(fout, "\n.data\n");
493 else
494 aerr("unhandled section: '%s'\n", line);
495
496 fprintf(fout, ".align %d\n", align_value(4));
497
498 while (fgets(line, sizeof(line), fasm))
499 {
500 sym = NULL;
501 asmln++;
502
503 p = sskip(line);
504 if (*p == 0 || *p == ';')
505 continue;
506
507 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
508 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
509 if (*p == 0 || *p == ';') {
510 wordc++;
511 break;
512 }
513 if (*p == ',') {
514 p = sskip(p + 1);
515 }
516 }
517
518 if (*p == ';') {
519 p = sskip(p + 1);
520 if (IS_START(p, "sctclrtype"))
521 g_func_sym_pp = NULL;
522 }
523
524 if (wordc == 2 && IS(words[1], "ends"))
525 break;
526 if (wordc <= 2 && IS(words[0], "end"))
527 break;
528 if (wordc < 2)
529 aerr("unhandled: '%s'\n", words[0]);
530
531 // don't cares
532 if (IS(words[0], "assume"))
533 continue;
534
535 if (IS(words[0], "align")) {
536 val = parse_number(words[1]);
537 fprintf(fout, "\t\t .align %d", align_value(val));
538 goto fin;
539 }
540
541 w = 1;
542 type = parse_dx_directive(words[0]);
543 if (type == DXT_UNSPEC) {
544 type = parse_dx_directive(words[1]);
545 sym = words[0];
546 w = 2;
547 }
548 if (type == DXT_UNSPEC)
549 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
550
551 if (sym != NULL) {
552 snprintf(last_sym, sizeof(last_sym), "%s", sym);
553
554 pp = proto_parse(fhdr, sym, 1);
555 if (pp != NULL) {
556 g_func_sym_pp = NULL;
557
558 // public/global name
559 if (pub_sym_cnt >= pub_sym_alloc) {
560 pub_sym_alloc *= 2;
561 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
562 my_assert_not(pub_syms, NULL);
563 }
564 pub_syms[pub_sym_cnt++] = strdup(sym);
565 }
566
567 len = strlen(sym);
568 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
569
570 len += 2;
571 if (len < 8)
572 fprintf(fout, "\t");
573 if (len < 16)
574 fprintf(fout, "\t");
575 if (len <= 16)
576 fprintf(fout, " ");
577 else
578 fprintf(fout, " ");
579 }
580 else {
581 fprintf(fout, "\t\t ");
582 }
583
584 // fill out some unwanted strings with zeroes..
585 if (type == DXT_BYTE && words[w][0] == '\''
586 && is_unwanted_sym(last_sym))
587 {
588 len = 0;
589 for (; w < wordc; w++) {
590 if (words[w][0] == '\'') {
591 p = words[w] + 1;
592 for (; *p && *p != '\''; p++)
593 len++;
594 }
595 else {
596 // assume encoded byte
597 len++;
598 }
599 }
600 fprintf(fout, ".skip %d", len);
601 goto fin;
602 }
603 else if (type == DXT_BYTE
604 && (words[w][0] == '\''
605 || (w + 1 < wordc && words[w + 1][0] == '\'')))
606 {
607 // string; use asciz for most common case
608 if (w == wordc - 2 && IS(words[w + 1], "0")) {
609 fprintf(fout, ".asciz \"");
610 wordc--;
611 }
612 else
613 fprintf(fout, ".ascii \"");
614
615 for (; w < wordc; w++) {
616 if (words[w][0] == '\'') {
617 p = words[w] + 1;
618 p2 = strchr(p, '\'');
619 if (p2 == NULL)
620 aerr("unterminated string? '%s'\n", p);
621 memcpy(word, p, p2 - p);
622 word[p2 - p] = 0;
623 fprintf(fout, "%s", escape_string(word));
624 }
625 else {
626 val = parse_number(words[w]);
627 if (val & ~0xff)
628 aerr("bad string trailing byte?\n");
629 fprintf(fout, "\\x%02lx", val);
630 }
631 }
632 fprintf(fout, "\"");
633 goto fin;
634 }
635
636 if (w == wordc - 2) {
637 if (IS_START(words[w + 1], "dup(")) {
638 cnt = parse_number(words[w]);
639 p = words[w + 1] + 4;
640 p2 = strchr(p, ')');
641 if (p2 == NULL)
642 aerr("bad dup?\n");
643 memmove(word, p, p2 - p);
644 word[p2 - p] = 0;
645
646 val = 0;
647 if (!IS(word, "?"))
648 val = parse_number(word);
649
650 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
651 cnt, type_size(type), val);
652 goto fin;
653 }
654 }
655
656 if (type == DXT_DWORD && words[w][0] == '\''
657 && words[w][5] == '\'' && strlen(words[w]) == 6)
658 {
659 if (w != wordc - 1)
660 aerr("TODO\n");
661
662 p = words[w];
663 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
664 fprintf(fout, ".long 0x%lx", val);
665 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
666 goto fin;
667 }
668
669 if (type >= DXT_DWORD && strchr(words[w], '.'))
670 {
671 if (w != wordc - 1)
672 aerr("TODO\n");
673
674 if (g_arm_mode && type == DXT_TEN) {
675 fprintf(fout, ".fill 10");
676 snprintf(g_comment, sizeof(g_comment), "%s %s",
677 type_name_float(type), words[w]);
678 }
679 else
680 fprintf(fout, "%s %s", type_name_float(type), words[w]);
681 goto fin;
682 }
683
684 first = 1;
685 fprintf(fout, "%s ", type_name(type));
686 for (; w < wordc; w++)
687 {
688 if (!first)
689 fprintf(fout, ", ");
690
691 is_label = is_bss = 0;
692 if (w <= wordc - 2 && IS(words[w], "offset")) {
693 is_label = 1;
694 w++;
695 }
696 else if (IS(words[w], "?")) {
697 is_bss = 1;
698 }
699 else if (type == DXT_DWORD
700 && !('0' <= words[w][0] && words[w][0] <= '9'))
701 {
702 // assume label
703 is_label = 1;
704 }
705
706 if (is_bss) {
707 fprintf(fout, "0");
708 }
709 else if (is_label) {
710 p = words[w];
711 if (IS_START(p, "loc_") || IS_START(p, "__imp")
712 || strchr(p, '?') || strchr(p, '@')
713 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
714 cmpstringp))
715 {
716 fprintf(fout, "0");
717 snprintf(g_comment, sizeof(g_comment), "%s", p);
718 }
719 else {
720 pp = check_var(fhdr, sym, p);
721 if (pp == NULL) {
722 fprintf(fout, "%s%s",
723 (no_decorations || p[0] == '_') ? "" : "_", p);
724 }
725 else {
726 if (no_decorations)
727 fprintf(fout, "%s", pp->name);
728 else
729 output_decorated_pp(fout, pp);
730 }
731 }
732 }
733 else {
734 val = parse_number(words[w]);
735 if (val < 10)
736 fprintf(fout, "%ld", val);
737 else
738 fprintf(fout, "0x%lx", val);
739 }
740
741 first = 0;
742 }
743
744fin:
745 if (g_comment[0] != 0) {
746 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
747 g_comment[0] = 0;
748 }
749 fprintf(fout, "\n");
750 }
751 }
752
753 fprintf(fout, "\n");
754
755 // dump public syms
756 for (i = 0; i < pub_sym_cnt; i++)
757 fprintf(fout, ".global %s%s\n",
758 no_decorations ? "" : "_", pub_syms[i]);
759
760 fclose(fout);
761 fclose(fasm);
762 fclose(fhdr);
763
764 return 0;
765}
766
767// vim:ts=2:shiftwidth=2:expandtab