minor makefile update
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
/* Number of elements in a real array (not valid on a pointer). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when strings w and y are equal. */
#define IS(w, y) (strcmp((w), (y)) == 0)
/* Non-zero when string w starts with string y. */
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
12
13#include "protoparse.h"
14
// path of the .asm input; printed as the prefix of anote/awarn/aerr messages
static const char *asmfn;
// number of lines read from the .asm input so far, printed in diagnostics
static int asmln;

// proto of the labeled func ptr data last looked up by check_var();
// check_var() falls back to it when called without a label
static const struct parsed_proto *g_func_sym_pp;
// pending comment text; main() appends it to the current output line
// and then clears it
static char g_comment[256];
// number of warnings printed so far (awarn() exits on the 10th)
static int g_warn_cnt;
// set by -i: suppresses the "unhandled reg call" warning in check_var()
static int g_cconv_novalidate;
// set by -a: '@' comment char, align_value() returns a bit index,
// 10-byte floats are emitted as .fill
static int g_arm_mode;
23
// Kind of a data definition directive, see parse_dx_directive().
// note: must be in ascending order
// (main() tests `type >= DXT_DWORD` to pick the types that may hold floats)
enum dx_type {
  DXT_UNSPEC, // not a recognized directive
  DXT_BYTE,   // db, 1 byte
  DXT_WORD,   // dw, 2 bytes
  DXT_DWORD,  // dd, 4 bytes
  DXT_QUAD,   // dq, 8 bytes
  DXT_TEN,    // dt, 10 bytes
};
33
// Print a note to stdout, prefixed with the current asm file name and line.
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// Print a warning to stdout, prefixed with the current asm file name and
// line. The 10th warning closes all open streams and exits with status 1.
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// Print an error to stdout, prefixed with the current asm file name and
// line, then close all open streams and exit with status 1.
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
48
49#include "masm_tools.h"
50
/*
 * Copy the next word of s into w (at most wsize - 1 chars, always
 * NUL-terminated). Leading blanks are skipped with sskip(). A word ends at
 * a blank, a comma or the end of the string, except that blanks and commas
 * between single quotes belong to the word. A warning is printed when the
 * word did not fit into w.
 *
 * Returns a pointer to the first character of s that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped on something that is not a word terminator: out of space
  c = s[n];
  if (!(c == 0 || my_isblank(c) || c == ','))
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
72
73static void next_section(FILE *fasm, char *name)
74{
75 char words[2][256];
76 char line[256];
77 int wordc;
78 char *p;
79
80 name[0] = 0;
81
82 while (fgets(line, sizeof(line), fasm))
83 {
84 wordc = 0;
85 asmln++;
86
87 p = sskip(line);
88 if (*p == 0)
89 continue;
90
91 if (*p == ';') {
92 while (strlen(line) == sizeof(line) - 1) {
93 // one of those long comment lines..
94 if (!fgets(line, sizeof(line), fasm))
95 break;
96 }
97 continue;
98 }
99
100 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
101 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
102 if (*p == 0 || *p == ';') {
103 wordc++;
104 break;
105 }
106 }
107
108 if (wordc < 2)
109 continue;
110
111 if (!IS(words[1], "segment"))
112 continue;
113
114 strcpy(name, words[0]);
115 break;
116 }
117}
118
119static enum dx_type parse_dx_directive(const char *name)
120{
121 if (IS(name, "dd"))
122 return DXT_DWORD;
123 if (IS(name, "dw"))
124 return DXT_WORD;
125 if (IS(name, "db"))
126 return DXT_BYTE;
127 if (IS(name, "dq"))
128 return DXT_QUAD;
129 if (IS(name, "dt"))
130 return DXT_TEN;
131
132 return DXT_UNSPEC;
133}
134
135static const char *type_name(enum dx_type type)
136{
137 switch (type) {
138 case DXT_BYTE:
139 return ".byte";
140 case DXT_WORD:
141 return ".hword";
142 case DXT_DWORD:
143 return ".long";
144 case DXT_QUAD:
145 return ".quad";
146 case DXT_TEN:
147 return ".tfloat";
148 case DXT_UNSPEC:
149 break;
150 }
151 return "<bad>";
152}
153
154static const char *type_name_float(enum dx_type type)
155{
156 switch (type) {
157 case DXT_DWORD:
158 return ".float";
159 case DXT_QUAD:
160 return ".double";
161 case DXT_TEN:
162 return ".tfloat";
163 default:
164 break;
165 }
166 return "<bad_float>";
167}
168
169static int type_size(enum dx_type type)
170{
171 switch (type) {
172 case DXT_BYTE:
173 return 1;
174 case DXT_WORD:
175 return 2;
176 case DXT_DWORD:
177 return 4;
178 case DXT_QUAD:
179 return 8;
180 case DXT_TEN:
181 return 10;
182 case DXT_UNSPEC:
183 break;
184 }
185 return -1;
186}
187
/*
 * Escape s in place so that it can be written between double quotes in an
 * assembler string: '"' becomes \" and '\' becomes \\.
 *
 * A numeric escape is deliberately not used for the quote: "\22" would be
 * taken as an octal code (0x12, not '"') and would also absorb a digit that
 * happens to follow it.
 *
 * The escaped text may be longer than the input; the caller's buffer must
 * have room for it. The result is limited to 255 characters plus the
 * terminator (the size of the scratch buffer); a longer result is reported
 * on stderr and terminates the program instead of overflowing.
 *
 * Returns s.
 */
static char *escape_string(char *s)
{
  char buf[256];
  size_t len = 0;
  const char *src;

  for (src = s; *src != 0; src++) {
    int need_escape = (*src == '"' || *src == '\\');

    // room for the optional backslash, the char itself and the final NUL
    if (len + need_escape + 2 > sizeof(buf)) {
      fprintf(stderr, "escape_string: string is too long\n");
      exit(1);
    }

    if (need_escape)
      buf[len++] = '\\';
    buf[len++] = *src;
  }
  buf[len] = 0;

  return strcpy(s, buf);
}
209
210static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
211 size_t buf_size)
212{
213 char *p = buf;
214 size_t l;
215 int i;
216
217 if (pp->ret_type.is_ptr)
218 *p++ = 'p';
219 else if (IS(pp->ret_type.name, "void"))
220 *p++ = 'v';
221 else
222 *p++ = 'i';
223 *p++ = '(';
224 l = 2;
225
226 for (i = 0; i < pp->argc; i++) {
227 if (pp->arg[i].reg != NULL)
228 snprintf(buf + l, buf_size - l, "%s%s",
229 i == 0 ? "" : ",", pp->arg[i].reg);
230 else
231 snprintf(buf + l, buf_size - l, "%sa%d",
232 i == 0 ? "" : ",", i + 1);
233 l = strlen(buf);
234 }
235 snprintf(buf + l, buf_size - l, ")");
236}
237
/*
 * Look up the prototype of varname, a symbol referenced from a data entry,
 * and check it against the prototype of the data label it is stored under.
 *
 * fhdr:    header file handed to proto_parse()
 * sym:     label of the data entry, or NULL when the entry has no label of
 *          its own; in that case the label proto remembered in
 *          g_func_sym_pp is used
 * varname: the referenced symbol
 *
 * Returns the proto of varname when it is a function or function pointer,
 * NULL when no proto is found or it is neither. When a proto is returned,
 * its short signature has been left in g_comment.
 */
static const struct parsed_proto *check_var(FILE *fhdr,
  const char *sym, const char *varname)
{
  const struct parsed_proto *pp, *pp_sym;
  char fp_sym[256], fp_var[256];

  pp = proto_parse(fhdr, varname, 1);
  if (pp == NULL) {
    // only sub_* symbols are reported as missing a proto
    if (IS_START(varname, "sub_"))
      awarn("sub_ sym missing proto: '%s'\n", varname);
    return NULL;
  }

  if (!pp->is_func && !pp->is_fptr)
    return NULL;

  pp_print(fp_var, sizeof(fp_var), pp);

  // accepted register argument layouts: none, a single ecx argument with
  // no stack arguments, or ecx + edx; anything else gets a warning
  // (unless disabled by g_cconv_novalidate)
  if (pp->argc_reg == 0)
    goto check_sym;
  if (pp->argc_reg == 1 && pp->argc_stack == 0
    && IS(pp->arg[0].reg, "ecx"))
  {
    goto check_sym;
  }
  if (!g_cconv_novalidate
    && (pp->argc_reg != 2
       || !IS(pp->arg[0].reg, "ecx")
       || !IS(pp->arg[1].reg, "edx")))
  {
    awarn("unhandled reg call: %s\n", fp_var);
  }

check_sym:
  // the short signature becomes the comment of the output line
  sprint_pp_short(pp, g_comment, sizeof(g_comment));

  if (sym != NULL) {
    // labeled entry: forget the previous label proto and look up this one
    g_func_sym_pp = NULL;
    pp_sym = proto_parse(fhdr, sym, 1);
    if (pp_sym == NULL)
      return pp;
    if (!pp_sym->is_fptr)
      aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
    g_func_sym_pp = pp_sym;
  }
  else {
    // unlabeled entry: compare against the last remembered label proto
    pp_sym = g_func_sym_pp;
    if (pp_sym == NULL)
      return pp;
  }

  // non-zero result from pp_cmp_func() is reported as a mismatch
  if (pp_cmp_func(pp, pp_sym)) {
    pp_print(fp_sym, sizeof(fp_sym), pp_sym);
    anote("var: %s\n", fp_var);
    anote("sym: %s\n", fp_sym);
    awarn("^ mismatch\n");
  }

  return pp;
}
298
299static void output_decorated_pp(FILE *fout,
300 const struct parsed_proto *pp)
301{
302 if (pp->name[0] != '_')
303 fprintf(fout, pp->is_fastcall ? "@" : "_");
304 fprintf(fout, "%s", pp->name);
305 if (pp->is_stdcall && pp->argc > 0)
306 fprintf(fout, "@%d", pp->argc * 4);
307}
308
309static int align_value(int src_val)
310{
311 if (src_val <= 0) {
312 awarn("bad align: %d\n", src_val);
313 src_val = 1;
314 }
315 if (!g_arm_mode)
316 return src_val;
317
318 return __builtin_ffs(src_val) - 1;
319}
320
/*
 * qsort()/bsearch() comparator for arrays of C strings: both arguments
 * point to a `char *` element, the strings are ordered with strcmp().
 */
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
325
/*
 * Labels of strings that are not wanted in the output: main() replaces
 * the string data stored under these labels with a .skip of equal length.
 *
 * The order here does not matter; main() qsort()s the table before
 * is_unwanted_sym() does a bsearch() on it.
 */
/* XXX: maybe move to external file? */
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
352
353static int is_unwanted_sym(const char *sym)
354{
355 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
356 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
357}
358
359int main(int argc, char *argv[])
360{
361 FILE *fout, *fasm, *fhdr, *frlist;
362 const struct parsed_proto *pp;
363 int no_decorations = 0;
364 char comment_char = '#';
365 char words[20][256];
366 char word[256];
367 char line[256];
368 char last_sym[32];
369 unsigned long val;
370 unsigned long cnt;
371 const char *sym;
372 enum dx_type type;
373 char **pub_syms;
374 int pub_sym_cnt = 0;
375 int pub_sym_alloc;
376 char **rlist;
377 int rlist_cnt = 0;
378 int rlist_alloc;
379 int is_label;
380 int is_bss;
381 int wordc;
382 int first;
383 int arg_out;
384 int arg = 1;
385 int len;
386 int w, i;
387 char *p;
388 char *p2;
389
390 if (argc < 4) {
391 // -nd: no symbol decorations
392 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
393 argv[0]);
394 return 1;
395 }
396
397 for (arg = 1; arg < argc; arg++) {
398 if (IS(argv[arg], "-nd"))
399 no_decorations = 1;
400 else if (IS(argv[arg], "-i"))
401 g_cconv_novalidate = 1;
402 else if (IS(argv[arg], "-a")) {
403 comment_char = '@';
404 g_arm_mode = 1;
405 }
406 else
407 break;
408 }
409
410 arg_out = arg++;
411
412 asmfn = argv[arg++];
413 fasm = fopen(asmfn, "r");
414 my_assert_not(fasm, NULL);
415
416 hdrfn = argv[arg++];
417 fhdr = fopen(hdrfn, "r");
418 my_assert_not(fhdr, NULL);
419
420 fout = fopen(argv[arg_out], "w");
421 my_assert_not(fout, NULL);
422
423 pub_sym_alloc = 64;
424 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
425 my_assert_not(pub_syms, NULL);
426
427 rlist_alloc = 64;
428 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
429 my_assert_not(rlist, NULL);
430
431 for (; arg < argc; arg++) {
432 frlist = fopen(argv[arg], "r");
433 my_assert_not(frlist, NULL);
434
435 while (fgets(line, sizeof(line), frlist)) {
436 p = sskip(line);
437 if (*p == 0 || *p == ';')
438 continue;
439
440 p = next_word(words[0], sizeof(words[0]), p);
441 if (words[0][0] == 0)
442 continue;
443
444 if (rlist_cnt >= rlist_alloc) {
445 rlist_alloc = rlist_alloc * 2 + 64;
446 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448 }
449 rlist[rlist_cnt++] = strdup(words[0]);
450 }
451
452 fclose(frlist);
453 frlist = NULL;
454 }
455
456 if (rlist_cnt > 0)
457 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
458
459 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
460 sizeof(unwanted_syms[0]), cmpstringp);
461
462 last_sym[0] = 0;
463
464 while (1) {
465 next_section(fasm, line);
466 if (feof(fasm))
467 break;
468 if (IS(line + 1, "text"))
469 continue;
470
471 if (IS(line + 1, "rdata"))
472 fprintf(fout, "\n.section .rodata\n");
473 else if (IS(line + 1, "data"))
474 fprintf(fout, "\n.data\n");
475 else
476 aerr("unhandled section: '%s'\n", line);
477
478 fprintf(fout, ".align %d\n", align_value(4));
479
480 while (fgets(line, sizeof(line), fasm))
481 {
482 sym = NULL;
483 asmln++;
484
485 p = sskip(line);
486 if (*p == 0)
487 continue;
488
489 if (*p == ';') {
490 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
491 // ;org is only seen at section start, so assume . addr 0
492 i &= 0xfff;
493 if (i != 0)
494 fprintf(fout, "\t\t .skip 0x%x\n", i);
495 }
496 continue;
497 }
498
499 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
500 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
501 if (*p == 0 || *p == ';') {
502 wordc++;
503 break;
504 }
505 if (*p == ',') {
506 p = sskip(p + 1);
507 }
508 }
509
510 if (*p == ';') {
511 p = sskip(p + 1);
512 if (IS_START(p, "sctclrtype"))
513 g_func_sym_pp = NULL;
514 }
515
516 if (wordc == 2 && IS(words[1], "ends"))
517 break;
518 if (wordc <= 2 && IS(words[0], "end"))
519 break;
520 if (wordc < 2)
521 aerr("unhandled: '%s'\n", words[0]);
522
523 // don't cares
524 if (IS(words[0], "assume"))
525 continue;
526
527 if (IS(words[0], "align")) {
528 val = parse_number(words[1]);
529 fprintf(fout, "\t\t .align %d", align_value(val));
530 goto fin;
531 }
532
533 w = 1;
534 type = parse_dx_directive(words[0]);
535 if (type == DXT_UNSPEC) {
536 type = parse_dx_directive(words[1]);
537 sym = words[0];
538 w = 2;
539 }
540 if (type == DXT_UNSPEC)
541 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
542
543 if (sym != NULL) {
544 snprintf(last_sym, sizeof(last_sym), "%s", sym);
545
546 pp = proto_parse(fhdr, sym, 1);
547 if (pp != NULL) {
548 g_func_sym_pp = NULL;
549
550 // public/global name
551 if (pub_sym_cnt >= pub_sym_alloc) {
552 pub_sym_alloc *= 2;
553 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
554 my_assert_not(pub_syms, NULL);
555 }
556 pub_syms[pub_sym_cnt++] = strdup(sym);
557 }
558
559 len = strlen(sym);
560 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
561
562 len += 2;
563 if (len < 8)
564 fprintf(fout, "\t");
565 if (len < 16)
566 fprintf(fout, "\t");
567 if (len <= 16)
568 fprintf(fout, " ");
569 else
570 fprintf(fout, " ");
571 }
572 else {
573 fprintf(fout, "\t\t ");
574 }
575
576 // fill out some unwanted strings with zeroes..
577 if (type == DXT_BYTE && words[w][0] == '\''
578 && is_unwanted_sym(last_sym))
579 {
580 len = 0;
581 for (; w < wordc; w++) {
582 if (words[w][0] == '\'') {
583 p = words[w] + 1;
584 for (; *p && *p != '\''; p++)
585 len++;
586 }
587 else {
588 // assume encoded byte
589 len++;
590 }
591 }
592 fprintf(fout, ".skip %d", len);
593 goto fin;
594 }
595 else if (type == DXT_BYTE
596 && (words[w][0] == '\''
597 || (w + 1 < wordc && words[w + 1][0] == '\'')))
598 {
599 // string; use asciz for most common case
600 if (w == wordc - 2 && IS(words[w + 1], "0")) {
601 fprintf(fout, ".asciz \"");
602 wordc--;
603 }
604 else
605 fprintf(fout, ".ascii \"");
606
607 for (; w < wordc; w++) {
608 if (words[w][0] == '\'') {
609 p = words[w] + 1;
610 p2 = strchr(p, '\'');
611 if (p2 == NULL)
612 aerr("unterminated string? '%s'\n", p);
613 memcpy(word, p, p2 - p);
614 word[p2 - p] = 0;
615 fprintf(fout, "%s", escape_string(word));
616 }
617 else {
618 val = parse_number(words[w]);
619 if (val & ~0xff)
620 aerr("bad string trailing byte?\n");
621 fprintf(fout, "\\x%02lx", val);
622 }
623 }
624 fprintf(fout, "\"");
625 goto fin;
626 }
627
628 if (w == wordc - 2) {
629 if (IS_START(words[w + 1], "dup(")) {
630 cnt = parse_number(words[w]);
631 p = words[w + 1] + 4;
632 p2 = strchr(p, ')');
633 if (p2 == NULL)
634 aerr("bad dup?\n");
635 memmove(word, p, p2 - p);
636 word[p2 - p] = 0;
637
638 val = 0;
639 if (!IS(word, "?"))
640 val = parse_number(word);
641
642 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
643 cnt, type_size(type), val);
644 goto fin;
645 }
646 }
647
648 if (type == DXT_DWORD && words[w][0] == '\''
649 && words[w][5] == '\'' && strlen(words[w]) == 6)
650 {
651 if (w != wordc - 1)
652 aerr("TODO\n");
653
654 p = words[w];
655 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
656 fprintf(fout, ".long 0x%lx", val);
657 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
658 goto fin;
659 }
660
661 if (type >= DXT_DWORD && strchr(words[w], '.'))
662 {
663 if (w != wordc - 1)
664 aerr("TODO\n");
665
666 if (g_arm_mode && type == DXT_TEN) {
667 fprintf(fout, ".fill 10");
668 snprintf(g_comment, sizeof(g_comment), "%s %s",
669 type_name_float(type), words[w]);
670 }
671 else
672 fprintf(fout, "%s %s", type_name_float(type), words[w]);
673 goto fin;
674 }
675
676 first = 1;
677 fprintf(fout, "%s ", type_name(type));
678 for (; w < wordc; w++)
679 {
680 if (!first)
681 fprintf(fout, ", ");
682
683 is_label = is_bss = 0;
684 if (w <= wordc - 2 && IS(words[w], "offset")) {
685 is_label = 1;
686 w++;
687 }
688 else if (IS(words[w], "?")) {
689 is_bss = 1;
690 }
691 else if (type == DXT_DWORD
692 && !('0' <= words[w][0] && words[w][0] <= '9'))
693 {
694 // assume label
695 is_label = 1;
696 }
697
698 if (is_bss) {
699 fprintf(fout, "0");
700 }
701 else if (is_label) {
702 p = words[w];
703 if (IS_START(p, "loc_") || IS_START(p, "__imp")
704 || strchr(p, '?') || strchr(p, '@')
705 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
706 cmpstringp))
707 {
708 fprintf(fout, "0");
709 snprintf(g_comment, sizeof(g_comment), "%s", p);
710 }
711 else {
712 pp = check_var(fhdr, sym, p);
713 if (pp == NULL) {
714 fprintf(fout, "%s%s",
715 (no_decorations || p[0] == '_') ? "" : "_", p);
716 }
717 else {
718 if (no_decorations)
719 fprintf(fout, "%s", pp->name);
720 else
721 output_decorated_pp(fout, pp);
722 }
723 }
724 }
725 else {
726 val = parse_number(words[w]);
727 if (val < 10)
728 fprintf(fout, "%ld", val);
729 else
730 fprintf(fout, "0x%lx", val);
731 }
732
733 first = 0;
734 }
735
736fin:
737 if (g_comment[0] != 0) {
738 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
739 g_comment[0] = 0;
740 }
741 fprintf(fout, "\n");
742 }
743 }
744
745 fprintf(fout, "\n");
746
747 // dump public syms
748 for (i = 0; i < pub_sym_cnt; i++)
749 fprintf(fout, ".global %s%s\n",
750 no_decorations ? "" : "_", pub_syms[i]);
751
752 fclose(fout);
753 fclose(fasm);
754 fclose(fhdr);
755
756 return 0;
757}
758
759// vim:ts=2:shiftwidth=2:expandtab