translate: some vararg improvements
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16
// number of elements in a real array (not valid for pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// non-zero when strings w and y are equal
#define IS(w, y) (!strcmp((w), (y)))
// non-zero when string w begins with string y
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
20
21#include "protoparse.h"
22
// position in the .asm input, used as the prefix of every diagnostic
static const char *asmfn;
static int asmln;

// number of warnings printed so far
static int g_warn_cnt;

// prototype of the func ptr symbol currently being emitted, if any
static const struct parsed_proto *g_func_sym_pp;

// pending comment text; appended to the current output line and cleared
static char g_comment[256];

// command line switches: -i and -a respectively
static int g_cconv_novalidate;
static int g_arm_mode;
31
// Element kind of a data directive.
// note: must be in ascending order
enum dx_type {
  DXT_UNSPEC = 0, // not a recognized directive
  DXT_BYTE   = 1, // db
  DXT_WORD   = 2, // dw
  DXT_DWORD  = 3, // dd
  DXT_QUAD   = 4, // dq
  DXT_TEN    = 5, // dt
};
41
// Diagnostics. Every message is written to stdout and is prefixed
// with the current .asm file name and line number.

// informational message, never terminates
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

// warning; the 10th one closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
56
57#include "masm_tools.h"
58
// Copy the next word of s into w (at most wsize - 1 chars, always
// terminated). Leading blanks are skipped; a word ends at a blank,
// a ',' or the end of the string, except that blanks and commas
// between single quotes belong to the word.
// Returns a pointer just past the copied characters in s.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped in the middle of a word: it did not fit
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
80
81static void next_section(FILE *fasm, char *name)
82{
83 char words[2][256];
84 char line[256];
85 int wordc;
86 char *p;
87
88 name[0] = 0;
89
90 while (fgets(line, sizeof(line), fasm))
91 {
92 wordc = 0;
93 asmln++;
94
95 p = sskip(line);
96 if (*p == 0)
97 continue;
98
99 if (*p == ';') {
100 while (strlen(line) == sizeof(line) - 1) {
101 // one of those long comment lines..
102 if (!fgets(line, sizeof(line), fasm))
103 break;
104 }
105 continue;
106 }
107
108 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
109 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
110 if (*p == 0 || *p == ';') {
111 wordc++;
112 break;
113 }
114 }
115
116 if (wordc < 2)
117 continue;
118
119 if (!IS(words[1], "segment"))
120 continue;
121
122 strcpy(name, words[0]);
123 break;
124 }
125}
126
127static enum dx_type parse_dx_directive(const char *name)
128{
129 if (IS(name, "dd"))
130 return DXT_DWORD;
131 if (IS(name, "dw"))
132 return DXT_WORD;
133 if (IS(name, "db"))
134 return DXT_BYTE;
135 if (IS(name, "dq"))
136 return DXT_QUAD;
137 if (IS(name, "dt"))
138 return DXT_TEN;
139
140 return DXT_UNSPEC;
141}
142
143static const char *type_name(enum dx_type type)
144{
145 switch (type) {
146 case DXT_BYTE:
147 return ".byte";
148 case DXT_WORD:
149 return ".hword";
150 case DXT_DWORD:
151 return ".long";
152 case DXT_QUAD:
153 return ".quad";
154 case DXT_TEN:
155 return ".tfloat";
156 case DXT_UNSPEC:
157 break;
158 }
159 return "<bad>";
160}
161
162static const char *type_name_float(enum dx_type type)
163{
164 switch (type) {
165 case DXT_DWORD:
166 return ".float";
167 case DXT_QUAD:
168 return ".double";
169 case DXT_TEN:
170 return ".tfloat";
171 default:
172 break;
173 }
174 return "<bad_float>";
175}
176
177static int type_size(enum dx_type type)
178{
179 switch (type) {
180 case DXT_BYTE:
181 return 1;
182 case DXT_WORD:
183 return 2;
184 case DXT_DWORD:
185 return 4;
186 case DXT_QUAD:
187 return 8;
188 case DXT_TEN:
189 return 10;
190 case DXT_UNSPEC:
191 break;
192 }
193 return -1;
194}
195
// Escape s in place for use inside a double-quoted GNU as string:
// '"' becomes \" and '\' becomes \\, everything else is kept as-is.
//
// The result overwrites s from its start and s is returned. Since
// escaping can grow the text, the buffer behind s must have room for
// the escaped form; the result is capped at 255 characters (a warning
// is printed and the rest is dropped if that is not enough).
static char *escape_string(char *s)
{
  char buf[256];
  const char *src;
  size_t need;
  size_t n = 0;
  int esc;

  for (src = s; *src != 0; src++) {
    esc = (*src == '"' || *src == '\\');
    need = esc ? 2 : 1;

    // never split an escape pair, always keep room for the terminator
    if (n + need >= sizeof(buf)) {
      printf("warning: escaped string truncated\n");
      break;
    }

    // note: a numeric escape would be read as octal by gas,
    // so the quote is escaped symbolically
    if (esc)
      buf[n++] = '\\';
    buf[n++] = *src;
  }
  buf[n] = 0;

  return strcpy(s, buf);
}
217
218static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
219 size_t buf_size)
220{
221 char *p = buf;
222 size_t l;
223 int i;
224
225 if (pp->ret_type.is_ptr)
226 *p++ = 'p';
227 else if (IS(pp->ret_type.name, "void"))
228 *p++ = 'v';
229 else
230 *p++ = 'i';
231 *p++ = '(';
232 l = 2;
233
234 for (i = 0; i < pp->argc; i++) {
235 if (pp->arg[i].reg != NULL)
236 snprintf(buf + l, buf_size - l, "%s%s",
237 i == 0 ? "" : ",", pp->arg[i].reg);
238 else
239 snprintf(buf + l, buf_size - l, "%sa%d",
240 i == 0 ? "" : ",", i + 1);
241 l = strlen(buf);
242 }
243 snprintf(buf + l, buf_size - l, ")");
244}
245
246static const struct parsed_proto *check_var(FILE *fhdr,
247 const char *sym, const char *varname)
248{
249 const struct parsed_proto *pp, *pp_sym;
250 char fp_sym[256], fp_var[256];
251
252 pp = proto_parse(fhdr, varname, 1);
253 if (pp == NULL) {
254 if (IS_START(varname, "sub_"))
255 awarn("sub_ sym missing proto: '%s'\n", varname);
256 return NULL;
257 }
258
259 if (!pp->is_func && !pp->is_fptr)
260 return NULL;
261
262 pp_print(fp_var, sizeof(fp_var), pp);
263
264 if (pp->argc_reg == 0)
265 goto check_sym;
266 if (pp->argc_reg == 1 && pp->argc_stack == 0
267 && IS(pp->arg[0].reg, "ecx"))
268 {
269 goto check_sym;
270 }
271 if (!g_cconv_novalidate
272 && (pp->argc_reg != 2
273 || !IS(pp->arg[0].reg, "ecx")
274 || !IS(pp->arg[1].reg, "edx")))
275 {
276 awarn("unhandled reg call: %s\n", fp_var);
277 }
278
279check_sym:
280 sprint_pp_short(pp, g_comment, sizeof(g_comment));
281
282 if (sym != NULL) {
283 g_func_sym_pp = NULL;
284 pp_sym = proto_parse(fhdr, sym, 1);
285 if (pp_sym == NULL)
286 return pp;
287 if (!pp_sym->is_fptr)
288 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
289 g_func_sym_pp = pp_sym;
290 }
291 else {
292 pp_sym = g_func_sym_pp;
293 if (pp_sym == NULL)
294 return pp;
295 }
296
297 if (pp_cmp_func(pp, pp_sym)) {
298 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
299 anote("var: %s\n", fp_var);
300 anote("sym: %s\n", fp_sym);
301 awarn("^ mismatch\n");
302 }
303
304 return pp;
305}
306
307static void output_decorated_pp(FILE *fout,
308 const struct parsed_proto *pp)
309{
310 if (pp->name[0] != '_')
311 fprintf(fout, pp->is_fastcall ? "@" : "_");
312 fprintf(fout, "%s", pp->name);
313 if (pp->is_stdcall && pp->argc > 0)
314 fprintf(fout, "@%d", pp->argc * 4);
315}
316
317static int align_value(int src_val)
318{
319 if (src_val <= 0) {
320 awarn("bad align: %d\n", src_val);
321 src_val = 1;
322 }
323 if (!g_arm_mode)
324 return src_val;
325
326 return __builtin_ffs(src_val) - 1;
327}
328
// qsort()/bsearch() comparator for arrays of C string pointers:
// both arguments point at a char pointer, the strings are compared.
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
333
/*
 * Symbols whose string data is not wanted in the output.
 * The table is sorted at startup so that it can be binary searched.
 * XXX: maybe move to external file?
 */
static const char *unwanted_syms[] = {
  "aRuntimeError", "aTlossError", "aSingError", "aDomainError",
  "aR6029ThisAppli", "aR6028UnableToI", "aR6027NotEnough",
  "aR6026NotEnough", "aR6025PureVirtu", "aR6024NotEnough",
  "aR6019UnableToO", "aR6018Unexpecte", "aR6017Unexpecte",
  "aR6016NotEnough", "aAbnormalProgra", "aR6009NotEnough",
  "aR6008NotEnough", "aR6002FloatingP", "aMicrosoftVisua",
  "aRuntimeErrorPr", "aThisApplicatio", "aMicrosoftFindF",
  "aMicrosoftOffic",
};
360
361static int is_unwanted_sym(const char *sym)
362{
363 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
364 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
365}
366
367int main(int argc, char *argv[])
368{
369 FILE *fout, *fasm, *fhdr, *frlist;
370 const struct parsed_proto *pp;
371 int no_decorations = 0;
372 char comment_char = '#';
373 char words[20][256];
374 char word[256];
375 char line[256];
376 char last_sym[32];
377 unsigned long val;
378 unsigned long cnt;
379 const char *sym;
380 enum dx_type type;
381 char **pub_syms;
382 int pub_sym_cnt = 0;
383 int pub_sym_alloc;
384 char **rlist;
385 int rlist_cnt = 0;
386 int rlist_alloc;
387 int is_label;
388 int is_bss;
389 int wordc;
390 int first;
391 int arg_out;
392 int arg = 1;
393 int len;
394 int w, i;
395 char *p;
396 char *p2;
397
398 if (argc < 4) {
399 // -nd: no symbol decorations
400 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
401 argv[0]);
402 return 1;
403 }
404
405 for (arg = 1; arg < argc; arg++) {
406 if (IS(argv[arg], "-nd"))
407 no_decorations = 1;
408 else if (IS(argv[arg], "-i"))
409 g_cconv_novalidate = 1;
410 else if (IS(argv[arg], "-a")) {
411 comment_char = '@';
412 g_arm_mode = 1;
413 }
414 else
415 break;
416 }
417
418 arg_out = arg++;
419
420 asmfn = argv[arg++];
421 fasm = fopen(asmfn, "r");
422 my_assert_not(fasm, NULL);
423
424 hdrfn = argv[arg++];
425 fhdr = fopen(hdrfn, "r");
426 my_assert_not(fhdr, NULL);
427
428 fout = fopen(argv[arg_out], "w");
429 my_assert_not(fout, NULL);
430
431 pub_sym_alloc = 64;
432 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
433 my_assert_not(pub_syms, NULL);
434
435 rlist_alloc = 64;
436 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
437 my_assert_not(rlist, NULL);
438
439 for (; arg < argc; arg++) {
440 frlist = fopen(argv[arg], "r");
441 my_assert_not(frlist, NULL);
442
443 while (fgets(line, sizeof(line), frlist)) {
444 p = sskip(line);
445 if (*p == 0 || *p == ';')
446 continue;
447
448 p = next_word(words[0], sizeof(words[0]), p);
449 if (words[0][0] == 0)
450 continue;
451
452 if (rlist_cnt >= rlist_alloc) {
453 rlist_alloc = rlist_alloc * 2 + 64;
454 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
455 my_assert_not(rlist, NULL);
456 }
457 rlist[rlist_cnt++] = strdup(words[0]);
458 }
459
460 fclose(frlist);
461 frlist = NULL;
462 }
463
464 if (rlist_cnt > 0)
465 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
466
467 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
468 sizeof(unwanted_syms[0]), cmpstringp);
469
470 last_sym[0] = 0;
471
472 while (1) {
473 next_section(fasm, line);
474 if (feof(fasm))
475 break;
476 if (IS(line + 1, "text"))
477 continue;
478
479 if (IS(line + 1, "rdata"))
480 fprintf(fout, "\n.section .rodata\n");
481 else if (IS(line + 1, "data"))
482 fprintf(fout, "\n.data\n");
483 else
484 aerr("unhandled section: '%s'\n", line);
485
486 fprintf(fout, ".align %d\n", align_value(4));
487
488 while (fgets(line, sizeof(line), fasm))
489 {
490 sym = NULL;
491 asmln++;
492
493 p = sskip(line);
494 if (*p == 0)
495 continue;
496
497 if (*p == ';') {
498 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
499 // ;org is only seen at section start, so assume . addr 0
500 i &= 0xfff;
501 if (i != 0)
502 fprintf(fout, "\t\t .skip 0x%x\n", i);
503 }
504 continue;
505 }
506
507 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
508 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
509 if (*p == 0 || *p == ';') {
510 wordc++;
511 break;
512 }
513 if (*p == ',') {
514 p = sskip(p + 1);
515 }
516 }
517
518 if (*p == ';') {
519 p = sskip(p + 1);
520 if (IS_START(p, "sctclrtype"))
521 g_func_sym_pp = NULL;
522 }
523
524 if (wordc == 2 && IS(words[1], "ends"))
525 break;
526 if (wordc <= 2 && IS(words[0], "end"))
527 break;
528 if (wordc < 2)
529 aerr("unhandled: '%s'\n", words[0]);
530
531 // don't cares
532 if (IS(words[0], "assume"))
533 continue;
534
535 if (IS(words[0], "align")) {
536 val = parse_number(words[1]);
537 fprintf(fout, "\t\t .align %d", align_value(val));
538 goto fin;
539 }
540
541 w = 1;
542 type = parse_dx_directive(words[0]);
543 if (type == DXT_UNSPEC) {
544 type = parse_dx_directive(words[1]);
545 sym = words[0];
546 w = 2;
547 }
548 if (type == DXT_UNSPEC)
549 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
550
551 if (sym != NULL) {
552 snprintf(last_sym, sizeof(last_sym), "%s", sym);
553
554 pp = proto_parse(fhdr, sym, 1);
555 if (pp != NULL) {
556 g_func_sym_pp = NULL;
557
558 // public/global name
559 if (pub_sym_cnt >= pub_sym_alloc) {
560 pub_sym_alloc *= 2;
561 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
562 my_assert_not(pub_syms, NULL);
563 }
564 pub_syms[pub_sym_cnt++] = strdup(sym);
565 }
566
567 len = strlen(sym);
568 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
569
570 len += 2;
571 if (len < 8)
572 fprintf(fout, "\t");
573 if (len < 16)
574 fprintf(fout, "\t");
575 if (len <= 16)
576 fprintf(fout, " ");
577 else
578 fprintf(fout, " ");
579 }
580 else {
581 fprintf(fout, "\t\t ");
582 }
583
584 // fill out some unwanted strings with zeroes..
585 if (type == DXT_BYTE && words[w][0] == '\''
586 && is_unwanted_sym(last_sym))
587 {
588 len = 0;
589 for (; w < wordc; w++) {
590 if (words[w][0] == '\'') {
591 p = words[w] + 1;
592 for (; *p && *p != '\''; p++)
593 len++;
594 }
595 else {
596 // assume encoded byte
597 len++;
598 }
599 }
600 fprintf(fout, ".skip %d", len);
601 goto fin;
602 }
603 else if (type == DXT_BYTE
604 && (words[w][0] == '\''
605 || (w + 1 < wordc && words[w + 1][0] == '\'')))
606 {
607 // string; use asciz for most common case
608 if (w == wordc - 2 && IS(words[w + 1], "0")) {
609 fprintf(fout, ".asciz \"");
610 wordc--;
611 }
612 else
613 fprintf(fout, ".ascii \"");
614
615 for (; w < wordc; w++) {
616 if (words[w][0] == '\'') {
617 p = words[w] + 1;
618 p2 = strchr(p, '\'');
619 if (p2 == NULL)
620 aerr("unterminated string? '%s'\n", p);
621 memcpy(word, p, p2 - p);
622 word[p2 - p] = 0;
623 fprintf(fout, "%s", escape_string(word));
624 }
625 else {
626 val = parse_number(words[w]);
627 if (val & ~0xff)
628 aerr("bad string trailing byte?\n");
629 fprintf(fout, "\\x%02lx", val);
630 }
631 }
632 fprintf(fout, "\"");
633 goto fin;
634 }
635
636 if (w == wordc - 2) {
637 if (IS_START(words[w + 1], "dup(")) {
638 cnt = parse_number(words[w]);
639 p = words[w + 1] + 4;
640 p2 = strchr(p, ')');
641 if (p2 == NULL)
642 aerr("bad dup?\n");
643 memmove(word, p, p2 - p);
644 word[p2 - p] = 0;
645
646 val = 0;
647 if (!IS(word, "?"))
648 val = parse_number(word);
649
650 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
651 cnt, type_size(type), val);
652 goto fin;
653 }
654 }
655
656 if (type == DXT_DWORD && words[w][0] == '\''
657 && words[w][5] == '\'' && strlen(words[w]) == 6)
658 {
659 if (w != wordc - 1)
660 aerr("TODO\n");
661
662 p = words[w];
663 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
664 fprintf(fout, ".long 0x%lx", val);
665 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
666 goto fin;
667 }
668
669 if (type >= DXT_DWORD && strchr(words[w], '.'))
670 {
671 if (w != wordc - 1)
672 aerr("TODO\n");
673
674 if (g_arm_mode && type == DXT_TEN) {
675 fprintf(fout, ".fill 10");
676 snprintf(g_comment, sizeof(g_comment), "%s %s",
677 type_name_float(type), words[w]);
678 }
679 else
680 fprintf(fout, "%s %s", type_name_float(type), words[w]);
681 goto fin;
682 }
683
684 first = 1;
685 fprintf(fout, "%s ", type_name(type));
686 for (; w < wordc; w++)
687 {
688 if (!first)
689 fprintf(fout, ", ");
690
691 is_label = is_bss = 0;
692 if (w <= wordc - 2 && IS(words[w], "offset")) {
693 is_label = 1;
694 w++;
695 }
696 else if (IS(words[w], "?")) {
697 is_bss = 1;
698 }
699 else if (type == DXT_DWORD
700 && !('0' <= words[w][0] && words[w][0] <= '9'))
701 {
702 // assume label
703 is_label = 1;
704 }
705
706 if (is_bss) {
707 fprintf(fout, "0");
708 }
709 else if (is_label) {
710 p = words[w];
711 if (IS_START(p, "loc_") || IS_START(p, "__imp")
712 || strchr(p, '?') || strchr(p, '@')
713 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
714 cmpstringp))
715 {
716 fprintf(fout, "0");
717 snprintf(g_comment, sizeof(g_comment), "%s", p);
718 }
719 else {
720 pp = check_var(fhdr, sym, p);
721 if (pp == NULL) {
722 fprintf(fout, "%s%s",
723 (no_decorations || p[0] == '_') ? "" : "_", p);
724 }
725 else {
726 if (no_decorations)
727 fprintf(fout, "%s", pp->name);
728 else
729 output_decorated_pp(fout, pp);
730 }
731 }
732 }
733 else {
734 val = parse_number(words[w]);
735 if (val < 10)
736 fprintf(fout, "%ld", val);
737 else
738 fprintf(fout, "0x%lx", val);
739 }
740
741 first = 0;
742 }
743
744fin:
745 if (g_comment[0] != 0) {
746 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
747 g_comment[0] = 0;
748 }
749 fprintf(fout, "\n");
750 }
751 }
752
753 fprintf(fout, "\n");
754
755 // dump public syms
756 for (i = 0; i < pub_sym_cnt; i++)
757 fprintf(fout, ".global %s%s\n",
758 no_decorations ? "" : "_", pub_syms[i]);
759
760 fclose(fout);
761 fclose(fasm);
762 fclose(fhdr);
763
764 return 0;
765}
766
767// vim:ts=2:shiftwidth=2:expandtab