get rid of some duplication
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <stdint.h>
14#include <inttypes.h>
15
16#include "my_assert.h"
17#include "my_str.h"
18#include "common.h"
19
20#include "protoparse.h"
21
// name and current line number of the .asm input being processed;
// used as the "file:line:" prefix by anote/awarn/aerr
static const char *asmfn;
static int asmln;

// prototype of the most recent labeled symbol that was found to be a
// function pointer (set/cleared in check_var() and main()); reused for
// following data lines that carry no label of their own
static const struct parsed_proto *g_func_sym_pp;
// trailing comment pending for the current output line; main() prints
// and clears it at its 'fin' label
static char g_comment[256];
// number of warnings issued through awarn() so far
static int g_warn_cnt;
// set by -i: suppresses the "unhandled reg call" warning in check_var()
static int g_cconv_novalidate;
// set by -a: changes how alignment values, comments and 10-byte floats
// are emitted
static int g_arm_mode;
30
// element type selected by a data directive (db/dw/dd/dq/dt)
// note: must be in ascending order (main() tests 'type >= DXT_DWORD')
enum dx_type {
  DXT_UNSPEC, // not a recognized data directive
  DXT_BYTE,   // db, 1 byte
  DXT_WORD,   // dw, 2 bytes
  DXT_DWORD,  // dd, 4 bytes
  DXT_QUAD,   // dq, 8 bytes
  DXT_TEN,    // dt, 10 bytes
};
40
// Diagnostic helpers. Each message goes to stdout and is prefixed with
// the current asm file name and line number (asmfn:asmln).

// anote: informational note, printing only
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// awarn: warning; when the warning counter reaches 10 all streams are
// closed and the program exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// aerr: fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
55
56#include "masm_tools.h"
57
// Copy the next word of 's' into 'w' (at most wsize - 1 characters plus
// the terminating NUL). Leading blanks are skipped first. A word ends at
// end of string, or at a blank or ',' that is not inside a pair of
// single quotes. A warning is printed if the word did not fit.
// Returns a pointer to the first character of 's' that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;

  s = sskip(s);

  while (n < wsize - 1) {
    char c = s[n];

    // every single quote toggles the "inside string" state
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;

    w[n++] = c;
  }
  w[n] = 0;

  // stopped in the middle of a word: output buffer was too small
  if (s[n] != 0 && !my_isblank(s[n]) && s[n] != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
79
80static void next_section(FILE *fasm, char *name)
81{
82 char words[2][256];
83 char line[256];
84 int wordc;
85 char *p;
86
87 name[0] = 0;
88
89 while (my_fgets(line, sizeof(line), fasm))
90 {
91 wordc = 0;
92 asmln++;
93
94 p = sskip(line);
95 if (*p == 0)
96 continue;
97
98 if (*p == ';')
99 continue;
100
101 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
102 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
103 if (*p == 0 || *p == ';') {
104 wordc++;
105 break;
106 }
107 }
108
109 if (wordc < 2)
110 continue;
111
112 if (!IS(words[1], "segment"))
113 continue;
114
115 strcpy(name, words[0]);
116 break;
117 }
118}
119
120static enum dx_type parse_dx_directive(const char *name)
121{
122 if (IS(name, "dd"))
123 return DXT_DWORD;
124 if (IS(name, "dw"))
125 return DXT_WORD;
126 if (IS(name, "db"))
127 return DXT_BYTE;
128 if (IS(name, "dq"))
129 return DXT_QUAD;
130 if (IS(name, "dt"))
131 return DXT_TEN;
132
133 return DXT_UNSPEC;
134}
135
136static const char *type_name(enum dx_type type)
137{
138 switch (type) {
139 case DXT_BYTE:
140 return ".byte";
141 case DXT_WORD:
142 return ".hword";
143 case DXT_DWORD:
144 return ".long";
145 case DXT_QUAD:
146 return ".quad";
147 case DXT_TEN:
148 return ".tfloat";
149 case DXT_UNSPEC:
150 break;
151 }
152 return "<bad>";
153}
154
155static const char *type_name_float(enum dx_type type)
156{
157 switch (type) {
158 case DXT_DWORD:
159 return ".float";
160 case DXT_QUAD:
161 return ".double";
162 case DXT_TEN:
163 return ".tfloat";
164 default:
165 break;
166 }
167 return "<bad_float>";
168}
169
170static int type_size(enum dx_type type)
171{
172 switch (type) {
173 case DXT_BYTE:
174 return 1;
175 case DXT_WORD:
176 return 2;
177 case DXT_DWORD:
178 return 4;
179 case DXT_QUAD:
180 return 8;
181 case DXT_TEN:
182 return 10;
183 case DXT_UNSPEC:
184 break;
185 }
186 return -1;
187}
188
// Escape '"' and '\' in 's' with a leading backslash, in place.
// The escaped text (including its terminating NUL) must fit in 256
// bytes, otherwise aerr() is raised. The caller's buffer must be large
// enough to hold the escaped result, as it is copied back over 's'.
// Returns 's'.
static char *escape_string(char *s)
{
  char buf[256];
  size_t len = 0;
  const char *src;

  for (src = s; *src != 0; src++) {
    size_t extra = (*src == '"' || *src == '\\') ? 1 : 0;

    // check before writing: need room for the optional backslash, the
    // character itself and the final NUL, so buf is never overrun
    if (len + extra + 2 > sizeof(buf))
      aerr("string is too long\n");

    if (extra)
      buf[len++] = '\\';
    buf[len++] = *src;
  }
  buf[len] = 0;

  return strcpy(s, buf);
}
212
213static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
214 size_t buf_size)
215{
216 char *p = buf;
217 size_t l;
218 int i;
219
220 if (pp->ret_type.is_ptr)
221 *p++ = 'p';
222 else if (IS(pp->ret_type.name, "void"))
223 *p++ = 'v';
224 else
225 *p++ = 'i';
226 *p++ = '(';
227 l = 2;
228
229 for (i = 0; i < pp->argc; i++) {
230 if (pp->arg[i].reg != NULL)
231 snprintf(buf + l, buf_size - l, "%s%s",
232 i == 0 ? "" : ",", pp->arg[i].reg);
233 else
234 snprintf(buf + l, buf_size - l, "%sa%d",
235 i == 0 ? "" : ",", i + 1);
236 l = strlen(buf);
237 }
238 snprintf(buf + l, buf_size - l, ")");
239}
240
// Validate a reference to 'varname' found in a data item.
//
// fhdr:    header stream handed to proto_parse() for prototype lookups
// sym:     label of the data item holding the reference, or NULL when
//          the current data line has no label (the prototype remembered
//          in g_func_sym_pp from an earlier labeled line is used then)
// varname: name of the referenced symbol
//
// Returns the prototype of 'varname' when it is a function or function
// pointer, NULL when it has no prototype or is something else.
// Side effects: may emit notes/warnings, fills g_comment with a short
// signature and updates g_func_sym_pp.
static const struct parsed_proto *check_var(FILE *fhdr,
  const char *sym, const char *varname)
{
  const struct parsed_proto *pp, *pp_sym;
  char fp_sym[256], fp_var[256], *p;
  int i;

  pp = proto_parse(fhdr, varname, 1);
  if (pp == NULL) {
    // only sub_* names are reported when the prototype is missing
    if (IS_START(varname, "sub_"))
      awarn("sub_ sym missing proto: '%s'\n", varname);
    return NULL;
  }

  // plain data symbols need no further checking
  if (!pp->is_func && !pp->is_fptr)
    return NULL;

  pp_print(fp_var, sizeof(fp_var), pp);

  // register argument layouts accepted without a warning:
  // none, a single ecx arg with no stack args, or ecx + edx
  if (pp->argc_reg == 0)
    goto check_sym;
  if (pp->argc_reg == 1 && pp->argc_stack == 0
    && IS(pp->arg[0].reg, "ecx"))
  {
    goto check_sym;
  }
  if (!g_cconv_novalidate
    && (pp->argc_reg != 2
      || !IS(pp->arg[0].reg, "ecx")
      || !IS(pp->arg[1].reg, "edx")))
  {
    awarn("unhandled reg call: %s\n", fp_var);
  }

check_sym:
  // fptrs must use 32bit args, callsite might have no information and
  // lack a cast to smaller types, which results in incorrectly masked
  // args passed (callee may assume masked args, it does on ARM)
  for (i = 0; i < pp->argc; i++) {
    if (pp->arg[i].type.is_ptr)
      continue;
    p = pp->arg[i].type.name;
    if (strstr(p, "int8") || strstr(p, "int16")
      || strstr(p, "char") || strstr(p, "short"))
    {
      awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
    }
  }

  // short signature becomes the trailing comment of the output line
  sprint_pp_short(pp, g_comment, sizeof(g_comment));

  if (sym != NULL) {
    // labeled data: look up the label's own prototype and remember it
    // for the unlabeled lines that follow
    g_func_sym_pp = NULL;
    pp_sym = proto_parse(fhdr, sym, 1);
    if (pp_sym == NULL)
      return pp;
    if (!pp_sym->is_fptr)
      aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
    g_func_sym_pp = pp_sym;
  }
  else {
    // unlabeled data: compare against the last remembered label, if any
    pp_sym = g_func_sym_pp;
    if (pp_sym == NULL)
      return pp;
  }

  // a nonzero pp_cmp_func() result is reported as a mismatch between
  // the referenced function and the label's declared type
  if (pp_cmp_func(pp, pp_sym)) {
    pp_print(fp_sym, sizeof(fp_sym), pp_sym);
    anote("var: %s\n", fp_var);
    anote("sym: %s\n", fp_sym);
    awarn("^ mismatch\n");
  }

  return pp;
}
316
317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
// qsort()/bsearch() comparator for arrays of C string pointers:
// both arguments point at a 'char *' element, ordering is strcmp()'s.
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
343
/*
 * Labels of strings that should not be carried over to the output;
 * main() replaces their contents with a ".skip" of the same length.
 * The table is looked up with bsearch() in is_unwanted_sym(), so it
 * must be sorted first - main() does that with qsort() at startup.
 * XXX: maybe move to external file?
 */
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
/*
 * Convert the data sections of an .asm listing.
 *
 * Normal mode:  [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*
 *   writes assembler data directives for every non-text section of
 *   <.asm> to <.s>, using prototypes from <hdrf> to check and decorate
 *   function references; names listed in the rlist files are replaced
 *   by 0 in the output.
 * Header mode:  -hdr <.h> <.asm>
 *   writes C "extern" declarations for labeled data items instead.
 *
 * Options: -nd no symbol decorations, -i sets g_cconv_novalidate,
 * -a sets g_arm_mode and switches the comment character to '@'.
 *
 * Returns 1 on bad usage, 0 on success; fatal input problems end the
 * program through aerr().
 */
int main(int argc, char *argv[])
{
  FILE *fout, *fasm, *fhdr = NULL, *frlist;
  const struct parsed_proto *pp;
  int no_decorations = 0;
  char comment_char = '#';
  char words[20][256];
  char word[256];
  char line[256];
  char last_sym[32];
  unsigned long val;
  unsigned long cnt;
  uint64_t val64;
  const char *sym;
  enum dx_type type;
  char **pub_syms;
  int pub_sym_cnt = 0;
  int pub_sym_alloc;
  char **rlist;
  int rlist_cnt = 0;
  int rlist_alloc;
  int header_mode = 0;
  int is_ro = 0;
  int is_label;
  int is_bss;
  int wordc;
  int first;
  int arg_out;
  int arg = 1;
  int len;
  int w, i;
  char *p;
  char *p2;

  if (argc < 4) {
    // -nd: no symbol decorations
    printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
      "%s -hdr <.h> <.asm>\n",
      argv[0], argv[0]);
    return 1;
  }

  // leading option flags; the first non-option ends the scan
  for (arg = 1; arg < argc; arg++) {
    if (IS(argv[arg], "-nd"))
      no_decorations = 1;
    else if (IS(argv[arg], "-i"))
      g_cconv_novalidate = 1;
    else if (IS(argv[arg], "-a")) {
      comment_char = '@';
      g_arm_mode = 1;
    }
    else if (IS(argv[arg], "-hdr"))
      header_mode = 1;
    else
      break;
  }

  // positional args: output file, .asm input, then (normal mode only)
  // the header file
  // NOTE(review): argc is only checked against 4 above, so with option
  // flags present these indices may run past the supplied arguments
  arg_out = arg++;

  asmfn = argv[arg++];
  fasm = fopen(asmfn, "r");
  my_assert_not(fasm, NULL);

  if (!header_mode) {
    // NOTE(review): hdrfn is not declared in this file; presumably it
    // comes from one of the included project headers
    hdrfn = argv[arg++];
    fhdr = fopen(hdrfn, "r");
    my_assert_not(fhdr, NULL);
  }

  fout = fopen(argv[arg_out], "w");
  my_assert_not(fout, NULL);

  pub_sym_alloc = 64;
  pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
  my_assert_not(pub_syms, NULL);

  rlist_alloc = 64;
  rlist = malloc(rlist_alloc * sizeof(rlist[0]));
  my_assert_not(rlist, NULL);

  // remaining args are rlist files: the first word of every line that
  // is not blank and does not start with ';' is collected into rlist[]
  for (; arg < argc; arg++) {
    frlist = fopen(argv[arg], "r");
    my_assert_not(frlist, NULL);

    while (my_fgets(line, sizeof(line), frlist)) {
      p = sskip(line);
      if (*p == 0 || *p == ';')
        continue;

      p = next_word(words[0], sizeof(words[0]), p);
      if (words[0][0] == 0)
        continue;

      if (rlist_cnt >= rlist_alloc) {
        rlist_alloc = rlist_alloc * 2 + 64;
        rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
        my_assert_not(rlist, NULL);
      }
      rlist[rlist_cnt++] = strdup(words[0]);
    }

    fclose(frlist);
    frlist = NULL;
  }

  // both tables are searched with bsearch() later, so sort them now
  if (rlist_cnt > 0)
    qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);

  qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
    sizeof(unwanted_syms[0]), cmpstringp);

  last_sym[0] = 0;

  // outer loop: one iteration per "<name> segment" found in the input
  while (1) {
    next_section(fasm, line);
    if (feof(fasm))
      break;
    // the first character of the section name is skipped when
    // comparing (presumably a '.' or '_' prefix - verify against input)
    if (IS(line + 1, "text"))
      continue;

    if (IS(line + 1, "rdata")) {
      is_ro = 1;
      if (!header_mode)
        fprintf(fout, "\n.section .rodata\n");
    }
    else if (IS(line + 1, "data")) {
      is_ro = 0;
      if (!header_mode)
        fprintf(fout, "\n.data\n");
    }
    else
      aerr("unhandled section: '%s'\n", line);

    if (!header_mode)
      fprintf(fout, ".align %d\n", align_value(4));

    // inner loop: one iteration per line of the current section
    while (my_fgets(line, sizeof(line), fasm))
    {
      sym = NULL;
      asmln++;

      p = sskip(line);
      if (*p == 0)
        continue;

      if (*p == ';') {
        // NOTE(review): %X expects an 'unsigned int *' while i is int
        if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
          // ;org is only seen at section start, so assume . addr 0
          i &= 0xfff;
          if (i != 0 && !header_mode)
            fprintf(fout, "\t\t .skip 0x%x\n", i);
        }
        continue;
      }

      // split the line into words[]; blanks and ',' separate words,
      // quoted strings stay in one word, ';' starts a trailing comment
      for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
        p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
        if (*p == 0 || *p == ';') {
          wordc++;
          break;
        }
        if (*p == ',') {
          p = sskip(p + 1);
        }
      }

      // a trailing "sctclrtype" comment drops the remembered fptr
      // prototype
      if (*p == ';') {
        p = sskip(p + 1);
        if (IS_START(p, "sctclrtype"))
          g_func_sym_pp = NULL;
      }

      // "<name> ends" or "end" finishes the current section
      if (wordc == 2 && IS(words[1], "ends"))
        break;
      if (wordc <= 2 && IS(words[0], "end"))
        break;
      if (wordc < 2)
        aerr("unhandled: '%s'\n", words[0]);

      // don't cares
      if (IS(words[0], "assume"))
        continue;

      if (IS(words[0], "align")) {
        if (header_mode)
          continue;

        val = parse_number(words[1], 0);
        fprintf(fout, "\t\t .align %d", align_value(val));
        goto fin;
      }

      // the directive is either the first word (unlabeled data) or the
      // second one (then the first word is the label); w ends up as
      // the index of the first value word
      w = 1;
      type = parse_dx_directive(words[0]);
      if (type == DXT_UNSPEC) {
        type = parse_dx_directive(words[1]);
        sym = words[0];
        w = 2;
      }
      if (type == DXT_UNSPEC)
        aerr("unhandled decl: '%s %s'\n", words[0], words[1]);

      if (sym != NULL)
      {
        if (header_mode) {
          // header mode: one extern declaration per labeled item
          int is_str = 0;

          fprintf(fout, "extern ");
          if (is_ro)
            fprintf(fout, "const ");

          switch (type) {
          case DXT_BYTE:
            // byte data containing a quoted string is declared as a
            // char array
            for (i = w; i < wordc; i++)
              if (words[i][0] == '\'')
                is_str = 1;
            if (is_str)
              fprintf(fout, "char %s[];\n", sym);
            else
              fprintf(fout, "uint8_t %s;\n", sym);
            break;

          case DXT_WORD:
            fprintf(fout, "uint16_t %s;\n", sym);
            break;

          case DXT_DWORD:
            fprintf(fout, "uint32_t %s;\n", sym);
            break;

          default:
            fprintf(fout, "_UNKNOWN %s;\n", sym);
            break;
          }

          continue;
        }

        // remember the label (truncated to fit last_sym) for the
        // unwanted string check below
        snprintf(last_sym, sizeof(last_sym), "%s", sym);

        // labels that have a prototype in the header are exported
        pp = proto_parse(fhdr, sym, 1);
        if (pp != NULL) {
          g_func_sym_pp = NULL;

          // public/global name
          if (pub_sym_cnt >= pub_sym_alloc) {
            pub_sym_alloc *= 2;
            pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
            my_assert_not(pub_syms, NULL);
          }
          pub_syms[pub_sym_cnt++] = strdup(sym);
        }

        len = strlen(sym);
        fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);

        // pad after the label so the directive column lines up
        len += 2;
        if (len < 8)
          fprintf(fout, "\t");
        if (len < 16)
          fprintf(fout, "\t");
        if (len <= 16)
          fprintf(fout, " ");
        else
          fprintf(fout, " ");
      }
      else {
        // unlabeled data produces no output in header mode
        if (header_mode)
          continue;

        fprintf(fout, "\t\t ");
      }

      // fill out some unwanted strings with zeroes..
      if (type == DXT_BYTE && words[w][0] == '\''
        && is_unwanted_sym(last_sym))
      {
        // count the bytes the string would have occupied
        len = 0;
        for (; w < wordc; w++) {
          if (words[w][0] == '\'') {
            p = words[w] + 1;
            for (; *p && *p != '\''; p++)
              len++;
          }
          else {
            // assume encoded byte
            len++;
          }
        }
        fprintf(fout, ".skip %d", len);
        goto fin;
      }
      else if (type == DXT_BYTE
        && (words[w][0] == '\''
          || (w + 1 < wordc && words[w + 1][0] == '\'')))
      {
        // string; use asciz for most common case
        if (w == wordc - 2 && IS(words[w + 1], "0")) {
          fprintf(fout, ".asciz \"");
          wordc--;
        }
        else
          fprintf(fout, ".ascii \"");

        for (; w < wordc; w++) {
          if (words[w][0] == '\'') {
            // quoted part: copy the text between the quotes and
            // escape it for the output string literal
            p = words[w] + 1;
            p2 = strchr(p, '\'');
            if (p2 == NULL)
              aerr("unterminated string? '%s'\n", p);
            memcpy(word, p, p2 - p);
            word[p2 - p] = 0;
            fprintf(fout, "%s", escape_string(word));
          }
          else {
            // numeric part: must be a single byte value
            val = parse_number(words[w], 0);
            if (val & ~0xff)
              aerr("bad string trailing byte?\n");
            // unfortunately \xHH is unusable - gas interprets
            // things like \x27b as 0x7b, so have to use octal here
            fprintf(fout, "\\%03lo", val);
          }
        }
        fprintf(fout, "\"");
        goto fin;
      }

      // "<count> dup(<value>)" becomes a .fill; "dup(?)" fills with 0
      if (w == wordc - 2) {
        if (IS_START(words[w + 1], "dup(")) {
          cnt = parse_number(words[w], 0);
          p = words[w + 1] + 4;
          p2 = strchr(p, ')');
          if (p2 == NULL)
            aerr("bad dup?\n");
          memmove(word, p, p2 - p);
          word[p2 - p] = 0;

          val = 0;
          if (!IS(word, "?"))
            val = parse_number(word, 0);

          fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
            cnt, type_size(type), val);
          goto fin;
        }
      }

      // dword given as a quoted 4 character constant: pack the four
      // characters into one value (first character in the top byte)
      // and keep the original text as a comment
      if (type == DXT_DWORD && words[w][0] == '\''
        && words[w][5] == '\'' && strlen(words[w]) == 6)
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        p = words[w];
        // NOTE(review): p[] is plain char; characters with the high
        // bit set would be sign extended on some platforms - confirm
        val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
        fprintf(fout, ".long 0x%lx", val);
        snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
        goto fin;
      }

      // a '.' in a dword or larger value means a floating point literal
      if (type >= DXT_DWORD && strchr(words[w], '.'))
      {
        if (w != wordc - 1)
          aerr("TODO\n");

        if (g_arm_mode && type == DXT_TEN) {
          // ARM mode: 10-byte floats are emitted as a ".fill 10"
          // placeholder, the original value is kept only as a comment
          fprintf(fout, ".fill 10");
          snprintf(g_comment, sizeof(g_comment), "%s %s",
            type_name_float(type), words[w]);
        }
        else
          fprintf(fout, "%s %s", type_name_float(type), words[w]);
        goto fin;
      }

      // generic case: a comma separated list of numbers and labels
      first = 1;
      fprintf(fout, "%s ", type_name(type));
      for (; w < wordc; w++)
      {
        if (!first)
          fprintf(fout, ", ");

        // classify the value: "offset <label>", "?" (uninitialized),
        // a dword not starting with a digit (label), or a number
        is_label = is_bss = 0;
        if (w <= wordc - 2 && IS(words[w], "offset")) {
          is_label = 1;
          w++;
        }
        else if (IS(words[w], "?")) {
          is_bss = 1;
        }
        else if (type == DXT_DWORD
          && !('0' <= words[w][0] && words[w][0] <= '9'))
        {
          // assume label
          is_label = 1;
        }

        if (is_bss) {
          fprintf(fout, "0");
        }
        else if (is_label) {
          p = words[w];
          // these references are replaced by 0 and only mentioned in
          // the trailing comment: loc_* and __imp* names, names
          // containing '?' or '@', and names found in the rlist files
          if (IS_START(p, "loc_") || IS_START(p, "__imp")
            || strchr(p, '?') || strchr(p, '@')
            || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
                 cmpstringp))
          {
            fprintf(fout, "0");
            snprintf(g_comment, sizeof(g_comment), "%s", p);
          }
          else {
            // check_var() returns a prototype only for functions and
            // function pointers; other names are printed as they are
            // (with a '_' prefix unless disabled or already present)
            pp = check_var(fhdr, sym, p);
            if (pp == NULL) {
              fprintf(fout, "%s%s",
                (no_decorations || p[0] == '_') ? "" : "_", p);
            }
            else {
              if (no_decorations)
                fprintf(fout, "%s", pp->name);
              else
                output_decorated_pp(fout, pp);
            }
          }
        }
        else {
          // plain number: values below 10 in decimal, others in hex
          val64 = parse_number(words[w], 1);
          if (val64 < 10)
            fprintf(fout, "%d", (int)val64);
          else
            fprintf(fout, "0x%" PRIx64, val64);
        }

        first = 0;
      }

fin:
      // finish the output line, appending the pending comment if any
      if (g_comment[0] != 0) {
        fprintf(fout, "\t\t%c %s", comment_char, g_comment);
        g_comment[0] = 0;
      }
      fprintf(fout, "\n");
    }
  }

  fprintf(fout, "\n");

  // dump public syms
  for (i = 0; i < pub_sym_cnt; i++)
    fprintf(fout, ".global %s%s\n",
      no_decorations ? "" : "_", pub_syms[i]);

  fclose(fout);
  fclose(fasm);
  if (fhdr != NULL)
    fclose(fhdr);

  return 0;
}
835
836// vim:ts=2:shiftwidth=2:expandtab