api for fog
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <stdint.h>
14#include <inttypes.h>
15
16#include "my_assert.h"
17#include "my_str.h"
18#include "common.h"
19
20#include "protoparse.h"
21
// name of the .asm input being converted and the number of the line
// most recently read from it; both are used as the "file:line:" prefix
// of every anote()/awarn()/aerr() diagnostic
static const char *asmfn;
static int asmln;

// prototype of the most recent labelled symbol that parsed as a function
// pointer; check_var() compares later unlabelled entries against it.
// Cleared when a new labelled symbol with a known prototype starts and
// when a trailing "sctclrtype" comment is seen.
static const struct parsed_proto *g_func_sym_pp;
// trailing comment for the output line currently being built; main()
// prints it after the directive and then clears it
static char g_comment[256];
// number of awarn() calls so far (awarn() exits on the 10th)
static int g_warn_cnt;
// set by the -i option: suppresses the "unhandled reg call" warning
static int g_cconv_novalidate;
// set by the -a option: '@' comments, log2 .align values, no .tfloat
static int g_arm_mode;
30
// Element type of a data directive (db/dw/dd/dq/dt).
// note: values must stay in ascending size order, the code compares
// them with relational operators (e.g. "type >= DXT_DWORD")
enum dx_type {
  DXT_UNSPEC = 0,   // not a recognized directive
  DXT_BYTE   = 1,   // db
  DXT_WORD   = 2,   // dw
  DXT_DWORD  = 3,   // dd
  DXT_QUAD   = 4,   // dq
  DXT_TEN    = 5,   // dt
};
40
/*
 * Diagnostic helpers. All three print to stdout, prefixed with the
 * current .asm file name and line number (asmfn / asmln).
 * fmt must be a string literal, it is pasted onto the prefix.
 */

// informational message, never terminates
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// warning; the 10th warning closes all open streams and exits with 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// fatal error: closes all open streams and exits with 1, never returns
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
55
56#include "masm_tools.h"
57
/*
 * Copy the next token of s into w (at most wsize - 1 characters plus
 * the terminating NUL). Leading blanks are skipped first. A token ends
 * at a blank, a ',' or the end of the string; blanks and commas inside
 * a '...' quoted part do not end it.
 * Prints a warning when the token had to be cut because w is full.
 * Returns a pointer to the first character of s that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  char *src = sskip(s);
  int in_quote = 0;
  size_t n = 0;
  char c;

  while (n < wsize - 1) {
    c = src[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped on something that is not a separator: ran out of space
  c = src[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return src + n;
}
79
80static void next_section(FILE *fasm, char *name)
81{
82 char words[2][256];
83 char line[256];
84 int wordc;
85 char *p;
86
87 name[0] = 0;
88
89 while (my_fgets(line, sizeof(line), fasm))
90 {
91 wordc = 0;
92 asmln++;
93
94 p = sskip(line);
95 if (*p == 0)
96 continue;
97
98 if (*p == ';')
99 continue;
100
101 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
102 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
103 if (*p == 0 || *p == ';') {
104 wordc++;
105 break;
106 }
107 }
108
109 if (wordc < 2)
110 continue;
111
112 if (!IS(words[1], "segment"))
113 continue;
114
115 strcpy(name, words[0]);
116 break;
117 }
118}
119
120static enum dx_type parse_dx_directive(const char *name)
121{
122 if (IS(name, "dd"))
123 return DXT_DWORD;
124 if (IS(name, "dw"))
125 return DXT_WORD;
126 if (IS(name, "db"))
127 return DXT_BYTE;
128 if (IS(name, "dq"))
129 return DXT_QUAD;
130 if (IS(name, "dt"))
131 return DXT_TEN;
132
133 return DXT_UNSPEC;
134}
135
136static const char *type_name(enum dx_type type)
137{
138 switch (type) {
139 case DXT_BYTE:
140 return ".byte";
141 case DXT_WORD:
142 return ".hword";
143 case DXT_DWORD:
144 return ".long";
145 case DXT_QUAD:
146 return ".quad";
147 case DXT_TEN:
148 return ".tfloat";
149 case DXT_UNSPEC:
150 break;
151 }
152 return "<bad>";
153}
154
155static const char *type_name_float(enum dx_type type)
156{
157 switch (type) {
158 case DXT_DWORD:
159 return ".float";
160 case DXT_QUAD:
161 return ".double";
162 case DXT_TEN:
163 return ".tfloat";
164 default:
165 break;
166 }
167 return "<bad_float>";
168}
169
170static int type_size(enum dx_type type)
171{
172 switch (type) {
173 case DXT_BYTE:
174 return 1;
175 case DXT_WORD:
176 return 2;
177 case DXT_DWORD:
178 return 4;
179 case DXT_QUAD:
180 return 8;
181 case DXT_TEN:
182 return 10;
183 case DXT_UNSPEC:
184 break;
185 }
186 return -1;
187}
188
/*
 * Escape '"' and '\' in s with a leading backslash, in place.
 *
 * The escaped text (including its terminating NUL) is limited to the
 * size of the local staging buffer, 256 bytes; longer results end the
 * program through aerr(). Room is checked before every write, so the
 * staging buffer is never overrun, not even transiently.
 *
 * s must point to a writable buffer large enough for the escaped text;
 * a 256-byte buffer (as passed by main()) is always sufficient.
 * Returns s.
 */
static char *escape_string(char *s)
{
  char buf[256];
  size_t used = 0;
  const char *src;

  for (src = s; *src != 0; src++) {
    int need_escape = (*src == '"' || *src == '\\');

    // need room for this (possibly escaped) char plus the final NUL
    if (used + (need_escape ? 2 : 1) + 1 > sizeof(buf))
      aerr("string is too long\n");

    if (need_escape)
      buf[used++] = '\\';
    buf[used++] = *src;
  }
  buf[used] = 0;

  return strcpy(s, buf);
}
212
213static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
214 size_t buf_size)
215{
216 char *p = buf;
217 size_t l;
218 int i;
219
220 if (pp->ret_type.is_ptr)
221 *p++ = 'p';
222 else if (IS(pp->ret_type.name, "void"))
223 *p++ = 'v';
224 else
225 *p++ = 'i';
226 *p++ = '(';
227 l = 2;
228
229 for (i = 0; i < pp->argc; i++) {
230 if (pp->arg[i].reg != NULL)
231 snprintf(buf + l, buf_size - l, "%s%s",
232 i == 0 ? "" : ",", pp->arg[i].reg);
233 else
234 snprintf(buf + l, buf_size - l, "%sa%d",
235 i == 0 ? "" : ",", i + 1);
236 l = strlen(buf);
237 }
238 snprintf(buf + l, buf_size - l, ")");
239}
240
241static const struct parsed_proto *check_var(FILE *fhdr,
242 const char *sym, const char *varname)
243{
244 const struct parsed_proto *pp, *pp_sym;
245 char fp_sym[256], fp_var[256], *p;
246 int i;
247
248 pp = proto_parse(fhdr, varname, 1);
249 if (pp == NULL) {
250 if (IS_START(varname, "sub_"))
251 awarn("sub_ sym missing proto: '%s'\n", varname);
252 return NULL;
253 }
254
255 if (!pp->is_func && !pp->is_fptr)
256 return NULL;
257
258 pp_print(fp_var, sizeof(fp_var), pp);
259
260 if (pp->argc_reg == 0)
261 goto check_sym;
262 if (pp->argc_reg == 1 && pp->argc_stack == 0
263 && IS(pp->arg[0].reg, "ecx"))
264 {
265 goto check_sym;
266 }
267 if (!g_cconv_novalidate
268 && (pp->argc_reg != 2
269 || !IS(pp->arg[0].reg, "ecx")
270 || !IS(pp->arg[1].reg, "edx")))
271 {
272 awarn("unhandled reg call: %s\n", fp_var);
273 }
274
275check_sym:
276 // fptrs must use 32bit args, callsite might have no information and
277 // lack a cast to smaller types, which results in incorrectly masked
278 // args passed (callee may assume masked args, it does on ARM)
279 for (i = 0; i < pp->argc; i++) {
280 if (pp->arg[i].type.is_ptr)
281 continue;
282 p = pp->arg[i].type.name;
283 if (strstr(p, "int8") || strstr(p, "int16")
284 || strstr(p, "char") || strstr(p, "short"))
285 {
286 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
287 }
288 }
289
290 sprint_pp_short(pp, g_comment, sizeof(g_comment));
291
292 if (sym != NULL) {
293 g_func_sym_pp = NULL;
294 pp_sym = proto_parse(fhdr, sym, 1);
295 if (pp_sym == NULL)
296 return pp;
297 if (!pp_sym->is_fptr)
298 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
299 g_func_sym_pp = pp_sym;
300 }
301 else {
302 pp_sym = g_func_sym_pp;
303 if (pp_sym == NULL)
304 return pp;
305 }
306
307 if (pp_cmp_func(pp, pp_sym)) {
308 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
309 anote("var: %s\n", fp_var);
310 anote("sym: %s\n", fp_sym);
311 awarn("^ mismatch\n");
312 }
313
314 return pp;
315}
316
317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
/*
 * qsort()/bsearch() comparator for arrays of C string pointers:
 * both arguments point to a "char *", the strings are compared
 * with strcmp().
 */
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
343
344/* XXX: maybe move to external file? */
345static const char *unwanted_syms[] = {
346 "aRuntimeError",
347 "aTlossError",
348 "aSingError",
349 "aDomainError",
350 "aR6029ThisAppli",
351 "aR6028UnableToI",
352 "aR6027NotEnough",
353 "aR6026NotEnough",
354 "aR6025PureVirtu",
355 "aR6024NotEnough",
356 "aR6019UnableToO",
357 "aR6018Unexpecte",
358 "aR6017Unexpecte",
359 "aR6016NotEnough",
360 "aAbnormalProgra",
361 "aR6009NotEnough",
362 "aR6008NotEnough",
363 "aR6002FloatingP",
364 "aMicrosoftVisua",
365 "aRuntimeErrorPr",
366 "aThisApplicatio",
367 "aMicrosoftFindF",
368 "aMicrosoftOffic",
369};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
377int main(int argc, char *argv[])
378{
379 FILE *fout, *fasm, *fhdr = NULL, *frlist;
380 const struct parsed_proto *pp;
381 int no_decorations = 0;
382 char comment_char = '#';
383 char words[20][256];
384 char word[256];
385 char line[256];
386 char last_sym[32];
387 unsigned long val;
388 unsigned long cnt;
389 uint64_t val64;
390 const char *sym;
391 enum dx_type type;
392 char **pub_syms;
393 int pub_sym_cnt = 0;
394 int pub_sym_alloc;
395 char **rlist;
396 int rlist_cnt = 0;
397 int rlist_alloc;
398 int header_mode = 0;
399 int is_ro = 0;
400 int is_label;
401 int is_bss;
402 int wordc;
403 int first;
404 int arg_out;
405 int arg = 1;
406 int len;
407 int w, i;
408 char *p;
409 char *p2;
410
411 if (argc < 4) {
412 // -nd: no symbol decorations
413 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
414 "%s -hdr <.h> <.asm>\n",
415 argv[0], argv[0]);
416 return 1;
417 }
418
419 for (arg = 1; arg < argc; arg++) {
420 if (IS(argv[arg], "-nd"))
421 no_decorations = 1;
422 else if (IS(argv[arg], "-i"))
423 g_cconv_novalidate = 1;
424 else if (IS(argv[arg], "-a")) {
425 comment_char = '@';
426 g_arm_mode = 1;
427 }
428 else if (IS(argv[arg], "-hdr"))
429 header_mode = 1;
430 else
431 break;
432 }
433
434 arg_out = arg++;
435
436 asmfn = argv[arg++];
437 fasm = fopen(asmfn, "r");
438 my_assert_not(fasm, NULL);
439
440 if (!header_mode) {
441 hdrfn = argv[arg++];
442 fhdr = fopen(hdrfn, "r");
443 my_assert_not(fhdr, NULL);
444 }
445
446 fout = fopen(argv[arg_out], "w");
447 my_assert_not(fout, NULL);
448
449 pub_sym_alloc = 64;
450 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
451 my_assert_not(pub_syms, NULL);
452
453 rlist_alloc = 64;
454 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
455 my_assert_not(rlist, NULL);
456
457 for (; arg < argc; arg++) {
458 frlist = fopen(argv[arg], "r");
459 my_assert_not(frlist, NULL);
460
461 while (my_fgets(line, sizeof(line), frlist)) {
462 p = sskip(line);
463 if (*p == 0 || *p == ';')
464 continue;
465
466 p = next_word(words[0], sizeof(words[0]), p);
467 if (words[0][0] == 0)
468 continue;
469
470 if (rlist_cnt >= rlist_alloc) {
471 rlist_alloc = rlist_alloc * 2 + 64;
472 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
473 my_assert_not(rlist, NULL);
474 }
475 rlist[rlist_cnt++] = strdup(words[0]);
476 }
477
478 fclose(frlist);
479 frlist = NULL;
480 }
481
482 if (rlist_cnt > 0)
483 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
484
485 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
486 sizeof(unwanted_syms[0]), cmpstringp);
487
488 last_sym[0] = 0;
489
490 while (1) {
491 next_section(fasm, line);
492 if (feof(fasm))
493 break;
494 if (IS(line + 1, "text"))
495 continue;
496
497 if (IS(line + 1, "rdata")) {
498 is_ro = 1;
499 if (!header_mode)
500 fprintf(fout, "\n.section .rodata\n");
501 }
502 else if (IS(line + 1, "data")) {
503 is_ro = 0;
504 if (!header_mode)
505 fprintf(fout, "\n.data\n");
506 }
507 else
508 aerr("unhandled section: '%s'\n", line);
509
510 if (!header_mode)
511 fprintf(fout, ".align %d\n", align_value(4));
512
513 while (my_fgets(line, sizeof(line), fasm))
514 {
515 sym = NULL;
516 asmln++;
517
518 p = sskip(line);
519 if (*p == 0)
520 continue;
521
522 if (*p == ';') {
523 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
524 // ;org is only seen at section start, so assume . addr 0
525 i &= 0xfff;
526 if (i != 0 && !header_mode)
527 fprintf(fout, "\t\t .skip 0x%x\n", i);
528 }
529 continue;
530 }
531
532 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
533 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
534 if (*p == 0 || *p == ';') {
535 wordc++;
536 break;
537 }
538 if (*p == ',') {
539 p = sskip(p + 1);
540 }
541 }
542
543 if (*p == ';') {
544 p = sskip(p + 1);
545 if (IS_START(p, "sctclrtype"))
546 g_func_sym_pp = NULL;
547 }
548
549 if (wordc == 2 && IS(words[1], "ends"))
550 break;
551 if (wordc <= 2 && IS(words[0], "end"))
552 break;
553 if (wordc < 2)
554 aerr("unhandled: '%s'\n", words[0]);
555
556 // don't cares
557 if (IS(words[0], "assume"))
558 continue;
559
560 if (IS(words[0], "align")) {
561 if (header_mode)
562 continue;
563
564 val = parse_number(words[1], 0);
565 fprintf(fout, "\t\t .align %d", align_value(val));
566 goto fin;
567 }
568
569 if (IS(words[0], "public")) {
570 // skip, sym should appear in header anyway
571 continue;
572 }
573
574 w = 1;
575 type = parse_dx_directive(words[0]);
576 if (type == DXT_UNSPEC) {
577 type = parse_dx_directive(words[1]);
578 sym = words[0];
579 w = 2;
580 }
581 if (type == DXT_UNSPEC)
582 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
583
584 if (sym != NULL)
585 {
586 if (header_mode) {
587 int is_str = 0;
588
589 fprintf(fout, "extern ");
590 if (is_ro)
591 fprintf(fout, "const ");
592
593 switch (type) {
594 case DXT_BYTE:
595 for (i = w; i < wordc; i++)
596 if (words[i][0] == '\'')
597 is_str = 1;
598 if (is_str)
599 fprintf(fout, "char %s[];\n", sym);
600 else
601 fprintf(fout, "uint8_t %s;\n", sym);
602 break;
603
604 case DXT_WORD:
605 fprintf(fout, "uint16_t %s;\n", sym);
606 break;
607
608 case DXT_DWORD:
609 fprintf(fout, "uint32_t %s;\n", sym);
610 break;
611
612 default:
613 fprintf(fout, "_UNKNOWN %s;\n", sym);
614 break;
615 }
616
617 continue;
618 }
619
620 snprintf(last_sym, sizeof(last_sym), "%s", sym);
621
622 pp = proto_parse(fhdr, sym, 1);
623 if (pp != NULL) {
624 g_func_sym_pp = NULL;
625
626 // public/global name
627 if (pub_sym_cnt >= pub_sym_alloc) {
628 pub_sym_alloc *= 2;
629 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
630 my_assert_not(pub_syms, NULL);
631 }
632 pub_syms[pub_sym_cnt++] = strdup(sym);
633 }
634
635 len = strlen(sym);
636 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
637
638 len += 2;
639 if (len < 8)
640 fprintf(fout, "\t");
641 if (len < 16)
642 fprintf(fout, "\t");
643 if (len <= 16)
644 fprintf(fout, " ");
645 else
646 fprintf(fout, " ");
647 }
648 else {
649 if (header_mode)
650 continue;
651
652 fprintf(fout, "\t\t ");
653 }
654
655 // fill out some unwanted strings with zeroes..
656 if (type == DXT_BYTE && words[w][0] == '\''
657 && is_unwanted_sym(last_sym))
658 {
659 len = 0;
660 for (; w < wordc; w++) {
661 if (words[w][0] == '\'') {
662 p = words[w] + 1;
663 for (; *p && *p != '\''; p++)
664 len++;
665 }
666 else {
667 // assume encoded byte
668 len++;
669 }
670 }
671 fprintf(fout, ".skip %d", len);
672 goto fin;
673 }
674 else if (type == DXT_BYTE
675 && (words[w][0] == '\''
676 || (w + 1 < wordc && words[w + 1][0] == '\'')))
677 {
678 // string; use asciz for most common case
679 if (w == wordc - 2 && IS(words[w + 1], "0")) {
680 fprintf(fout, ".asciz \"");
681 wordc--;
682 }
683 else
684 fprintf(fout, ".ascii \"");
685
686 for (; w < wordc; w++) {
687 if (words[w][0] == '\'') {
688 p = words[w] + 1;
689 p2 = strchr(p, '\'');
690 if (p2 == NULL)
691 aerr("unterminated string? '%s'\n", p);
692 memcpy(word, p, p2 - p);
693 word[p2 - p] = 0;
694 fprintf(fout, "%s", escape_string(word));
695 }
696 else {
697 val = parse_number(words[w], 0);
698 if (val & ~0xff)
699 aerr("bad string trailing byte?\n");
700 // unfortunately \xHH is unusable - gas interprets
701 // things like \x27b as 0x7b, so have to use octal here
702 fprintf(fout, "\\%03lo", val);
703 }
704 }
705 fprintf(fout, "\"");
706 goto fin;
707 }
708
709 if (w == wordc - 2) {
710 if (IS_START(words[w + 1], "dup(")) {
711 cnt = parse_number(words[w], 0);
712 p = words[w + 1] + 4;
713 p2 = strchr(p, ')');
714 if (p2 == NULL)
715 aerr("bad dup?\n");
716 memmove(word, p, p2 - p);
717 word[p2 - p] = 0;
718
719 val = 0;
720 if (!IS(word, "?"))
721 val = parse_number(word, 0);
722
723 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
724 cnt, type_size(type), val);
725 goto fin;
726 }
727 }
728
729 if (type == DXT_DWORD && words[w][0] == '\''
730 && words[w][5] == '\'' && strlen(words[w]) == 6)
731 {
732 if (w != wordc - 1)
733 aerr("TODO\n");
734
735 p = words[w];
736 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
737 fprintf(fout, ".long 0x%lx", val);
738 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
739 goto fin;
740 }
741
742 if (type >= DXT_DWORD && strchr(words[w], '.'))
743 {
744 if (w != wordc - 1)
745 aerr("TODO\n");
746
747 if (g_arm_mode && type == DXT_TEN) {
748 fprintf(fout, ".fill 10");
749 snprintf(g_comment, sizeof(g_comment), "%s %s",
750 type_name_float(type), words[w]);
751 }
752 else
753 fprintf(fout, "%s %s", type_name_float(type), words[w]);
754 goto fin;
755 }
756
757 first = 1;
758 fprintf(fout, "%s ", type_name(type));
759 for (; w < wordc; w++)
760 {
761 if (!first)
762 fprintf(fout, ", ");
763
764 is_label = is_bss = 0;
765 if (w <= wordc - 2 && IS(words[w], "offset")) {
766 is_label = 1;
767 w++;
768 }
769 else if (IS(words[w], "?")) {
770 is_bss = 1;
771 }
772 else if (type == DXT_DWORD
773 && !('0' <= words[w][0] && words[w][0] <= '9'))
774 {
775 // assume label
776 is_label = 1;
777 }
778
779 if (is_bss) {
780 fprintf(fout, "0");
781 }
782 else if (is_label) {
783 p = words[w];
784 if (IS_START(p, "loc_") || IS_START(p, "__imp")
785 || strchr(p, '?') || strchr(p, '@')
786 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
787 cmpstringp))
788 {
789 fprintf(fout, "0");
790 snprintf(g_comment, sizeof(g_comment), "%s", p);
791 }
792 else {
793 pp = check_var(fhdr, sym, p);
794 if (pp == NULL) {
795 fprintf(fout, "%s%s",
796 (no_decorations || p[0] == '_') ? "" : "_", p);
797 }
798 else {
799 if (no_decorations)
800 fprintf(fout, "%s", pp->name);
801 else
802 output_decorated_pp(fout, pp);
803 }
804 }
805 }
806 else {
807 val64 = parse_number(words[w], 1);
808 if (val64 < 10)
809 fprintf(fout, "%d", (int)val64);
810 else
811 fprintf(fout, "0x%" PRIx64, val64);
812 }
813
814 first = 0;
815 }
816
817fin:
818 if (g_comment[0] != 0) {
819 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
820 g_comment[0] = 0;
821 }
822 fprintf(fout, "\n");
823 }
824 }
825
826 fprintf(fout, "\n");
827
828 // dump public syms
829 for (i = 0; i < pub_sym_cnt; i++)
830 fprintf(fout, ".global %s%s\n",
831 no_decorations ? "" : "_", pub_syms[i]);
832
833 fclose(fout);
834 fclose(fasm);
835 if (fhdr != NULL)
836 fclose(fhdr);
837
838 return 0;
839}
840
841// vim:ts=2:shiftwidth=2:expandtab