more 32bit fixes
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13#include <stdint.h>
14#include <inttypes.h>
15
16#include "my_assert.h"
17#include "my_str.h"
18#include "common.h"
19
/* Number of elements in x; x must be a real array, not a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when the strings w and y are equal. */
#define IS(w, y) (!strcmp((w), (y)))
/* Non-zero when string w starts with string y (y is evaluated twice). */
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
23
24#include "protoparse.h"
25
// name of the .asm input and the line currently being processed;
// both are printed as the prefix of anote/awarn/aerr messages
static const char *asmfn;
static int asmln;

// prototype of the function pointer label handled most recently by
// check_var(); reset to NULL when a new label with a known prototype or
// a "sctclrtype" comment is seen
static const struct parsed_proto *g_func_sym_pp;
// pending trailing comment for the current output line; printed and
// cleared at the end of each converted line
static char g_comment[256];
// number of warnings issued so far (awarn gives up at 10)
static int g_warn_cnt;
// set by -i: don't warn about unhandled register calling conventions
static int g_cconv_novalidate;
// set by -a: ARM flavoured output ('@' comments, log2 .align values)
static int g_arm_mode;
34
/*
 * Element type selected by a MASM data directive (db/dw/dd/dq/dt).
 * note: must be in ascending order, the code compares these with >=
 */
enum dx_type {
  DXT_UNSPEC = 0, /* not a data directive */
  DXT_BYTE   = 1, /* db */
  DXT_WORD   = 2, /* dw */
  DXT_DWORD  = 3, /* dd */
  DXT_QUAD   = 4, /* dq */
  DXT_TEN    = 5, /* dt */
};
44
/*
 * Diagnostics. Every message goes to stdout and is prefixed with the
 * current asm file name and line number (asmfn, asmln).
 */
#define adiag_(kind, fmt, ...) \
  printf("%s:%d: " kind ": " fmt, asmfn, asmln, ##__VA_ARGS__)

/* informational note, execution continues */
#define anote(fmt, ...) \
  adiag_("note", fmt, ##__VA_ARGS__)

/* warning; the 10th warning closes all streams and exits with status 1 */
#define awarn(fmt, ...) do { \
  adiag_("warning", fmt, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

/* fatal error: closes all streams and exits with status 1 */
#define aerr(fmt, ...) do { \
  adiag_("error", fmt, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
59
60#include "masm_tools.h"
61
/*
 * Copy the next word of s into w, keeping '...' quoted runs together.
 *
 * s is first passed through sskip(). Outside quotes a word ends at a
 * blank (my_isblank) or a ','; inside quotes only the end of the string
 * ends it. At most wsize - 1 characters are copied and w is always
 * NUL-terminated; if the word did not fit, a warning is printed.
 *
 * Returns a pointer to the first character that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped in the middle of a word -> it was cut short
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
83
84static void next_section(FILE *fasm, char *name)
85{
86 char words[2][256];
87 char line[256];
88 int wordc;
89 char *p;
90
91 name[0] = 0;
92
93 while (my_fgets(line, sizeof(line), fasm))
94 {
95 wordc = 0;
96 asmln++;
97
98 p = sskip(line);
99 if (*p == 0)
100 continue;
101
102 if (*p == ';')
103 continue;
104
105 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
106 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
107 if (*p == 0 || *p == ';') {
108 wordc++;
109 break;
110 }
111 }
112
113 if (wordc < 2)
114 continue;
115
116 if (!IS(words[1], "segment"))
117 continue;
118
119 strcpy(name, words[0]);
120 break;
121 }
122}
123
124static enum dx_type parse_dx_directive(const char *name)
125{
126 if (IS(name, "dd"))
127 return DXT_DWORD;
128 if (IS(name, "dw"))
129 return DXT_WORD;
130 if (IS(name, "db"))
131 return DXT_BYTE;
132 if (IS(name, "dq"))
133 return DXT_QUAD;
134 if (IS(name, "dt"))
135 return DXT_TEN;
136
137 return DXT_UNSPEC;
138}
139
140static const char *type_name(enum dx_type type)
141{
142 switch (type) {
143 case DXT_BYTE:
144 return ".byte";
145 case DXT_WORD:
146 return ".hword";
147 case DXT_DWORD:
148 return ".long";
149 case DXT_QUAD:
150 return ".quad";
151 case DXT_TEN:
152 return ".tfloat";
153 case DXT_UNSPEC:
154 break;
155 }
156 return "<bad>";
157}
158
159static const char *type_name_float(enum dx_type type)
160{
161 switch (type) {
162 case DXT_DWORD:
163 return ".float";
164 case DXT_QUAD:
165 return ".double";
166 case DXT_TEN:
167 return ".tfloat";
168 default:
169 break;
170 }
171 return "<bad_float>";
172}
173
174static int type_size(enum dx_type type)
175{
176 switch (type) {
177 case DXT_BYTE:
178 return 1;
179 case DXT_WORD:
180 return 2;
181 case DXT_DWORD:
182 return 4;
183 case DXT_QUAD:
184 return 8;
185 case DXT_TEN:
186 return 10;
187 case DXT_UNSPEC:
188 break;
189 }
190 return -1;
191}
192
/*
 * Escape '"' and '\' in s with a backslash so the text can be placed
 * inside a double-quoted assembler string.
 *
 * The escaped text is written back over s (in place, starting at s)
 * and s is returned. The result may be longer than the input, so the
 * buffer behind s must have room for it; the escaped text including
 * its terminator is limited to 256 bytes, which matches the 256-byte
 * buffer the caller in this file passes. Longer results are a fatal
 * error (aerr).
 */
static char *escape_string(char *s)
{
  char buf[256];
  const char *in;
  size_t need;
  size_t n = 0;

  for (in = s; *in != 0; in++) {
    need = (*in == '"' || *in == '\\') ? 2 : 1;
    // check before writing; always keep one byte for the terminator
    if (n + need + 1 > sizeof(buf))
      aerr("string is too long\n");
    if (need == 2)
      buf[n++] = '\\';
    buf[n++] = *in;
  }
  buf[n] = 0;

  memcpy(s, buf, n + 1);
  return s;
}
216
217static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
218 size_t buf_size)
219{
220 char *p = buf;
221 size_t l;
222 int i;
223
224 if (pp->ret_type.is_ptr)
225 *p++ = 'p';
226 else if (IS(pp->ret_type.name, "void"))
227 *p++ = 'v';
228 else
229 *p++ = 'i';
230 *p++ = '(';
231 l = 2;
232
233 for (i = 0; i < pp->argc; i++) {
234 if (pp->arg[i].reg != NULL)
235 snprintf(buf + l, buf_size - l, "%s%s",
236 i == 0 ? "" : ",", pp->arg[i].reg);
237 else
238 snprintf(buf + l, buf_size - l, "%sa%d",
239 i == 0 ? "" : ",", i + 1);
240 l = strlen(buf);
241 }
242 snprintf(buf + l, buf_size - l, ")");
243}
244
245static const struct parsed_proto *check_var(FILE *fhdr,
246 const char *sym, const char *varname)
247{
248 const struct parsed_proto *pp, *pp_sym;
249 char fp_sym[256], fp_var[256], *p;
250 int i;
251
252 pp = proto_parse(fhdr, varname, 1);
253 if (pp == NULL) {
254 if (IS_START(varname, "sub_"))
255 awarn("sub_ sym missing proto: '%s'\n", varname);
256 return NULL;
257 }
258
259 if (!pp->is_func && !pp->is_fptr)
260 return NULL;
261
262 pp_print(fp_var, sizeof(fp_var), pp);
263
264 if (pp->argc_reg == 0)
265 goto check_sym;
266 if (pp->argc_reg == 1 && pp->argc_stack == 0
267 && IS(pp->arg[0].reg, "ecx"))
268 {
269 goto check_sym;
270 }
271 if (!g_cconv_novalidate
272 && (pp->argc_reg != 2
273 || !IS(pp->arg[0].reg, "ecx")
274 || !IS(pp->arg[1].reg, "edx")))
275 {
276 awarn("unhandled reg call: %s\n", fp_var);
277 }
278
279check_sym:
280 // fptrs must use 32bit args, callsite might have no information and
281 // lack a cast to smaller types, which results in incorrectly masked
282 // args passed (callee may assume masked args, it does on ARM)
283 for (i = 0; i < pp->argc; i++) {
284 if (pp->arg[i].type.is_ptr)
285 continue;
286 p = pp->arg[i].type.name;
287 if (strstr(p, "int8") || strstr(p, "int16")
288 || strstr(p, "char") || strstr(p, "short"))
289 {
290 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
291 }
292 }
293
294 sprint_pp_short(pp, g_comment, sizeof(g_comment));
295
296 if (sym != NULL) {
297 g_func_sym_pp = NULL;
298 pp_sym = proto_parse(fhdr, sym, 1);
299 if (pp_sym == NULL)
300 return pp;
301 if (!pp_sym->is_fptr)
302 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
303 g_func_sym_pp = pp_sym;
304 }
305 else {
306 pp_sym = g_func_sym_pp;
307 if (pp_sym == NULL)
308 return pp;
309 }
310
311 if (pp_cmp_func(pp, pp_sym)) {
312 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
313 anote("var: %s\n", fp_var);
314 anote("sym: %s\n", fp_sym);
315 awarn("^ mismatch\n");
316 }
317
318 return pp;
319}
320
321static void output_decorated_pp(FILE *fout,
322 const struct parsed_proto *pp)
323{
324 if (pp->name[0] != '_')
325 fprintf(fout, pp->is_fastcall ? "@" : "_");
326 fprintf(fout, "%s", pp->name);
327 if (pp->is_stdcall && pp->argc > 0)
328 fprintf(fout, "@%d", pp->argc * 4);
329}
330
331static int align_value(int src_val)
332{
333 if (src_val <= 0) {
334 awarn("bad align: %d\n", src_val);
335 src_val = 1;
336 }
337 if (!g_arm_mode)
338 return src_val;
339
340 return __builtin_ffs(src_val) - 1;
341}
342
/*
 * qsort/bsearch comparator for arrays of C strings: p1 and p2 point at
 * the array elements (char pointers), which are compared with strcmp.
 */
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
347
/*
 * Labels of strings that should not be carried over: string data under
 * one of these labels is replaced by zero fill in the output.
 * (The names look like labels of compiler runtime messages - confirm.)
 *
 * The table is not kept sorted here; main() sorts it before the first
 * lookup, which is why the array itself is not const.
 *
 * XXX: maybe move to external file?
 */
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
374
375static int is_unwanted_sym(const char *sym)
376{
377 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
378 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
379}
380
381int main(int argc, char *argv[])
382{
383 FILE *fout, *fasm, *fhdr = NULL, *frlist;
384 const struct parsed_proto *pp;
385 int no_decorations = 0;
386 char comment_char = '#';
387 char words[20][256];
388 char word[256];
389 char line[256];
390 char last_sym[32];
391 unsigned long val;
392 unsigned long cnt;
393 uint64_t val64;
394 const char *sym;
395 enum dx_type type;
396 char **pub_syms;
397 int pub_sym_cnt = 0;
398 int pub_sym_alloc;
399 char **rlist;
400 int rlist_cnt = 0;
401 int rlist_alloc;
402 int header_mode = 0;
403 int is_ro = 0;
404 int is_label;
405 int is_bss;
406 int wordc;
407 int first;
408 int arg_out;
409 int arg = 1;
410 int len;
411 int w, i;
412 char *p;
413 char *p2;
414
415 if (argc < 4) {
416 // -nd: no symbol decorations
417 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
418 "%s -hdr <.h> <.asm>\n",
419 argv[0], argv[0]);
420 return 1;
421 }
422
423 for (arg = 1; arg < argc; arg++) {
424 if (IS(argv[arg], "-nd"))
425 no_decorations = 1;
426 else if (IS(argv[arg], "-i"))
427 g_cconv_novalidate = 1;
428 else if (IS(argv[arg], "-a")) {
429 comment_char = '@';
430 g_arm_mode = 1;
431 }
432 else if (IS(argv[arg], "-hdr"))
433 header_mode = 1;
434 else
435 break;
436 }
437
438 arg_out = arg++;
439
440 asmfn = argv[arg++];
441 fasm = fopen(asmfn, "r");
442 my_assert_not(fasm, NULL);
443
444 if (!header_mode) {
445 hdrfn = argv[arg++];
446 fhdr = fopen(hdrfn, "r");
447 my_assert_not(fhdr, NULL);
448 }
449
450 fout = fopen(argv[arg_out], "w");
451 my_assert_not(fout, NULL);
452
453 pub_sym_alloc = 64;
454 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
455 my_assert_not(pub_syms, NULL);
456
457 rlist_alloc = 64;
458 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
459 my_assert_not(rlist, NULL);
460
461 for (; arg < argc; arg++) {
462 frlist = fopen(argv[arg], "r");
463 my_assert_not(frlist, NULL);
464
465 while (my_fgets(line, sizeof(line), frlist)) {
466 p = sskip(line);
467 if (*p == 0 || *p == ';')
468 continue;
469
470 p = next_word(words[0], sizeof(words[0]), p);
471 if (words[0][0] == 0)
472 continue;
473
474 if (rlist_cnt >= rlist_alloc) {
475 rlist_alloc = rlist_alloc * 2 + 64;
476 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
477 my_assert_not(rlist, NULL);
478 }
479 rlist[rlist_cnt++] = strdup(words[0]);
480 }
481
482 fclose(frlist);
483 frlist = NULL;
484 }
485
486 if (rlist_cnt > 0)
487 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
488
489 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
490 sizeof(unwanted_syms[0]), cmpstringp);
491
492 last_sym[0] = 0;
493
494 while (1) {
495 next_section(fasm, line);
496 if (feof(fasm))
497 break;
498 if (IS(line + 1, "text"))
499 continue;
500
501 if (IS(line + 1, "rdata")) {
502 is_ro = 1;
503 if (!header_mode)
504 fprintf(fout, "\n.section .rodata\n");
505 }
506 else if (IS(line + 1, "data")) {
507 is_ro = 0;
508 if (!header_mode)
509 fprintf(fout, "\n.data\n");
510 }
511 else
512 aerr("unhandled section: '%s'\n", line);
513
514 if (!header_mode)
515 fprintf(fout, ".align %d\n", align_value(4));
516
517 while (my_fgets(line, sizeof(line), fasm))
518 {
519 sym = NULL;
520 asmln++;
521
522 p = sskip(line);
523 if (*p == 0)
524 continue;
525
526 if (*p == ';') {
527 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
528 // ;org is only seen at section start, so assume . addr 0
529 i &= 0xfff;
530 if (i != 0 && !header_mode)
531 fprintf(fout, "\t\t .skip 0x%x\n", i);
532 }
533 continue;
534 }
535
536 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
537 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
538 if (*p == 0 || *p == ';') {
539 wordc++;
540 break;
541 }
542 if (*p == ',') {
543 p = sskip(p + 1);
544 }
545 }
546
547 if (*p == ';') {
548 p = sskip(p + 1);
549 if (IS_START(p, "sctclrtype"))
550 g_func_sym_pp = NULL;
551 }
552
553 if (wordc == 2 && IS(words[1], "ends"))
554 break;
555 if (wordc <= 2 && IS(words[0], "end"))
556 break;
557 if (wordc < 2)
558 aerr("unhandled: '%s'\n", words[0]);
559
560 // don't cares
561 if (IS(words[0], "assume"))
562 continue;
563
564 if (IS(words[0], "align")) {
565 if (header_mode)
566 continue;
567
568 val = parse_number(words[1], 0);
569 fprintf(fout, "\t\t .align %d", align_value(val));
570 goto fin;
571 }
572
573 w = 1;
574 type = parse_dx_directive(words[0]);
575 if (type == DXT_UNSPEC) {
576 type = parse_dx_directive(words[1]);
577 sym = words[0];
578 w = 2;
579 }
580 if (type == DXT_UNSPEC)
581 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
582
583 if (sym != NULL)
584 {
585 if (header_mode) {
586 int is_str = 0;
587
588 fprintf(fout, "extern ");
589 if (is_ro)
590 fprintf(fout, "const ");
591
592 switch (type) {
593 case DXT_BYTE:
594 for (i = w; i < wordc; i++)
595 if (words[i][0] == '\'')
596 is_str = 1;
597 if (is_str)
598 fprintf(fout, "char %s[];\n", sym);
599 else
600 fprintf(fout, "uint8_t %s;\n", sym);
601 break;
602
603 case DXT_WORD:
604 fprintf(fout, "uint16_t %s;\n", sym);
605 break;
606
607 case DXT_DWORD:
608 fprintf(fout, "uint32_t %s;\n", sym);
609 break;
610
611 default:
612 fprintf(fout, "_UNKNOWN %s;\n", sym);
613 break;
614 }
615
616 continue;
617 }
618
619 snprintf(last_sym, sizeof(last_sym), "%s", sym);
620
621 pp = proto_parse(fhdr, sym, 1);
622 if (pp != NULL) {
623 g_func_sym_pp = NULL;
624
625 // public/global name
626 if (pub_sym_cnt >= pub_sym_alloc) {
627 pub_sym_alloc *= 2;
628 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
629 my_assert_not(pub_syms, NULL);
630 }
631 pub_syms[pub_sym_cnt++] = strdup(sym);
632 }
633
634 len = strlen(sym);
635 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
636
637 len += 2;
638 if (len < 8)
639 fprintf(fout, "\t");
640 if (len < 16)
641 fprintf(fout, "\t");
642 if (len <= 16)
643 fprintf(fout, " ");
644 else
645 fprintf(fout, " ");
646 }
647 else {
648 if (header_mode)
649 continue;
650
651 fprintf(fout, "\t\t ");
652 }
653
654 // fill out some unwanted strings with zeroes..
655 if (type == DXT_BYTE && words[w][0] == '\''
656 && is_unwanted_sym(last_sym))
657 {
658 len = 0;
659 for (; w < wordc; w++) {
660 if (words[w][0] == '\'') {
661 p = words[w] + 1;
662 for (; *p && *p != '\''; p++)
663 len++;
664 }
665 else {
666 // assume encoded byte
667 len++;
668 }
669 }
670 fprintf(fout, ".skip %d", len);
671 goto fin;
672 }
673 else if (type == DXT_BYTE
674 && (words[w][0] == '\''
675 || (w + 1 < wordc && words[w + 1][0] == '\'')))
676 {
677 // string; use asciz for most common case
678 if (w == wordc - 2 && IS(words[w + 1], "0")) {
679 fprintf(fout, ".asciz \"");
680 wordc--;
681 }
682 else
683 fprintf(fout, ".ascii \"");
684
685 for (; w < wordc; w++) {
686 if (words[w][0] == '\'') {
687 p = words[w] + 1;
688 p2 = strchr(p, '\'');
689 if (p2 == NULL)
690 aerr("unterminated string? '%s'\n", p);
691 memcpy(word, p, p2 - p);
692 word[p2 - p] = 0;
693 fprintf(fout, "%s", escape_string(word));
694 }
695 else {
696 val = parse_number(words[w], 0);
697 if (val & ~0xff)
698 aerr("bad string trailing byte?\n");
699 // unfortunately \xHH is unusable - gas interprets
700 // things like \x27b as 0x7b, so have to use octal here
701 fprintf(fout, "\\%03lo", val);
702 }
703 }
704 fprintf(fout, "\"");
705 goto fin;
706 }
707
708 if (w == wordc - 2) {
709 if (IS_START(words[w + 1], "dup(")) {
710 cnt = parse_number(words[w], 0);
711 p = words[w + 1] + 4;
712 p2 = strchr(p, ')');
713 if (p2 == NULL)
714 aerr("bad dup?\n");
715 memmove(word, p, p2 - p);
716 word[p2 - p] = 0;
717
718 val = 0;
719 if (!IS(word, "?"))
720 val = parse_number(word, 0);
721
722 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
723 cnt, type_size(type), val);
724 goto fin;
725 }
726 }
727
728 if (type == DXT_DWORD && words[w][0] == '\''
729 && words[w][5] == '\'' && strlen(words[w]) == 6)
730 {
731 if (w != wordc - 1)
732 aerr("TODO\n");
733
734 p = words[w];
735 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
736 fprintf(fout, ".long 0x%lx", val);
737 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
738 goto fin;
739 }
740
741 if (type >= DXT_DWORD && strchr(words[w], '.'))
742 {
743 if (w != wordc - 1)
744 aerr("TODO\n");
745
746 if (g_arm_mode && type == DXT_TEN) {
747 fprintf(fout, ".fill 10");
748 snprintf(g_comment, sizeof(g_comment), "%s %s",
749 type_name_float(type), words[w]);
750 }
751 else
752 fprintf(fout, "%s %s", type_name_float(type), words[w]);
753 goto fin;
754 }
755
756 first = 1;
757 fprintf(fout, "%s ", type_name(type));
758 for (; w < wordc; w++)
759 {
760 if (!first)
761 fprintf(fout, ", ");
762
763 is_label = is_bss = 0;
764 if (w <= wordc - 2 && IS(words[w], "offset")) {
765 is_label = 1;
766 w++;
767 }
768 else if (IS(words[w], "?")) {
769 is_bss = 1;
770 }
771 else if (type == DXT_DWORD
772 && !('0' <= words[w][0] && words[w][0] <= '9'))
773 {
774 // assume label
775 is_label = 1;
776 }
777
778 if (is_bss) {
779 fprintf(fout, "0");
780 }
781 else if (is_label) {
782 p = words[w];
783 if (IS_START(p, "loc_") || IS_START(p, "__imp")
784 || strchr(p, '?') || strchr(p, '@')
785 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
786 cmpstringp))
787 {
788 fprintf(fout, "0");
789 snprintf(g_comment, sizeof(g_comment), "%s", p);
790 }
791 else {
792 pp = check_var(fhdr, sym, p);
793 if (pp == NULL) {
794 fprintf(fout, "%s%s",
795 (no_decorations || p[0] == '_') ? "" : "_", p);
796 }
797 else {
798 if (no_decorations)
799 fprintf(fout, "%s", pp->name);
800 else
801 output_decorated_pp(fout, pp);
802 }
803 }
804 }
805 else {
806 val64 = parse_number(words[w], 1);
807 if (val64 < 10)
808 fprintf(fout, "%d", (int)val64);
809 else
810 fprintf(fout, "0x%" PRIx64, val64);
811 }
812
813 first = 0;
814 }
815
816fin:
817 if (g_comment[0] != 0) {
818 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
819 g_comment[0] = 0;
820 }
821 fprintf(fout, "\n");
822 }
823 }
824
825 fprintf(fout, "\n");
826
827 // dump public syms
828 for (i = 0; i < pub_sym_cnt; i++)
829 fprintf(fout, ".global %s%s\n",
830 no_decorations ? "" : "_", pub_syms[i]);
831
832 fclose(fout);
833 fclose(fasm);
834 if (fhdr != NULL)
835 fclose(fhdr);
836
837 return 0;
838}
839
840// vim:ts=2:shiftwidth=2:expandtab