translate: allow negative offsets on indexed stack accesses
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16#include "common.h"
17
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when strings w and y are equal. */
#define IS(w, y) (!strcmp((w), (y)))
/* Non-zero when string w begins with string y. */
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
21
22#include "protoparse.h"
23
/* position in the .asm input, used as the prefix of every diagnostic */
static const char *asmfn;
static int asmln;

/* prototype of the most recent function-pointer label, see check_var() */
static const struct parsed_proto *g_func_sym_pp;
/* pending trailing comment for the output line currently being built */
static char g_comment[256];
/* number of warnings issued so far by awarn() */
static int g_warn_cnt;
/* set by -i: don't warn about unhandled register calling conventions */
static int g_cconv_novalidate;
/* set by -a: changes how align_value() and 10-byte floats are emitted */
static int g_arm_mode;
32
// Data directive kinds (db/dw/dd/dq/dt).
// note: must be in ascending order, the code compares these
// with relational operators (e.g. type >= DXT_DWORD)
enum dx_type {
  DXT_UNSPEC = 0, // not a data directive
  DXT_BYTE   = 1, // db
  DXT_WORD   = 2, // dw
  DXT_DWORD  = 3, // dd
  DXT_QUAD   = 4, // dq
  DXT_TEN    = 5, // dt
};
42
/*
 * Diagnostics. Every message is prefixed with the current .asm file name
 * and line number (asmfn/asmln) and goes to stdout.
 */

/* informational note, never terminates */
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

/* warning; closes all streams and exits when the 10th one is reached */
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

/* fatal error; closes all streams and exits immediately */
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
57
58#include "masm_tools.h"
59
/*
 * Copy the next word of s into w (at most wsize - 1 chars, always
 * terminated). Leading blanks are skipped first. A word ends at a blank
 * or ',' unless that character is inside a '...' quoted part.
 * Prints a warning if the word did not fit into w.
 * Returns a pointer to the first character of s that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped on something that is not a separator: ran out of space
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
81
82static void next_section(FILE *fasm, char *name)
83{
84 char words[2][256];
85 char line[256];
86 int wordc;
87 char *p;
88
89 name[0] = 0;
90
91 while (my_fgets(line, sizeof(line), fasm))
92 {
93 wordc = 0;
94 asmln++;
95
96 p = sskip(line);
97 if (*p == 0)
98 continue;
99
100 if (*p == ';')
101 continue;
102
103 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
104 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
105 if (*p == 0 || *p == ';') {
106 wordc++;
107 break;
108 }
109 }
110
111 if (wordc < 2)
112 continue;
113
114 if (!IS(words[1], "segment"))
115 continue;
116
117 strcpy(name, words[0]);
118 break;
119 }
120}
121
122static enum dx_type parse_dx_directive(const char *name)
123{
124 if (IS(name, "dd"))
125 return DXT_DWORD;
126 if (IS(name, "dw"))
127 return DXT_WORD;
128 if (IS(name, "db"))
129 return DXT_BYTE;
130 if (IS(name, "dq"))
131 return DXT_QUAD;
132 if (IS(name, "dt"))
133 return DXT_TEN;
134
135 return DXT_UNSPEC;
136}
137
138static const char *type_name(enum dx_type type)
139{
140 switch (type) {
141 case DXT_BYTE:
142 return ".byte";
143 case DXT_WORD:
144 return ".hword";
145 case DXT_DWORD:
146 return ".long";
147 case DXT_QUAD:
148 return ".quad";
149 case DXT_TEN:
150 return ".tfloat";
151 case DXT_UNSPEC:
152 break;
153 }
154 return "<bad>";
155}
156
157static const char *type_name_float(enum dx_type type)
158{
159 switch (type) {
160 case DXT_DWORD:
161 return ".float";
162 case DXT_QUAD:
163 return ".double";
164 case DXT_TEN:
165 return ".tfloat";
166 default:
167 break;
168 }
169 return "<bad_float>";
170}
171
172static int type_size(enum dx_type type)
173{
174 switch (type) {
175 case DXT_BYTE:
176 return 1;
177 case DXT_WORD:
178 return 2;
179 case DXT_DWORD:
180 return 4;
181 case DXT_QUAD:
182 return 8;
183 case DXT_TEN:
184 return 10;
185 case DXT_UNSPEC:
186 break;
187 }
188 return -1;
189}
190
/*
 * Escape s in place for use inside a double-quoted assembler string:
 * '"' becomes \x22 and '\' becomes \\. All other characters are copied
 * unchanged.
 *
 * The escaped text is built in a 256 byte scratch buffer and then copied
 * back over s, so s must point to a buffer of at least 256 bytes (or one
 * known to be large enough for the escaped result). If the escaped text
 * would not fit in 255 characters, it is cut before the first character
 * (or escape sequence) that does not fit and a warning is printed,
 * instead of overflowing the scratch buffer.
 *
 * Returns s.
 */
static char *escape_string(char *s)
{
  char buf[256];
  size_t used = 0;
  const char *in;

  for (in = s; *in != 0; in++) {
    const char *esc = NULL;
    size_t need = 1;

    if (*in == '"')
      esc = "\\x22";
    else if (*in == '\\')
      esc = "\\\\";
    if (esc != NULL)
      need = strlen(esc);

    // always keep one byte for the terminator
    if (used + need > sizeof(buf) - 1) {
      printf("warning: escaped string truncated\n");
      break;
    }

    if (esc != NULL)
      memcpy(buf + used, esc, need);
    else
      buf[used] = *in;
    used += need;
  }
  buf[used] = 0;

  return strcpy(s, buf);
}
212
213static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
214 size_t buf_size)
215{
216 char *p = buf;
217 size_t l;
218 int i;
219
220 if (pp->ret_type.is_ptr)
221 *p++ = 'p';
222 else if (IS(pp->ret_type.name, "void"))
223 *p++ = 'v';
224 else
225 *p++ = 'i';
226 *p++ = '(';
227 l = 2;
228
229 for (i = 0; i < pp->argc; i++) {
230 if (pp->arg[i].reg != NULL)
231 snprintf(buf + l, buf_size - l, "%s%s",
232 i == 0 ? "" : ",", pp->arg[i].reg);
233 else
234 snprintf(buf + l, buf_size - l, "%sa%d",
235 i == 0 ? "" : ",", i + 1);
236 l = strlen(buf);
237 }
238 snprintf(buf + l, buf_size - l, ")");
239}
240
241static const struct parsed_proto *check_var(FILE *fhdr,
242 const char *sym, const char *varname)
243{
244 const struct parsed_proto *pp, *pp_sym;
245 char fp_sym[256], fp_var[256], *p;
246 int i;
247
248 pp = proto_parse(fhdr, varname, 1);
249 if (pp == NULL) {
250 if (IS_START(varname, "sub_"))
251 awarn("sub_ sym missing proto: '%s'\n", varname);
252 return NULL;
253 }
254
255 if (!pp->is_func && !pp->is_fptr)
256 return NULL;
257
258 pp_print(fp_var, sizeof(fp_var), pp);
259
260 if (pp->argc_reg == 0)
261 goto check_sym;
262 if (pp->argc_reg == 1 && pp->argc_stack == 0
263 && IS(pp->arg[0].reg, "ecx"))
264 {
265 goto check_sym;
266 }
267 if (!g_cconv_novalidate
268 && (pp->argc_reg != 2
269 || !IS(pp->arg[0].reg, "ecx")
270 || !IS(pp->arg[1].reg, "edx")))
271 {
272 awarn("unhandled reg call: %s\n", fp_var);
273 }
274
275check_sym:
276 // fptrs must use 32bit args, callsite might have no information and
277 // lack a cast to smaller types, which results in incorrectly masked
278 // args passed (callee may assume masked args, it does on ARM)
279 for (i = 0; i < pp->argc; i++) {
280 if (pp->arg[i].type.is_ptr)
281 continue;
282 p = pp->arg[i].type.name;
283 if (strstr(p, "int8") || strstr(p, "int16")
284 || strstr(p, "char") || strstr(p, "short"))
285 {
286 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
287 }
288 }
289
290 sprint_pp_short(pp, g_comment, sizeof(g_comment));
291
292 if (sym != NULL) {
293 g_func_sym_pp = NULL;
294 pp_sym = proto_parse(fhdr, sym, 1);
295 if (pp_sym == NULL)
296 return pp;
297 if (!pp_sym->is_fptr)
298 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
299 g_func_sym_pp = pp_sym;
300 }
301 else {
302 pp_sym = g_func_sym_pp;
303 if (pp_sym == NULL)
304 return pp;
305 }
306
307 if (pp_cmp_func(pp, pp_sym)) {
308 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
309 anote("var: %s\n", fp_var);
310 anote("sym: %s\n", fp_sym);
311 awarn("^ mismatch\n");
312 }
313
314 return pp;
315}
316
317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
/*
 * qsort/bsearch comparator for arrays of C strings: both arguments
 * point to a 'char *' element, the strings are ordered by strcmp().
 */
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
343
/*
 * Labels of strings that are replaced by zero fill in the output.
 * The table is sorted at startup (qsort in main) before it is searched
 * with bsearch, so entries need not be kept in order here.
 * XXX: maybe move to external file?
 */
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",

  /* R60xx messages */
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",

  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
377int main(int argc, char *argv[])
378{
379 FILE *fout, *fasm, *fhdr = NULL, *frlist;
380 const struct parsed_proto *pp;
381 int no_decorations = 0;
382 char comment_char = '#';
383 char words[20][256];
384 char word[256];
385 char line[256];
386 char last_sym[32];
387 unsigned long val;
388 unsigned long cnt;
389 const char *sym;
390 enum dx_type type;
391 char **pub_syms;
392 int pub_sym_cnt = 0;
393 int pub_sym_alloc;
394 char **rlist;
395 int rlist_cnt = 0;
396 int rlist_alloc;
397 int header_mode = 0;
398 int is_ro = 0;
399 int is_label;
400 int is_bss;
401 int wordc;
402 int first;
403 int arg_out;
404 int arg = 1;
405 int len;
406 int w, i;
407 char *p;
408 char *p2;
409
410 if (argc < 4) {
411 // -nd: no symbol decorations
412 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
413 "%s -hdr <.h> <.asm>\n",
414 argv[0], argv[0]);
415 return 1;
416 }
417
418 for (arg = 1; arg < argc; arg++) {
419 if (IS(argv[arg], "-nd"))
420 no_decorations = 1;
421 else if (IS(argv[arg], "-i"))
422 g_cconv_novalidate = 1;
423 else if (IS(argv[arg], "-a")) {
424 comment_char = '@';
425 g_arm_mode = 1;
426 }
427 else if (IS(argv[arg], "-hdr"))
428 header_mode = 1;
429 else
430 break;
431 }
432
433 arg_out = arg++;
434
435 asmfn = argv[arg++];
436 fasm = fopen(asmfn, "r");
437 my_assert_not(fasm, NULL);
438
439 if (!header_mode) {
440 hdrfn = argv[arg++];
441 fhdr = fopen(hdrfn, "r");
442 my_assert_not(fhdr, NULL);
443 }
444
445 fout = fopen(argv[arg_out], "w");
446 my_assert_not(fout, NULL);
447
448 pub_sym_alloc = 64;
449 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
450 my_assert_not(pub_syms, NULL);
451
452 rlist_alloc = 64;
453 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
454 my_assert_not(rlist, NULL);
455
456 for (; arg < argc; arg++) {
457 frlist = fopen(argv[arg], "r");
458 my_assert_not(frlist, NULL);
459
460 while (my_fgets(line, sizeof(line), frlist)) {
461 p = sskip(line);
462 if (*p == 0 || *p == ';')
463 continue;
464
465 p = next_word(words[0], sizeof(words[0]), p);
466 if (words[0][0] == 0)
467 continue;
468
469 if (rlist_cnt >= rlist_alloc) {
470 rlist_alloc = rlist_alloc * 2 + 64;
471 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
472 my_assert_not(rlist, NULL);
473 }
474 rlist[rlist_cnt++] = strdup(words[0]);
475 }
476
477 fclose(frlist);
478 frlist = NULL;
479 }
480
481 if (rlist_cnt > 0)
482 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
483
484 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
485 sizeof(unwanted_syms[0]), cmpstringp);
486
487 last_sym[0] = 0;
488
489 while (1) {
490 next_section(fasm, line);
491 if (feof(fasm))
492 break;
493 if (IS(line + 1, "text"))
494 continue;
495
496 if (IS(line + 1, "rdata")) {
497 is_ro = 1;
498 if (!header_mode)
499 fprintf(fout, "\n.section .rodata\n");
500 }
501 else if (IS(line + 1, "data")) {
502 is_ro = 0;
503 if (!header_mode)
504 fprintf(fout, "\n.data\n");
505 }
506 else
507 aerr("unhandled section: '%s'\n", line);
508
509 if (!header_mode)
510 fprintf(fout, ".align %d\n", align_value(4));
511
512 while (my_fgets(line, sizeof(line), fasm))
513 {
514 sym = NULL;
515 asmln++;
516
517 p = sskip(line);
518 if (*p == 0)
519 continue;
520
521 if (*p == ';') {
522 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
523 // ;org is only seen at section start, so assume . addr 0
524 i &= 0xfff;
525 if (i != 0 && !header_mode)
526 fprintf(fout, "\t\t .skip 0x%x\n", i);
527 }
528 continue;
529 }
530
531 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
532 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
533 if (*p == 0 || *p == ';') {
534 wordc++;
535 break;
536 }
537 if (*p == ',') {
538 p = sskip(p + 1);
539 }
540 }
541
542 if (*p == ';') {
543 p = sskip(p + 1);
544 if (IS_START(p, "sctclrtype"))
545 g_func_sym_pp = NULL;
546 }
547
548 if (wordc == 2 && IS(words[1], "ends"))
549 break;
550 if (wordc <= 2 && IS(words[0], "end"))
551 break;
552 if (wordc < 2)
553 aerr("unhandled: '%s'\n", words[0]);
554
555 // don't cares
556 if (IS(words[0], "assume"))
557 continue;
558
559 if (IS(words[0], "align")) {
560 if (header_mode)
561 continue;
562
563 val = parse_number(words[1]);
564 fprintf(fout, "\t\t .align %d", align_value(val));
565 goto fin;
566 }
567
568 w = 1;
569 type = parse_dx_directive(words[0]);
570 if (type == DXT_UNSPEC) {
571 type = parse_dx_directive(words[1]);
572 sym = words[0];
573 w = 2;
574 }
575 if (type == DXT_UNSPEC)
576 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
577
578 if (sym != NULL)
579 {
580 if (header_mode) {
581 int is_str = 0;
582
583 fprintf(fout, "extern ");
584 if (is_ro)
585 fprintf(fout, "const ");
586
587 switch (type) {
588 case DXT_BYTE:
589 for (i = w; i < wordc; i++)
590 if (words[i][0] == '\'')
591 is_str = 1;
592 if (is_str)
593 fprintf(fout, "char %s[];\n", sym);
594 else
595 fprintf(fout, "uint8_t %s;\n", sym);
596 break;
597
598 case DXT_WORD:
599 fprintf(fout, "uint16_t %s;\n", sym);
600 break;
601
602 case DXT_DWORD:
603 fprintf(fout, "uint32_t %s;\n", sym);
604 break;
605
606 default:
607 fprintf(fout, "_UNKNOWN %s;\n", sym);
608 break;
609 }
610
611 continue;
612 }
613
614 snprintf(last_sym, sizeof(last_sym), "%s", sym);
615
616 pp = proto_parse(fhdr, sym, 1);
617 if (pp != NULL) {
618 g_func_sym_pp = NULL;
619
620 // public/global name
621 if (pub_sym_cnt >= pub_sym_alloc) {
622 pub_sym_alloc *= 2;
623 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
624 my_assert_not(pub_syms, NULL);
625 }
626 pub_syms[pub_sym_cnt++] = strdup(sym);
627 }
628
629 len = strlen(sym);
630 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
631
632 len += 2;
633 if (len < 8)
634 fprintf(fout, "\t");
635 if (len < 16)
636 fprintf(fout, "\t");
637 if (len <= 16)
638 fprintf(fout, " ");
639 else
640 fprintf(fout, " ");
641 }
642 else {
643 if (header_mode)
644 continue;
645
646 fprintf(fout, "\t\t ");
647 }
648
649 // fill out some unwanted strings with zeroes..
650 if (type == DXT_BYTE && words[w][0] == '\''
651 && is_unwanted_sym(last_sym))
652 {
653 len = 0;
654 for (; w < wordc; w++) {
655 if (words[w][0] == '\'') {
656 p = words[w] + 1;
657 for (; *p && *p != '\''; p++)
658 len++;
659 }
660 else {
661 // assume encoded byte
662 len++;
663 }
664 }
665 fprintf(fout, ".skip %d", len);
666 goto fin;
667 }
668 else if (type == DXT_BYTE
669 && (words[w][0] == '\''
670 || (w + 1 < wordc && words[w + 1][0] == '\'')))
671 {
672 // string; use asciz for most common case
673 if (w == wordc - 2 && IS(words[w + 1], "0")) {
674 fprintf(fout, ".asciz \"");
675 wordc--;
676 }
677 else
678 fprintf(fout, ".ascii \"");
679
680 for (; w < wordc; w++) {
681 if (words[w][0] == '\'') {
682 p = words[w] + 1;
683 p2 = strchr(p, '\'');
684 if (p2 == NULL)
685 aerr("unterminated string? '%s'\n", p);
686 memcpy(word, p, p2 - p);
687 word[p2 - p] = 0;
688 fprintf(fout, "%s", escape_string(word));
689 }
690 else {
691 val = parse_number(words[w]);
692 if (val & ~0xff)
693 aerr("bad string trailing byte?\n");
694 fprintf(fout, "\\x%02lx", val);
695 }
696 }
697 fprintf(fout, "\"");
698 goto fin;
699 }
700
701 if (w == wordc - 2) {
702 if (IS_START(words[w + 1], "dup(")) {
703 cnt = parse_number(words[w]);
704 p = words[w + 1] + 4;
705 p2 = strchr(p, ')');
706 if (p2 == NULL)
707 aerr("bad dup?\n");
708 memmove(word, p, p2 - p);
709 word[p2 - p] = 0;
710
711 val = 0;
712 if (!IS(word, "?"))
713 val = parse_number(word);
714
715 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
716 cnt, type_size(type), val);
717 goto fin;
718 }
719 }
720
721 if (type == DXT_DWORD && words[w][0] == '\''
722 && words[w][5] == '\'' && strlen(words[w]) == 6)
723 {
724 if (w != wordc - 1)
725 aerr("TODO\n");
726
727 p = words[w];
728 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
729 fprintf(fout, ".long 0x%lx", val);
730 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
731 goto fin;
732 }
733
734 if (type >= DXT_DWORD && strchr(words[w], '.'))
735 {
736 if (w != wordc - 1)
737 aerr("TODO\n");
738
739 if (g_arm_mode && type == DXT_TEN) {
740 fprintf(fout, ".fill 10");
741 snprintf(g_comment, sizeof(g_comment), "%s %s",
742 type_name_float(type), words[w]);
743 }
744 else
745 fprintf(fout, "%s %s", type_name_float(type), words[w]);
746 goto fin;
747 }
748
749 first = 1;
750 fprintf(fout, "%s ", type_name(type));
751 for (; w < wordc; w++)
752 {
753 if (!first)
754 fprintf(fout, ", ");
755
756 is_label = is_bss = 0;
757 if (w <= wordc - 2 && IS(words[w], "offset")) {
758 is_label = 1;
759 w++;
760 }
761 else if (IS(words[w], "?")) {
762 is_bss = 1;
763 }
764 else if (type == DXT_DWORD
765 && !('0' <= words[w][0] && words[w][0] <= '9'))
766 {
767 // assume label
768 is_label = 1;
769 }
770
771 if (is_bss) {
772 fprintf(fout, "0");
773 }
774 else if (is_label) {
775 p = words[w];
776 if (IS_START(p, "loc_") || IS_START(p, "__imp")
777 || strchr(p, '?') || strchr(p, '@')
778 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
779 cmpstringp))
780 {
781 fprintf(fout, "0");
782 snprintf(g_comment, sizeof(g_comment), "%s", p);
783 }
784 else {
785 pp = check_var(fhdr, sym, p);
786 if (pp == NULL) {
787 fprintf(fout, "%s%s",
788 (no_decorations || p[0] == '_') ? "" : "_", p);
789 }
790 else {
791 if (no_decorations)
792 fprintf(fout, "%s", pp->name);
793 else
794 output_decorated_pp(fout, pp);
795 }
796 }
797 }
798 else {
799 val = parse_number(words[w]);
800 if (val < 10)
801 fprintf(fout, "%ld", val);
802 else
803 fprintf(fout, "0x%lx", val);
804 }
805
806 first = 0;
807 }
808
809fin:
810 if (g_comment[0] != 0) {
811 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
812 g_comment[0] = 0;
813 }
814 fprintf(fout, "\n");
815 }
816 }
817
818 fprintf(fout, "\n");
819
820 // dump public syms
821 for (i = 0; i < pub_sym_cnt; i++)
822 fprintf(fout, ".global %s%s\n",
823 no_decorations ? "" : "_", pub_syms[i]);
824
825 fclose(fout);
826 fclose(fasm);
827 if (fhdr != NULL)
828 fclose(fhdr);
829
830 return 0;
831}
832
833// vim:ts=2:shiftwidth=2:expandtab