translate: add some help text
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16
// number of elements of a real array (not usable on pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// non-zero when strings w and y are equal
#define IS(w, y) (!strcmp((w), (y)))
// non-zero when string w begins with string y
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
20
21#include "protoparse.h"
22
// position in the .asm input; every anote/awarn/aerr message starts
// with "<asmfn>:<asmln>:"
static const char *asmfn;
static int asmln;

// command line switches: -i (skip the register-call convention warning)
// and -a (ARM flavoured output)
static int g_cconv_novalidate;
static int g_arm_mode;

// number of awarn() messages so far; the 10th one terminates the program
static int g_warn_cnt;

// text appended as a trailing comment to the output line being built
static char g_comment[256];

// prototype of the most recently seen function pointer label, reused
// for the unlabeled data lines that follow it
static const struct parsed_proto *g_func_sym_pp;
31
// Kind of a data definition directive (db/dw/dd/dq/dt).
// note: must be in ascending order, main() compares with '>='
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE = 1,
  DXT_WORD = 2,
  DXT_DWORD = 3,
  DXT_QUAD = 4,
  DXT_TEN = 5,
};
41
// Diagnostics. All of them go to stdout and are prefixed with the
// current input position (asmfn:asmln).

// informational message, never terminates the program
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

// warning; the 10th warning closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
56
57#include "masm_tools.h"
58
// Copy the next word of s into w (at most wsize - 1 chars, always
// NUL terminated). Leading blanks are skipped with sskip(); the word
// ends at a blank or ',' unless that character is inside a '...'
// quoted part. Prints a warning when the word did not fit.
// Returns a pointer to the first character that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    // the quote character itself is part of the word
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped on something that is not a separator: ran out of space
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
80
81static void next_section(FILE *fasm, char *name)
82{
83 char words[2][256];
84 char line[256];
85 int wordc;
86 char *p;
87
88 name[0] = 0;
89
90 while (fgets(line, sizeof(line), fasm))
91 {
92 wordc = 0;
93 asmln++;
94
95 p = sskip(line);
96 if (*p == 0)
97 continue;
98
99 if (*p == ';') {
100 while (strlen(line) == sizeof(line) - 1) {
101 // one of those long comment lines..
102 if (!fgets(line, sizeof(line), fasm))
103 break;
104 }
105 continue;
106 }
107
108 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
109 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
110 if (*p == 0 || *p == ';') {
111 wordc++;
112 break;
113 }
114 }
115
116 if (wordc < 2)
117 continue;
118
119 if (!IS(words[1], "segment"))
120 continue;
121
122 strcpy(name, words[0]);
123 break;
124 }
125}
126
127static enum dx_type parse_dx_directive(const char *name)
128{
129 if (IS(name, "dd"))
130 return DXT_DWORD;
131 if (IS(name, "dw"))
132 return DXT_WORD;
133 if (IS(name, "db"))
134 return DXT_BYTE;
135 if (IS(name, "dq"))
136 return DXT_QUAD;
137 if (IS(name, "dt"))
138 return DXT_TEN;
139
140 return DXT_UNSPEC;
141}
142
143static const char *type_name(enum dx_type type)
144{
145 switch (type) {
146 case DXT_BYTE:
147 return ".byte";
148 case DXT_WORD:
149 return ".hword";
150 case DXT_DWORD:
151 return ".long";
152 case DXT_QUAD:
153 return ".quad";
154 case DXT_TEN:
155 return ".tfloat";
156 case DXT_UNSPEC:
157 break;
158 }
159 return "<bad>";
160}
161
162static const char *type_name_float(enum dx_type type)
163{
164 switch (type) {
165 case DXT_DWORD:
166 return ".float";
167 case DXT_QUAD:
168 return ".double";
169 case DXT_TEN:
170 return ".tfloat";
171 default:
172 break;
173 }
174 return "<bad_float>";
175}
176
177static int type_size(enum dx_type type)
178{
179 switch (type) {
180 case DXT_BYTE:
181 return 1;
182 case DXT_WORD:
183 return 2;
184 case DXT_DWORD:
185 return 4;
186 case DXT_QUAD:
187 return 8;
188 case DXT_TEN:
189 return 10;
190 case DXT_UNSPEC:
191 break;
192 }
193 return -1;
194}
195
// Escape s in place so that it can be emitted between double quotes
// of an .ascii/.asciz directive: '"' becomes \" and '\' becomes \\.
// (a numeric escape like \22 is read by the assembler as an octal code
// and would also swallow following digits, so it is not used for '"')
//
// The escaped text is limited to 255 characters; longer results are
// cut at an escape boundary and a warning is printed. s must have room
// for the escaped text, which can be up to twice as long as the input.
// Returns s.
static char *escape_string(char *s)
{
  char buf[256];
  const char *src;
  size_t n = 0;
  size_t need;

  for (src = s; *src != 0; src++) {
    need = (*src == '"' || *src == '\\') ? 2 : 1;

    // keep one byte for the terminator
    if (n + need >= sizeof(buf)) {
      printf("warning: '%s' truncated while escaping\n", s);
      break;
    }

    if (need == 2)
      buf[n++] = '\\';
    buf[n++] = *src;
  }
  buf[n] = 0;

  return strcpy(s, buf);
}
217
218static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
219 size_t buf_size)
220{
221 char *p = buf;
222 size_t l;
223 int i;
224
225 if (pp->ret_type.is_ptr)
226 *p++ = 'p';
227 else if (IS(pp->ret_type.name, "void"))
228 *p++ = 'v';
229 else
230 *p++ = 'i';
231 *p++ = '(';
232 l = 2;
233
234 for (i = 0; i < pp->argc; i++) {
235 if (pp->arg[i].reg != NULL)
236 snprintf(buf + l, buf_size - l, "%s%s",
237 i == 0 ? "" : ",", pp->arg[i].reg);
238 else
239 snprintf(buf + l, buf_size - l, "%sa%d",
240 i == 0 ? "" : ",", i + 1);
241 l = strlen(buf);
242 }
243 snprintf(buf + l, buf_size - l, ")");
244}
245
246static const struct parsed_proto *check_var(FILE *fhdr,
247 const char *sym, const char *varname)
248{
249 const struct parsed_proto *pp, *pp_sym;
250 char fp_sym[256], fp_var[256], *p;
251 int i;
252
253 pp = proto_parse(fhdr, varname, 1);
254 if (pp == NULL) {
255 if (IS_START(varname, "sub_"))
256 awarn("sub_ sym missing proto: '%s'\n", varname);
257 return NULL;
258 }
259
260 if (!pp->is_func && !pp->is_fptr)
261 return NULL;
262
263 pp_print(fp_var, sizeof(fp_var), pp);
264
265 if (pp->argc_reg == 0)
266 goto check_sym;
267 if (pp->argc_reg == 1 && pp->argc_stack == 0
268 && IS(pp->arg[0].reg, "ecx"))
269 {
270 goto check_sym;
271 }
272 if (!g_cconv_novalidate
273 && (pp->argc_reg != 2
274 || !IS(pp->arg[0].reg, "ecx")
275 || !IS(pp->arg[1].reg, "edx")))
276 {
277 awarn("unhandled reg call: %s\n", fp_var);
278 }
279
280check_sym:
281 // fptrs must use 32bit args, callsite might have no information and
282 // lack a cast to smaller types, which results in incorrectly masked
283 // args passed (callee may assume masked args, it does on ARM)
284 for (i = 0; i < pp->argc; i++) {
285 if (pp->arg[i].type.is_ptr)
286 continue;
287 p = pp->arg[i].type.name;
288 if (strstr(p, "int8") || strstr(p, "int16")
289 || strstr(p, "char") || strstr(p, "short"))
290 {
291 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
292 }
293 }
294
295 sprint_pp_short(pp, g_comment, sizeof(g_comment));
296
297 if (sym != NULL) {
298 g_func_sym_pp = NULL;
299 pp_sym = proto_parse(fhdr, sym, 1);
300 if (pp_sym == NULL)
301 return pp;
302 if (!pp_sym->is_fptr)
303 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
304 g_func_sym_pp = pp_sym;
305 }
306 else {
307 pp_sym = g_func_sym_pp;
308 if (pp_sym == NULL)
309 return pp;
310 }
311
312 if (pp_cmp_func(pp, pp_sym)) {
313 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
314 anote("var: %s\n", fp_var);
315 anote("sym: %s\n", fp_sym);
316 awarn("^ mismatch\n");
317 }
318
319 return pp;
320}
321
322static void output_decorated_pp(FILE *fout,
323 const struct parsed_proto *pp)
324{
325 if (pp->name[0] != '_')
326 fprintf(fout, pp->is_fastcall ? "@" : "_");
327 fprintf(fout, "%s", pp->name);
328 if (pp->is_stdcall && pp->argc > 0)
329 fprintf(fout, "@%d", pp->argc * 4);
330}
331
332static int align_value(int src_val)
333{
334 if (src_val <= 0) {
335 awarn("bad align: %d\n", src_val);
336 src_val = 1;
337 }
338 if (!g_arm_mode)
339 return src_val;
340
341 return __builtin_ffs(src_val) - 1;
342}
343
// qsort()/bsearch() comparison callback for arrays of C strings:
// both arguments point to a 'char *' element, which are compared
// with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(char * const *)p1;
  const char *rhs = *(char * const *)p2;

  return strcmp(lhs, rhs);
}
348
/* XXX: maybe move to external file? */
/*
 * Labels of strings that are replaced with zero filled space in the
 * output. Listed in strcmp() order; main() sorts the table anyway
 * before the first lookup, so new entries may go anywhere.
 */
static const char *unwanted_syms[] = {
  "aAbnormalProgra",
  "aDomainError",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
  "aMicrosoftVisua",
  "aR6002FloatingP",
  "aR6008NotEnough",
  "aR6009NotEnough",
  "aR6016NotEnough",
  "aR6017Unexpecte",
  "aR6018Unexpecte",
  "aR6019UnableToO",
  "aR6024NotEnough",
  "aR6025PureVirtu",
  "aR6026NotEnough",
  "aR6027NotEnough",
  "aR6028UnableToI",
  "aR6029ThisAppli",
  "aRuntimeError",
  "aRuntimeErrorPr",
  "aSingError",
  "aThisApplicatio",
  "aTlossError",
};
375
376static int is_unwanted_sym(const char *sym)
377{
378 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
379 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
380}
381
382int main(int argc, char *argv[])
383{
384 FILE *fout, *fasm, *fhdr = NULL, *frlist;
385 const struct parsed_proto *pp;
386 int no_decorations = 0;
387 char comment_char = '#';
388 char words[20][256];
389 char word[256];
390 char line[256];
391 char last_sym[32];
392 unsigned long val;
393 unsigned long cnt;
394 const char *sym;
395 enum dx_type type;
396 char **pub_syms;
397 int pub_sym_cnt = 0;
398 int pub_sym_alloc;
399 char **rlist;
400 int rlist_cnt = 0;
401 int rlist_alloc;
402 int header_mode = 0;
403 int is_ro = 0;
404 int is_label;
405 int is_bss;
406 int wordc;
407 int first;
408 int arg_out;
409 int arg = 1;
410 int len;
411 int w, i;
412 char *p;
413 char *p2;
414
415 if (argc < 4) {
416 // -nd: no symbol decorations
417 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
418 "%s -hdr <.h> <.asm>\n",
419 argv[0], argv[0]);
420 return 1;
421 }
422
423 for (arg = 1; arg < argc; arg++) {
424 if (IS(argv[arg], "-nd"))
425 no_decorations = 1;
426 else if (IS(argv[arg], "-i"))
427 g_cconv_novalidate = 1;
428 else if (IS(argv[arg], "-a")) {
429 comment_char = '@';
430 g_arm_mode = 1;
431 }
432 else if (IS(argv[arg], "-hdr"))
433 header_mode = 1;
434 else
435 break;
436 }
437
438 arg_out = arg++;
439
440 asmfn = argv[arg++];
441 fasm = fopen(asmfn, "r");
442 my_assert_not(fasm, NULL);
443
444 if (!header_mode) {
445 hdrfn = argv[arg++];
446 fhdr = fopen(hdrfn, "r");
447 my_assert_not(fhdr, NULL);
448 }
449
450 fout = fopen(argv[arg_out], "w");
451 my_assert_not(fout, NULL);
452
453 pub_sym_alloc = 64;
454 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
455 my_assert_not(pub_syms, NULL);
456
457 rlist_alloc = 64;
458 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
459 my_assert_not(rlist, NULL);
460
461 for (; arg < argc; arg++) {
462 frlist = fopen(argv[arg], "r");
463 my_assert_not(frlist, NULL);
464
465 while (fgets(line, sizeof(line), frlist)) {
466 p = sskip(line);
467 if (*p == 0 || *p == ';')
468 continue;
469
470 p = next_word(words[0], sizeof(words[0]), p);
471 if (words[0][0] == 0)
472 continue;
473
474 if (rlist_cnt >= rlist_alloc) {
475 rlist_alloc = rlist_alloc * 2 + 64;
476 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
477 my_assert_not(rlist, NULL);
478 }
479 rlist[rlist_cnt++] = strdup(words[0]);
480 }
481
482 fclose(frlist);
483 frlist = NULL;
484 }
485
486 if (rlist_cnt > 0)
487 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
488
489 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
490 sizeof(unwanted_syms[0]), cmpstringp);
491
492 last_sym[0] = 0;
493
494 while (1) {
495 next_section(fasm, line);
496 if (feof(fasm))
497 break;
498 if (IS(line + 1, "text"))
499 continue;
500
501 if (IS(line + 1, "rdata")) {
502 is_ro = 1;
503 if (!header_mode)
504 fprintf(fout, "\n.section .rodata\n");
505 }
506 else if (IS(line + 1, "data")) {
507 is_ro = 0;
508 if (!header_mode)
509 fprintf(fout, "\n.data\n");
510 }
511 else
512 aerr("unhandled section: '%s'\n", line);
513
514 if (!header_mode)
515 fprintf(fout, ".align %d\n", align_value(4));
516
517 while (fgets(line, sizeof(line), fasm))
518 {
519 sym = NULL;
520 asmln++;
521
522 p = sskip(line);
523 if (*p == 0)
524 continue;
525
526 if (*p == ';') {
527 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
528 // ;org is only seen at section start, so assume . addr 0
529 i &= 0xfff;
530 if (i != 0 && !header_mode)
531 fprintf(fout, "\t\t .skip 0x%x\n", i);
532 }
533 continue;
534 }
535
536 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
537 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
538 if (*p == 0 || *p == ';') {
539 wordc++;
540 break;
541 }
542 if (*p == ',') {
543 p = sskip(p + 1);
544 }
545 }
546
547 if (*p == ';') {
548 p = sskip(p + 1);
549 if (IS_START(p, "sctclrtype"))
550 g_func_sym_pp = NULL;
551 }
552
553 if (wordc == 2 && IS(words[1], "ends"))
554 break;
555 if (wordc <= 2 && IS(words[0], "end"))
556 break;
557 if (wordc < 2)
558 aerr("unhandled: '%s'\n", words[0]);
559
560 // don't cares
561 if (IS(words[0], "assume"))
562 continue;
563
564 if (IS(words[0], "align")) {
565 if (header_mode)
566 continue;
567
568 val = parse_number(words[1]);
569 fprintf(fout, "\t\t .align %d", align_value(val));
570 goto fin;
571 }
572
573 w = 1;
574 type = parse_dx_directive(words[0]);
575 if (type == DXT_UNSPEC) {
576 type = parse_dx_directive(words[1]);
577 sym = words[0];
578 w = 2;
579 }
580 if (type == DXT_UNSPEC)
581 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
582
583 if (sym != NULL)
584 {
585 if (header_mode) {
586 int is_str = 0;
587
588 fprintf(fout, "extern ");
589 if (is_ro)
590 fprintf(fout, "const ");
591
592 switch (type) {
593 case DXT_BYTE:
594 for (i = w; i < wordc; i++)
595 if (words[i][0] == '\'')
596 is_str = 1;
597 if (is_str)
598 fprintf(fout, "char %s[];\n", sym);
599 else
600 fprintf(fout, "uint8_t %s;\n", sym);
601 break;
602
603 case DXT_WORD:
604 fprintf(fout, "uint16_t %s;\n", sym);
605 break;
606
607 case DXT_DWORD:
608 fprintf(fout, "uint32_t %s;\n", sym);
609 break;
610
611 default:
612 fprintf(fout, "_UNKNOWN %s;\n", sym);
613 break;
614 }
615
616 continue;
617 }
618
619 snprintf(last_sym, sizeof(last_sym), "%s", sym);
620
621 pp = proto_parse(fhdr, sym, 1);
622 if (pp != NULL) {
623 g_func_sym_pp = NULL;
624
625 // public/global name
626 if (pub_sym_cnt >= pub_sym_alloc) {
627 pub_sym_alloc *= 2;
628 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
629 my_assert_not(pub_syms, NULL);
630 }
631 pub_syms[pub_sym_cnt++] = strdup(sym);
632 }
633
634 len = strlen(sym);
635 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
636
637 len += 2;
638 if (len < 8)
639 fprintf(fout, "\t");
640 if (len < 16)
641 fprintf(fout, "\t");
642 if (len <= 16)
643 fprintf(fout, " ");
644 else
645 fprintf(fout, " ");
646 }
647 else {
648 if (header_mode)
649 continue;
650
651 fprintf(fout, "\t\t ");
652 }
653
654 // fill out some unwanted strings with zeroes..
655 if (type == DXT_BYTE && words[w][0] == '\''
656 && is_unwanted_sym(last_sym))
657 {
658 len = 0;
659 for (; w < wordc; w++) {
660 if (words[w][0] == '\'') {
661 p = words[w] + 1;
662 for (; *p && *p != '\''; p++)
663 len++;
664 }
665 else {
666 // assume encoded byte
667 len++;
668 }
669 }
670 fprintf(fout, ".skip %d", len);
671 goto fin;
672 }
673 else if (type == DXT_BYTE
674 && (words[w][0] == '\''
675 || (w + 1 < wordc && words[w + 1][0] == '\'')))
676 {
677 // string; use asciz for most common case
678 if (w == wordc - 2 && IS(words[w + 1], "0")) {
679 fprintf(fout, ".asciz \"");
680 wordc--;
681 }
682 else
683 fprintf(fout, ".ascii \"");
684
685 for (; w < wordc; w++) {
686 if (words[w][0] == '\'') {
687 p = words[w] + 1;
688 p2 = strchr(p, '\'');
689 if (p2 == NULL)
690 aerr("unterminated string? '%s'\n", p);
691 memcpy(word, p, p2 - p);
692 word[p2 - p] = 0;
693 fprintf(fout, "%s", escape_string(word));
694 }
695 else {
696 val = parse_number(words[w]);
697 if (val & ~0xff)
698 aerr("bad string trailing byte?\n");
699 fprintf(fout, "\\x%02lx", val);
700 }
701 }
702 fprintf(fout, "\"");
703 goto fin;
704 }
705
706 if (w == wordc - 2) {
707 if (IS_START(words[w + 1], "dup(")) {
708 cnt = parse_number(words[w]);
709 p = words[w + 1] + 4;
710 p2 = strchr(p, ')');
711 if (p2 == NULL)
712 aerr("bad dup?\n");
713 memmove(word, p, p2 - p);
714 word[p2 - p] = 0;
715
716 val = 0;
717 if (!IS(word, "?"))
718 val = parse_number(word);
719
720 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
721 cnt, type_size(type), val);
722 goto fin;
723 }
724 }
725
726 if (type == DXT_DWORD && words[w][0] == '\''
727 && words[w][5] == '\'' && strlen(words[w]) == 6)
728 {
729 if (w != wordc - 1)
730 aerr("TODO\n");
731
732 p = words[w];
733 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
734 fprintf(fout, ".long 0x%lx", val);
735 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
736 goto fin;
737 }
738
739 if (type >= DXT_DWORD && strchr(words[w], '.'))
740 {
741 if (w != wordc - 1)
742 aerr("TODO\n");
743
744 if (g_arm_mode && type == DXT_TEN) {
745 fprintf(fout, ".fill 10");
746 snprintf(g_comment, sizeof(g_comment), "%s %s",
747 type_name_float(type), words[w]);
748 }
749 else
750 fprintf(fout, "%s %s", type_name_float(type), words[w]);
751 goto fin;
752 }
753
754 first = 1;
755 fprintf(fout, "%s ", type_name(type));
756 for (; w < wordc; w++)
757 {
758 if (!first)
759 fprintf(fout, ", ");
760
761 is_label = is_bss = 0;
762 if (w <= wordc - 2 && IS(words[w], "offset")) {
763 is_label = 1;
764 w++;
765 }
766 else if (IS(words[w], "?")) {
767 is_bss = 1;
768 }
769 else if (type == DXT_DWORD
770 && !('0' <= words[w][0] && words[w][0] <= '9'))
771 {
772 // assume label
773 is_label = 1;
774 }
775
776 if (is_bss) {
777 fprintf(fout, "0");
778 }
779 else if (is_label) {
780 p = words[w];
781 if (IS_START(p, "loc_") || IS_START(p, "__imp")
782 || strchr(p, '?') || strchr(p, '@')
783 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
784 cmpstringp))
785 {
786 fprintf(fout, "0");
787 snprintf(g_comment, sizeof(g_comment), "%s", p);
788 }
789 else {
790 pp = check_var(fhdr, sym, p);
791 if (pp == NULL) {
792 fprintf(fout, "%s%s",
793 (no_decorations || p[0] == '_') ? "" : "_", p);
794 }
795 else {
796 if (no_decorations)
797 fprintf(fout, "%s", pp->name);
798 else
799 output_decorated_pp(fout, pp);
800 }
801 }
802 }
803 else {
804 val = parse_number(words[w]);
805 if (val < 10)
806 fprintf(fout, "%ld", val);
807 else
808 fprintf(fout, "0x%lx", val);
809 }
810
811 first = 0;
812 }
813
814fin:
815 if (g_comment[0] != 0) {
816 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
817 g_comment[0] = 0;
818 }
819 fprintf(fout, "\n");
820 }
821 }
822
823 fprintf(fout, "\n");
824
825 // dump public syms
826 for (i = 0; i < pub_sym_cnt; i++)
827 fprintf(fout, ".global %s%s\n",
828 no_decorations ? "" : "_", pub_syms[i]);
829
830 fclose(fout);
831 fclose(fasm);
832 if (fhdr != NULL)
833 fclose(fhdr);
834
835 return 0;
836}
837
838// vim:ts=2:shiftwidth=2:expandtab