cvt_data: fix escape chars
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16#include "common.h"
17
// element count of a real array (meaningless for a pointer)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// 1 when strings w and y are equal, 0 otherwise
#define IS(w, y) (strcmp((w), (y)) == 0)
// 1 when string w begins with string y, 0 otherwise
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
21
22#include "protoparse.h"
23
// name of the .asm input and the number of the line last read from it;
// both are printed as the prefix of anote/awarn/aerr messages
static const char *asmfn = NULL;
static int asmln = 0;

// prototype remembered by check_var() for the current func ptr data label
static const struct parsed_proto *g_func_sym_pp = NULL;
// trailing comment for the output line being built, flushed by main()
static char g_comment[256] = "";
// warnings printed so far; awarn() exits once this reaches 10
static int g_warn_cnt = 0;
// set by -i: check_var() skips the "unhandled reg call" warning
static int g_cconv_novalidate = 0;
// set by -a: changes .align values and the handling of 10 byte floats
static int g_arm_mode = 0;
32
// data size selected by a dX directive (see parse_dx_directive)
// note: must be in ascending order, main() compares with
// 'type >= DXT_DWORD'
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
42
// prints "<asm file>:<line>: <kind>: <message>" to stdout;
// shared by the three reporting macros below
#define adiag(kind, fmt, ...) \
  printf("%s:%d: " kind ": " fmt, asmfn, asmln, ##__VA_ARGS__)

// informational message, execution continues
#define anote(fmt, ...) \
  adiag("note", fmt, ##__VA_ARGS__)

// warning; the 10th one closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  adiag("warning", fmt, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error: closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  adiag("error", fmt, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
57
58#include "masm_tools.h"
59
// Copies the next word of s into w (at most wsize - 1 chars plus the
// terminator). Leading blanks are skipped with sskip(); a word ends at
// a blank, a ',' or the end of the string, except that blanks and
// commas between a pair of single quotes belong to the word.
// A warning is printed when the word had to be cut short.
// Returns the position in s where copying stopped.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quotes = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quotes = !in_quotes;
    if (c == 0)
      break;
    if (!in_quotes && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopped on something that is not a separator: ran out of space
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
81
82static void next_section(FILE *fasm, char *name)
83{
84 char words[2][256];
85 char line[256];
86 int wordc;
87 char *p;
88
89 name[0] = 0;
90
91 while (my_fgets(line, sizeof(line), fasm))
92 {
93 wordc = 0;
94 asmln++;
95
96 p = sskip(line);
97 if (*p == 0)
98 continue;
99
100 if (*p == ';')
101 continue;
102
103 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
104 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
105 if (*p == 0 || *p == ';') {
106 wordc++;
107 break;
108 }
109 }
110
111 if (wordc < 2)
112 continue;
113
114 if (!IS(words[1], "segment"))
115 continue;
116
117 strcpy(name, words[0]);
118 break;
119 }
120}
121
122static enum dx_type parse_dx_directive(const char *name)
123{
124 if (IS(name, "dd"))
125 return DXT_DWORD;
126 if (IS(name, "dw"))
127 return DXT_WORD;
128 if (IS(name, "db"))
129 return DXT_BYTE;
130 if (IS(name, "dq"))
131 return DXT_QUAD;
132 if (IS(name, "dt"))
133 return DXT_TEN;
134
135 return DXT_UNSPEC;
136}
137
138static const char *type_name(enum dx_type type)
139{
140 switch (type) {
141 case DXT_BYTE:
142 return ".byte";
143 case DXT_WORD:
144 return ".hword";
145 case DXT_DWORD:
146 return ".long";
147 case DXT_QUAD:
148 return ".quad";
149 case DXT_TEN:
150 return ".tfloat";
151 case DXT_UNSPEC:
152 break;
153 }
154 return "<bad>";
155}
156
157static const char *type_name_float(enum dx_type type)
158{
159 switch (type) {
160 case DXT_DWORD:
161 return ".float";
162 case DXT_QUAD:
163 return ".double";
164 case DXT_TEN:
165 return ".tfloat";
166 default:
167 break;
168 }
169 return "<bad_float>";
170}
171
172static int type_size(enum dx_type type)
173{
174 switch (type) {
175 case DXT_BYTE:
176 return 1;
177 case DXT_WORD:
178 return 2;
179 case DXT_DWORD:
180 return 4;
181 case DXT_QUAD:
182 return 8;
183 case DXT_TEN:
184 return 10;
185 case DXT_UNSPEC:
186 break;
187 }
188 return -1;
189}
190
// Escapes s in place for use inside a double-quoted string: every '"'
// and '\\' gets a backslash put in front of it.
//
// The escaped text, including its terminator, is limited to 256 bytes;
// anything longer is reported with aerr(), which does not return.
// The buffer behind s must have room for the escaped text, as the
// result can be longer than the input.
//
// Returns s, which now holds the escaped string.
static char *escape_string(char *s)
{
  char buf[256];
  const char *src;
  size_t need;
  size_t len = 0;

  for (src = s; *src != 0; src++) {
    need = (*src == '"' || *src == '\\') ? 2 : 1;

    // check before writing, always keeping one byte for the terminator
    if (len + need >= sizeof(buf)) {
      aerr("string is too long\n");
    }

    if (need == 2)
      buf[len++] = '\\';
    buf[len++] = *src;
  }
  buf[len] = 0;

  // scanning is finished, so the input can be overwritten from its start
  memcpy(s, buf, len + 1);
  return s;
}
214
215static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
216 size_t buf_size)
217{
218 char *p = buf;
219 size_t l;
220 int i;
221
222 if (pp->ret_type.is_ptr)
223 *p++ = 'p';
224 else if (IS(pp->ret_type.name, "void"))
225 *p++ = 'v';
226 else
227 *p++ = 'i';
228 *p++ = '(';
229 l = 2;
230
231 for (i = 0; i < pp->argc; i++) {
232 if (pp->arg[i].reg != NULL)
233 snprintf(buf + l, buf_size - l, "%s%s",
234 i == 0 ? "" : ",", pp->arg[i].reg);
235 else
236 snprintf(buf + l, buf_size - l, "%sa%d",
237 i == 0 ? "" : ",", i + 1);
238 l = strlen(buf);
239 }
240 snprintf(buf + l, buf_size - l, ")");
241}
242
243static const struct parsed_proto *check_var(FILE *fhdr,
244 const char *sym, const char *varname)
245{
246 const struct parsed_proto *pp, *pp_sym;
247 char fp_sym[256], fp_var[256], *p;
248 int i;
249
250 pp = proto_parse(fhdr, varname, 1);
251 if (pp == NULL) {
252 if (IS_START(varname, "sub_"))
253 awarn("sub_ sym missing proto: '%s'\n", varname);
254 return NULL;
255 }
256
257 if (!pp->is_func && !pp->is_fptr)
258 return NULL;
259
260 pp_print(fp_var, sizeof(fp_var), pp);
261
262 if (pp->argc_reg == 0)
263 goto check_sym;
264 if (pp->argc_reg == 1 && pp->argc_stack == 0
265 && IS(pp->arg[0].reg, "ecx"))
266 {
267 goto check_sym;
268 }
269 if (!g_cconv_novalidate
270 && (pp->argc_reg != 2
271 || !IS(pp->arg[0].reg, "ecx")
272 || !IS(pp->arg[1].reg, "edx")))
273 {
274 awarn("unhandled reg call: %s\n", fp_var);
275 }
276
277check_sym:
278 // fptrs must use 32bit args, callsite might have no information and
279 // lack a cast to smaller types, which results in incorrectly masked
280 // args passed (callee may assume masked args, it does on ARM)
281 for (i = 0; i < pp->argc; i++) {
282 if (pp->arg[i].type.is_ptr)
283 continue;
284 p = pp->arg[i].type.name;
285 if (strstr(p, "int8") || strstr(p, "int16")
286 || strstr(p, "char") || strstr(p, "short"))
287 {
288 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
289 }
290 }
291
292 sprint_pp_short(pp, g_comment, sizeof(g_comment));
293
294 if (sym != NULL) {
295 g_func_sym_pp = NULL;
296 pp_sym = proto_parse(fhdr, sym, 1);
297 if (pp_sym == NULL)
298 return pp;
299 if (!pp_sym->is_fptr)
300 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
301 g_func_sym_pp = pp_sym;
302 }
303 else {
304 pp_sym = g_func_sym_pp;
305 if (pp_sym == NULL)
306 return pp;
307 }
308
309 if (pp_cmp_func(pp, pp_sym)) {
310 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
311 anote("var: %s\n", fp_var);
312 anote("sym: %s\n", fp_sym);
313 awarn("^ mismatch\n");
314 }
315
316 return pp;
317}
318
319static void output_decorated_pp(FILE *fout,
320 const struct parsed_proto *pp)
321{
322 if (pp->name[0] != '_')
323 fprintf(fout, pp->is_fastcall ? "@" : "_");
324 fprintf(fout, "%s", pp->name);
325 if (pp->is_stdcall && pp->argc > 0)
326 fprintf(fout, "@%d", pp->argc * 4);
327}
328
329static int align_value(int src_val)
330{
331 if (src_val <= 0) {
332 awarn("bad align: %d\n", src_val);
333 src_val = 1;
334 }
335 if (!g_arm_mode)
336 return src_val;
337
338 return __builtin_ffs(src_val) - 1;
339}
340
// qsort()/bsearch() comparison callback for arrays of C strings:
// p1 and p2 point at the array elements (the string pointers), and
// the strings themselves are compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
345
/*
 * Labels of strings that main() replaces with a same-sized .skip
 * instead of copying them to the output.
 * Listed in strcmp() order, which is what the bsearch() in
 * is_unwanted_sym() needs; main() also qsort()s the table on startup,
 * so new entries may be added anywhere.
 * XXX: maybe move to external file?
 */
static const char *unwanted_syms[] = {
  "aAbnormalProgra",
  "aDomainError",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
  "aMicrosoftVisua",
  "aR6002FloatingP",
  "aR6008NotEnough",
  "aR6009NotEnough",
  "aR6016NotEnough",
  "aR6017Unexpecte",
  "aR6018Unexpecte",
  "aR6019UnableToO",
  "aR6024NotEnough",
  "aR6025PureVirtu",
  "aR6026NotEnough",
  "aR6027NotEnough",
  "aR6028UnableToI",
  "aR6029ThisAppli",
  "aRuntimeError",
  "aRuntimeErrorPr",
  "aSingError",
  "aThisApplicatio",
  "aTlossError",
};
372
373static int is_unwanted_sym(const char *sym)
374{
375 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
376 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
377}
378
379int main(int argc, char *argv[])
380{
381 FILE *fout, *fasm, *fhdr = NULL, *frlist;
382 const struct parsed_proto *pp;
383 int no_decorations = 0;
384 char comment_char = '#';
385 char words[20][256];
386 char word[256];
387 char line[256];
388 char last_sym[32];
389 unsigned long val;
390 unsigned long cnt;
391 const char *sym;
392 enum dx_type type;
393 char **pub_syms;
394 int pub_sym_cnt = 0;
395 int pub_sym_alloc;
396 char **rlist;
397 int rlist_cnt = 0;
398 int rlist_alloc;
399 int header_mode = 0;
400 int is_ro = 0;
401 int is_label;
402 int is_bss;
403 int wordc;
404 int first;
405 int arg_out;
406 int arg = 1;
407 int len;
408 int w, i;
409 char *p;
410 char *p2;
411
412 if (argc < 4) {
413 // -nd: no symbol decorations
414 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n"
415 "%s -hdr <.h> <.asm>\n",
416 argv[0], argv[0]);
417 return 1;
418 }
419
420 for (arg = 1; arg < argc; arg++) {
421 if (IS(argv[arg], "-nd"))
422 no_decorations = 1;
423 else if (IS(argv[arg], "-i"))
424 g_cconv_novalidate = 1;
425 else if (IS(argv[arg], "-a")) {
426 comment_char = '@';
427 g_arm_mode = 1;
428 }
429 else if (IS(argv[arg], "-hdr"))
430 header_mode = 1;
431 else
432 break;
433 }
434
435 arg_out = arg++;
436
437 asmfn = argv[arg++];
438 fasm = fopen(asmfn, "r");
439 my_assert_not(fasm, NULL);
440
441 if (!header_mode) {
442 hdrfn = argv[arg++];
443 fhdr = fopen(hdrfn, "r");
444 my_assert_not(fhdr, NULL);
445 }
446
447 fout = fopen(argv[arg_out], "w");
448 my_assert_not(fout, NULL);
449
450 pub_sym_alloc = 64;
451 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
452 my_assert_not(pub_syms, NULL);
453
454 rlist_alloc = 64;
455 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
456 my_assert_not(rlist, NULL);
457
458 for (; arg < argc; arg++) {
459 frlist = fopen(argv[arg], "r");
460 my_assert_not(frlist, NULL);
461
462 while (my_fgets(line, sizeof(line), frlist)) {
463 p = sskip(line);
464 if (*p == 0 || *p == ';')
465 continue;
466
467 p = next_word(words[0], sizeof(words[0]), p);
468 if (words[0][0] == 0)
469 continue;
470
471 if (rlist_cnt >= rlist_alloc) {
472 rlist_alloc = rlist_alloc * 2 + 64;
473 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
474 my_assert_not(rlist, NULL);
475 }
476 rlist[rlist_cnt++] = strdup(words[0]);
477 }
478
479 fclose(frlist);
480 frlist = NULL;
481 }
482
483 if (rlist_cnt > 0)
484 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
485
486 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
487 sizeof(unwanted_syms[0]), cmpstringp);
488
489 last_sym[0] = 0;
490
491 while (1) {
492 next_section(fasm, line);
493 if (feof(fasm))
494 break;
495 if (IS(line + 1, "text"))
496 continue;
497
498 if (IS(line + 1, "rdata")) {
499 is_ro = 1;
500 if (!header_mode)
501 fprintf(fout, "\n.section .rodata\n");
502 }
503 else if (IS(line + 1, "data")) {
504 is_ro = 0;
505 if (!header_mode)
506 fprintf(fout, "\n.data\n");
507 }
508 else
509 aerr("unhandled section: '%s'\n", line);
510
511 if (!header_mode)
512 fprintf(fout, ".align %d\n", align_value(4));
513
514 while (my_fgets(line, sizeof(line), fasm))
515 {
516 sym = NULL;
517 asmln++;
518
519 p = sskip(line);
520 if (*p == 0)
521 continue;
522
523 if (*p == ';') {
524 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
525 // ;org is only seen at section start, so assume . addr 0
526 i &= 0xfff;
527 if (i != 0 && !header_mode)
528 fprintf(fout, "\t\t .skip 0x%x\n", i);
529 }
530 continue;
531 }
532
533 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
534 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
535 if (*p == 0 || *p == ';') {
536 wordc++;
537 break;
538 }
539 if (*p == ',') {
540 p = sskip(p + 1);
541 }
542 }
543
544 if (*p == ';') {
545 p = sskip(p + 1);
546 if (IS_START(p, "sctclrtype"))
547 g_func_sym_pp = NULL;
548 }
549
550 if (wordc == 2 && IS(words[1], "ends"))
551 break;
552 if (wordc <= 2 && IS(words[0], "end"))
553 break;
554 if (wordc < 2)
555 aerr("unhandled: '%s'\n", words[0]);
556
557 // don't cares
558 if (IS(words[0], "assume"))
559 continue;
560
561 if (IS(words[0], "align")) {
562 if (header_mode)
563 continue;
564
565 val = parse_number(words[1]);
566 fprintf(fout, "\t\t .align %d", align_value(val));
567 goto fin;
568 }
569
570 w = 1;
571 type = parse_dx_directive(words[0]);
572 if (type == DXT_UNSPEC) {
573 type = parse_dx_directive(words[1]);
574 sym = words[0];
575 w = 2;
576 }
577 if (type == DXT_UNSPEC)
578 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
579
580 if (sym != NULL)
581 {
582 if (header_mode) {
583 int is_str = 0;
584
585 fprintf(fout, "extern ");
586 if (is_ro)
587 fprintf(fout, "const ");
588
589 switch (type) {
590 case DXT_BYTE:
591 for (i = w; i < wordc; i++)
592 if (words[i][0] == '\'')
593 is_str = 1;
594 if (is_str)
595 fprintf(fout, "char %s[];\n", sym);
596 else
597 fprintf(fout, "uint8_t %s;\n", sym);
598 break;
599
600 case DXT_WORD:
601 fprintf(fout, "uint16_t %s;\n", sym);
602 break;
603
604 case DXT_DWORD:
605 fprintf(fout, "uint32_t %s;\n", sym);
606 break;
607
608 default:
609 fprintf(fout, "_UNKNOWN %s;\n", sym);
610 break;
611 }
612
613 continue;
614 }
615
616 snprintf(last_sym, sizeof(last_sym), "%s", sym);
617
618 pp = proto_parse(fhdr, sym, 1);
619 if (pp != NULL) {
620 g_func_sym_pp = NULL;
621
622 // public/global name
623 if (pub_sym_cnt >= pub_sym_alloc) {
624 pub_sym_alloc *= 2;
625 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
626 my_assert_not(pub_syms, NULL);
627 }
628 pub_syms[pub_sym_cnt++] = strdup(sym);
629 }
630
631 len = strlen(sym);
632 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
633
634 len += 2;
635 if (len < 8)
636 fprintf(fout, "\t");
637 if (len < 16)
638 fprintf(fout, "\t");
639 if (len <= 16)
640 fprintf(fout, " ");
641 else
642 fprintf(fout, " ");
643 }
644 else {
645 if (header_mode)
646 continue;
647
648 fprintf(fout, "\t\t ");
649 }
650
651 // fill out some unwanted strings with zeroes..
652 if (type == DXT_BYTE && words[w][0] == '\''
653 && is_unwanted_sym(last_sym))
654 {
655 len = 0;
656 for (; w < wordc; w++) {
657 if (words[w][0] == '\'') {
658 p = words[w] + 1;
659 for (; *p && *p != '\''; p++)
660 len++;
661 }
662 else {
663 // assume encoded byte
664 len++;
665 }
666 }
667 fprintf(fout, ".skip %d", len);
668 goto fin;
669 }
670 else if (type == DXT_BYTE
671 && (words[w][0] == '\''
672 || (w + 1 < wordc && words[w + 1][0] == '\'')))
673 {
674 // string; use asciz for most common case
675 if (w == wordc - 2 && IS(words[w + 1], "0")) {
676 fprintf(fout, ".asciz \"");
677 wordc--;
678 }
679 else
680 fprintf(fout, ".ascii \"");
681
682 for (; w < wordc; w++) {
683 if (words[w][0] == '\'') {
684 p = words[w] + 1;
685 p2 = strchr(p, '\'');
686 if (p2 == NULL)
687 aerr("unterminated string? '%s'\n", p);
688 memcpy(word, p, p2 - p);
689 word[p2 - p] = 0;
690 fprintf(fout, "%s", escape_string(word));
691 }
692 else {
693 val = parse_number(words[w]);
694 if (val & ~0xff)
695 aerr("bad string trailing byte?\n");
696 // unfortunately \xHH is unusable - gas interprets
697 // things like \x27b as 0x7b, so have to use octal here
698 fprintf(fout, "\\%03lo", val);
699 }
700 }
701 fprintf(fout, "\"");
702 goto fin;
703 }
704
705 if (w == wordc - 2) {
706 if (IS_START(words[w + 1], "dup(")) {
707 cnt = parse_number(words[w]);
708 p = words[w + 1] + 4;
709 p2 = strchr(p, ')');
710 if (p2 == NULL)
711 aerr("bad dup?\n");
712 memmove(word, p, p2 - p);
713 word[p2 - p] = 0;
714
715 val = 0;
716 if (!IS(word, "?"))
717 val = parse_number(word);
718
719 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
720 cnt, type_size(type), val);
721 goto fin;
722 }
723 }
724
725 if (type == DXT_DWORD && words[w][0] == '\''
726 && words[w][5] == '\'' && strlen(words[w]) == 6)
727 {
728 if (w != wordc - 1)
729 aerr("TODO\n");
730
731 p = words[w];
732 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
733 fprintf(fout, ".long 0x%lx", val);
734 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
735 goto fin;
736 }
737
738 if (type >= DXT_DWORD && strchr(words[w], '.'))
739 {
740 if (w != wordc - 1)
741 aerr("TODO\n");
742
743 if (g_arm_mode && type == DXT_TEN) {
744 fprintf(fout, ".fill 10");
745 snprintf(g_comment, sizeof(g_comment), "%s %s",
746 type_name_float(type), words[w]);
747 }
748 else
749 fprintf(fout, "%s %s", type_name_float(type), words[w]);
750 goto fin;
751 }
752
753 first = 1;
754 fprintf(fout, "%s ", type_name(type));
755 for (; w < wordc; w++)
756 {
757 if (!first)
758 fprintf(fout, ", ");
759
760 is_label = is_bss = 0;
761 if (w <= wordc - 2 && IS(words[w], "offset")) {
762 is_label = 1;
763 w++;
764 }
765 else if (IS(words[w], "?")) {
766 is_bss = 1;
767 }
768 else if (type == DXT_DWORD
769 && !('0' <= words[w][0] && words[w][0] <= '9'))
770 {
771 // assume label
772 is_label = 1;
773 }
774
775 if (is_bss) {
776 fprintf(fout, "0");
777 }
778 else if (is_label) {
779 p = words[w];
780 if (IS_START(p, "loc_") || IS_START(p, "__imp")
781 || strchr(p, '?') || strchr(p, '@')
782 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
783 cmpstringp))
784 {
785 fprintf(fout, "0");
786 snprintf(g_comment, sizeof(g_comment), "%s", p);
787 }
788 else {
789 pp = check_var(fhdr, sym, p);
790 if (pp == NULL) {
791 fprintf(fout, "%s%s",
792 (no_decorations || p[0] == '_') ? "" : "_", p);
793 }
794 else {
795 if (no_decorations)
796 fprintf(fout, "%s", pp->name);
797 else
798 output_decorated_pp(fout, pp);
799 }
800 }
801 }
802 else {
803 val = parse_number(words[w]);
804 if (val < 10)
805 fprintf(fout, "%ld", val);
806 else
807 fprintf(fout, "0x%lx", val);
808 }
809
810 first = 0;
811 }
812
813fin:
814 if (g_comment[0] != 0) {
815 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
816 g_comment[0] = 0;
817 }
818 fprintf(fout, "\n");
819 }
820 }
821
822 fprintf(fout, "\n");
823
824 // dump public syms
825 for (i = 0; i < pub_sym_cnt; i++)
826 fprintf(fout, ".global %s%s\n",
827 no_decorations ? "" : "_", pub_syms[i]);
828
829 fclose(fout);
830 fclose(fasm);
831 if (fhdr != NULL)
832 fclose(fhdr);
833
834 return 0;
835}
836
837// vim:ts=2:shiftwidth=2:expandtab