/*
 * Source: ia32rtools.git, tools/cvt_data.c
 * Commit: "some func ptr arg type checking"
 */
1/*
2 * ia32rtools
3 * (C) notaz, 2013,2014
4 *
5 * This work is licensed under the terms of 3-clause BSD license.
6 * See COPYING file in the top-level directory.
7 */
8
9#define _GNU_SOURCE
10#include <stdio.h>
11#include <stdlib.h>
12#include <string.h>
13
14#include "my_assert.h"
15#include "my_str.h"
16
// element count of a true array (meaningless for pointers)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// string equality / prefix tests; arguments and results are parenthesized
// so the macros stay correct inside larger expressions
#define IS(w, y) (!strcmp((w), (y)))
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
20
21#include "protoparse.h"
22
// current input file name and line number; used as the prefix of the
// anote/awarn/aerr messages
static const char *asmfn;
static int asmln;

// prototype of the data label most recently looked up by check_var(),
// reused for following unlabeled entries
static const struct parsed_proto *g_func_sym_pp;
// comment text to append to the output line currently being written
static char g_comment[256];
// number of warnings printed so far
static int g_warn_cnt;
// set by -i: don't warn about unhandled register-argument prototypes
static int g_cconv_novalidate;
// set by -a: '@' comments and ARM-style .align values
static int g_arm_mode;
31
// element type of a dX data directive
// note: must be in ascending order (values are compared with >=)
enum dx_type {
  DXT_UNSPEC,
  DXT_BYTE,
  DXT_WORD,
  DXT_DWORD,
  DXT_QUAD,
  DXT_TEN,
};
41
// Diagnostics, all prefixed with "<asmfn>:<asmln>: " and printed to stdout.

// informational message
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// warning; the 10th one closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  if (++g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)
// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
56
57#include "masm_tools.h"
58
// Copy the next word of 's' into 'w' (at most wsize - 1 chars + NUL).
// Leading characters are skipped with sskip(). A word ends at the end of
// the string or, outside of single quotes, at a blank or a comma, so a
// quoted string containing blanks/commas stays one word.
// Prints a warning if the word did not fit into 'w'.
// Returns a pointer to the first character of 's' that was not copied.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i;

  s = sskip(s);

  for (i = 0; i < wsize - 1; i++) {
    // every ' toggles the "inside quoted string" state
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  // stopped because 'w' is full and not because of a delimiter
  if (s[i] != 0 && !my_isblank(s[i]) && s[i] != ',')
    printf("warning: '%s' truncated\n", w);

  return s + i;
}
80
81static void next_section(FILE *fasm, char *name)
82{
83 char words[2][256];
84 char line[256];
85 int wordc;
86 char *p;
87
88 name[0] = 0;
89
90 while (fgets(line, sizeof(line), fasm))
91 {
92 wordc = 0;
93 asmln++;
94
95 p = sskip(line);
96 if (*p == 0)
97 continue;
98
99 if (*p == ';') {
100 while (strlen(line) == sizeof(line) - 1) {
101 // one of those long comment lines..
102 if (!fgets(line, sizeof(line), fasm))
103 break;
104 }
105 continue;
106 }
107
108 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
109 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
110 if (*p == 0 || *p == ';') {
111 wordc++;
112 break;
113 }
114 }
115
116 if (wordc < 2)
117 continue;
118
119 if (!IS(words[1], "segment"))
120 continue;
121
122 strcpy(name, words[0]);
123 break;
124 }
125}
126
127static enum dx_type parse_dx_directive(const char *name)
128{
129 if (IS(name, "dd"))
130 return DXT_DWORD;
131 if (IS(name, "dw"))
132 return DXT_WORD;
133 if (IS(name, "db"))
134 return DXT_BYTE;
135 if (IS(name, "dq"))
136 return DXT_QUAD;
137 if (IS(name, "dt"))
138 return DXT_TEN;
139
140 return DXT_UNSPEC;
141}
142
143static const char *type_name(enum dx_type type)
144{
145 switch (type) {
146 case DXT_BYTE:
147 return ".byte";
148 case DXT_WORD:
149 return ".hword";
150 case DXT_DWORD:
151 return ".long";
152 case DXT_QUAD:
153 return ".quad";
154 case DXT_TEN:
155 return ".tfloat";
156 case DXT_UNSPEC:
157 break;
158 }
159 return "<bad>";
160}
161
162static const char *type_name_float(enum dx_type type)
163{
164 switch (type) {
165 case DXT_DWORD:
166 return ".float";
167 case DXT_QUAD:
168 return ".double";
169 case DXT_TEN:
170 return ".tfloat";
171 default:
172 break;
173 }
174 return "<bad_float>";
175}
176
177static int type_size(enum dx_type type)
178{
179 switch (type) {
180 case DXT_BYTE:
181 return 1;
182 case DXT_WORD:
183 return 2;
184 case DXT_DWORD:
185 return 4;
186 case DXT_QUAD:
187 return 8;
188 case DXT_TEN:
189 return 10;
190 case DXT_UNSPEC:
191 break;
192 }
193 return -1;
194}
195
// Escape 's' in place for use inside a double-quoted assembler string:
// '"' becomes \" and '\' becomes \\.
// The string grows by one byte per escaped character, so the buffer
// behind 's' must have room for that. Returns 's'.
static char *escape_string(char *s)
{
  size_t len = strlen(s);
  size_t extra = 0;
  size_t i;
  char *src, *dst;

  for (i = 0; i < len; i++)
    if (s[i] == '"' || s[i] == '\\')
      extra++;
  if (extra == 0)
    return s;

  // expand from the end towards the start; dst never drops below src,
  // so no character is overwritten before it has been read and no
  // scratch buffer is needed
  src = s + len;
  dst = src + extra;
  *dst = 0;
  while (src > s) {
    char c = *--src;

    *--dst = c;
    if (c == '"' || c == '\\')
      *--dst = '\\';
  }

  return s;
}
217
218static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
219 size_t buf_size)
220{
221 char *p = buf;
222 size_t l;
223 int i;
224
225 if (pp->ret_type.is_ptr)
226 *p++ = 'p';
227 else if (IS(pp->ret_type.name, "void"))
228 *p++ = 'v';
229 else
230 *p++ = 'i';
231 *p++ = '(';
232 l = 2;
233
234 for (i = 0; i < pp->argc; i++) {
235 if (pp->arg[i].reg != NULL)
236 snprintf(buf + l, buf_size - l, "%s%s",
237 i == 0 ? "" : ",", pp->arg[i].reg);
238 else
239 snprintf(buf + l, buf_size - l, "%sa%d",
240 i == 0 ? "" : ",", i + 1);
241 l = strlen(buf);
242 }
243 snprintf(buf + l, buf_size - l, ")");
244}
245
246static const struct parsed_proto *check_var(FILE *fhdr,
247 const char *sym, const char *varname)
248{
249 const struct parsed_proto *pp, *pp_sym;
250 char fp_sym[256], fp_var[256], *p;
251 int i;
252
253 pp = proto_parse(fhdr, varname, 1);
254 if (pp == NULL) {
255 if (IS_START(varname, "sub_"))
256 awarn("sub_ sym missing proto: '%s'\n", varname);
257 return NULL;
258 }
259
260 if (!pp->is_func && !pp->is_fptr)
261 return NULL;
262
263 pp_print(fp_var, sizeof(fp_var), pp);
264
265 if (pp->argc_reg == 0)
266 goto check_sym;
267 if (pp->argc_reg == 1 && pp->argc_stack == 0
268 && IS(pp->arg[0].reg, "ecx"))
269 {
270 goto check_sym;
271 }
272 if (!g_cconv_novalidate
273 && (pp->argc_reg != 2
274 || !IS(pp->arg[0].reg, "ecx")
275 || !IS(pp->arg[1].reg, "edx")))
276 {
277 awarn("unhandled reg call: %s\n", fp_var);
278 }
279
280check_sym:
281 // fptrs must use 32bit args, callsite might have no information and
282 // lack a cast to smaller types, which results in incorrectly masked
283 // args passed (callee may assume masked args, it does on ARM)
284 for (i = 0; i < pp->argc; i++) {
285 if (pp->arg[i].type.is_ptr)
286 continue;
287 p = pp->arg[i].type.name;
288 if (strstr(p, "int8") || strstr(p, "int16")
289 || strstr(p, "char") || strstr(p, "short"))
290 {
291 awarn("reference to %s with arg%d '%s'\n", pp->name, i + 1, p);
292 }
293 }
294
295 sprint_pp_short(pp, g_comment, sizeof(g_comment));
296
297 if (sym != NULL) {
298 g_func_sym_pp = NULL;
299 pp_sym = proto_parse(fhdr, sym, 1);
300 if (pp_sym == NULL)
301 return pp;
302 if (!pp_sym->is_fptr)
303 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
304 g_func_sym_pp = pp_sym;
305 }
306 else {
307 pp_sym = g_func_sym_pp;
308 if (pp_sym == NULL)
309 return pp;
310 }
311
312 if (pp_cmp_func(pp, pp_sym)) {
313 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
314 anote("var: %s\n", fp_var);
315 anote("sym: %s\n", fp_sym);
316 awarn("^ mismatch\n");
317 }
318
319 return pp;
320}
321
322static void output_decorated_pp(FILE *fout,
323 const struct parsed_proto *pp)
324{
325 if (pp->name[0] != '_')
326 fprintf(fout, pp->is_fastcall ? "@" : "_");
327 fprintf(fout, "%s", pp->name);
328 if (pp->is_stdcall && pp->argc > 0)
329 fprintf(fout, "@%d", pp->argc * 4);
330}
331
332static int align_value(int src_val)
333{
334 if (src_val <= 0) {
335 awarn("bad align: %d\n", src_val);
336 src_val = 1;
337 }
338 if (!g_arm_mode)
339 return src_val;
340
341 return __builtin_ffs(src_val) - 1;
342}
343
// qsort()/bsearch() comparator for arrays of 'char *':
// both arguments point to string pointers, compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  return strcmp(*(char * const *)p1, *(char * const *)p2);
}
348
/* XXX: maybe move to external file? */
// Labels of strings that are not wanted in the output.
// Not sorted here; it has to be sorted with cmpstringp before it can be
// searched with bsearch().
static const char *unwanted_syms[] = {
  "aRuntimeError",
  "aTlossError",
  "aSingError",
  "aDomainError",
  "aR6029ThisAppli",
  "aR6028UnableToI",
  "aR6027NotEnough",
  "aR6026NotEnough",
  "aR6025PureVirtu",
  "aR6024NotEnough",
  "aR6019UnableToO",
  "aR6018Unexpecte",
  "aR6017Unexpecte",
  "aR6016NotEnough",
  "aAbnormalProgra",
  "aR6009NotEnough",
  "aR6008NotEnough",
  "aR6002FloatingP",
  "aMicrosoftVisua",
  "aRuntimeErrorPr",
  "aThisApplicatio",
  "aMicrosoftFindF",
  "aMicrosoftOffic",
};
375
376static int is_unwanted_sym(const char *sym)
377{
378 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
379 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
380}
381
382int main(int argc, char *argv[])
383{
384 FILE *fout, *fasm, *fhdr, *frlist;
385 const struct parsed_proto *pp;
386 int no_decorations = 0;
387 char comment_char = '#';
388 char words[20][256];
389 char word[256];
390 char line[256];
391 char last_sym[32];
392 unsigned long val;
393 unsigned long cnt;
394 const char *sym;
395 enum dx_type type;
396 char **pub_syms;
397 int pub_sym_cnt = 0;
398 int pub_sym_alloc;
399 char **rlist;
400 int rlist_cnt = 0;
401 int rlist_alloc;
402 int is_label;
403 int is_bss;
404 int wordc;
405 int first;
406 int arg_out;
407 int arg = 1;
408 int len;
409 int w, i;
410 char *p;
411 char *p2;
412
413 if (argc < 4) {
414 // -nd: no symbol decorations
415 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
416 argv[0]);
417 return 1;
418 }
419
420 for (arg = 1; arg < argc; arg++) {
421 if (IS(argv[arg], "-nd"))
422 no_decorations = 1;
423 else if (IS(argv[arg], "-i"))
424 g_cconv_novalidate = 1;
425 else if (IS(argv[arg], "-a")) {
426 comment_char = '@';
427 g_arm_mode = 1;
428 }
429 else
430 break;
431 }
432
433 arg_out = arg++;
434
435 asmfn = argv[arg++];
436 fasm = fopen(asmfn, "r");
437 my_assert_not(fasm, NULL);
438
439 hdrfn = argv[arg++];
440 fhdr = fopen(hdrfn, "r");
441 my_assert_not(fhdr, NULL);
442
443 fout = fopen(argv[arg_out], "w");
444 my_assert_not(fout, NULL);
445
446 pub_sym_alloc = 64;
447 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
448 my_assert_not(pub_syms, NULL);
449
450 rlist_alloc = 64;
451 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
452 my_assert_not(rlist, NULL);
453
454 for (; arg < argc; arg++) {
455 frlist = fopen(argv[arg], "r");
456 my_assert_not(frlist, NULL);
457
458 while (fgets(line, sizeof(line), frlist)) {
459 p = sskip(line);
460 if (*p == 0 || *p == ';')
461 continue;
462
463 p = next_word(words[0], sizeof(words[0]), p);
464 if (words[0][0] == 0)
465 continue;
466
467 if (rlist_cnt >= rlist_alloc) {
468 rlist_alloc = rlist_alloc * 2 + 64;
469 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
470 my_assert_not(rlist, NULL);
471 }
472 rlist[rlist_cnt++] = strdup(words[0]);
473 }
474
475 fclose(frlist);
476 frlist = NULL;
477 }
478
479 if (rlist_cnt > 0)
480 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
481
482 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
483 sizeof(unwanted_syms[0]), cmpstringp);
484
485 last_sym[0] = 0;
486
487 while (1) {
488 next_section(fasm, line);
489 if (feof(fasm))
490 break;
491 if (IS(line + 1, "text"))
492 continue;
493
494 if (IS(line + 1, "rdata"))
495 fprintf(fout, "\n.section .rodata\n");
496 else if (IS(line + 1, "data"))
497 fprintf(fout, "\n.data\n");
498 else
499 aerr("unhandled section: '%s'\n", line);
500
501 fprintf(fout, ".align %d\n", align_value(4));
502
503 while (fgets(line, sizeof(line), fasm))
504 {
505 sym = NULL;
506 asmln++;
507
508 p = sskip(line);
509 if (*p == 0)
510 continue;
511
512 if (*p == ';') {
513 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
514 // ;org is only seen at section start, so assume . addr 0
515 i &= 0xfff;
516 if (i != 0)
517 fprintf(fout, "\t\t .skip 0x%x\n", i);
518 }
519 continue;
520 }
521
522 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
523 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
524 if (*p == 0 || *p == ';') {
525 wordc++;
526 break;
527 }
528 if (*p == ',') {
529 p = sskip(p + 1);
530 }
531 }
532
533 if (*p == ';') {
534 p = sskip(p + 1);
535 if (IS_START(p, "sctclrtype"))
536 g_func_sym_pp = NULL;
537 }
538
539 if (wordc == 2 && IS(words[1], "ends"))
540 break;
541 if (wordc <= 2 && IS(words[0], "end"))
542 break;
543 if (wordc < 2)
544 aerr("unhandled: '%s'\n", words[0]);
545
546 // don't cares
547 if (IS(words[0], "assume"))
548 continue;
549
550 if (IS(words[0], "align")) {
551 val = parse_number(words[1]);
552 fprintf(fout, "\t\t .align %d", align_value(val));
553 goto fin;
554 }
555
556 w = 1;
557 type = parse_dx_directive(words[0]);
558 if (type == DXT_UNSPEC) {
559 type = parse_dx_directive(words[1]);
560 sym = words[0];
561 w = 2;
562 }
563 if (type == DXT_UNSPEC)
564 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
565
566 if (sym != NULL) {
567 snprintf(last_sym, sizeof(last_sym), "%s", sym);
568
569 pp = proto_parse(fhdr, sym, 1);
570 if (pp != NULL) {
571 g_func_sym_pp = NULL;
572
573 // public/global name
574 if (pub_sym_cnt >= pub_sym_alloc) {
575 pub_sym_alloc *= 2;
576 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
577 my_assert_not(pub_syms, NULL);
578 }
579 pub_syms[pub_sym_cnt++] = strdup(sym);
580 }
581
582 len = strlen(sym);
583 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
584
585 len += 2;
586 if (len < 8)
587 fprintf(fout, "\t");
588 if (len < 16)
589 fprintf(fout, "\t");
590 if (len <= 16)
591 fprintf(fout, " ");
592 else
593 fprintf(fout, " ");
594 }
595 else {
596 fprintf(fout, "\t\t ");
597 }
598
599 // fill out some unwanted strings with zeroes..
600 if (type == DXT_BYTE && words[w][0] == '\''
601 && is_unwanted_sym(last_sym))
602 {
603 len = 0;
604 for (; w < wordc; w++) {
605 if (words[w][0] == '\'') {
606 p = words[w] + 1;
607 for (; *p && *p != '\''; p++)
608 len++;
609 }
610 else {
611 // assume encoded byte
612 len++;
613 }
614 }
615 fprintf(fout, ".skip %d", len);
616 goto fin;
617 }
618 else if (type == DXT_BYTE
619 && (words[w][0] == '\''
620 || (w + 1 < wordc && words[w + 1][0] == '\'')))
621 {
622 // string; use asciz for most common case
623 if (w == wordc - 2 && IS(words[w + 1], "0")) {
624 fprintf(fout, ".asciz \"");
625 wordc--;
626 }
627 else
628 fprintf(fout, ".ascii \"");
629
630 for (; w < wordc; w++) {
631 if (words[w][0] == '\'') {
632 p = words[w] + 1;
633 p2 = strchr(p, '\'');
634 if (p2 == NULL)
635 aerr("unterminated string? '%s'\n", p);
636 memcpy(word, p, p2 - p);
637 word[p2 - p] = 0;
638 fprintf(fout, "%s", escape_string(word));
639 }
640 else {
641 val = parse_number(words[w]);
642 if (val & ~0xff)
643 aerr("bad string trailing byte?\n");
644 fprintf(fout, "\\x%02lx", val);
645 }
646 }
647 fprintf(fout, "\"");
648 goto fin;
649 }
650
651 if (w == wordc - 2) {
652 if (IS_START(words[w + 1], "dup(")) {
653 cnt = parse_number(words[w]);
654 p = words[w + 1] + 4;
655 p2 = strchr(p, ')');
656 if (p2 == NULL)
657 aerr("bad dup?\n");
658 memmove(word, p, p2 - p);
659 word[p2 - p] = 0;
660
661 val = 0;
662 if (!IS(word, "?"))
663 val = parse_number(word);
664
665 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
666 cnt, type_size(type), val);
667 goto fin;
668 }
669 }
670
671 if (type == DXT_DWORD && words[w][0] == '\''
672 && words[w][5] == '\'' && strlen(words[w]) == 6)
673 {
674 if (w != wordc - 1)
675 aerr("TODO\n");
676
677 p = words[w];
678 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
679 fprintf(fout, ".long 0x%lx", val);
680 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
681 goto fin;
682 }
683
684 if (type >= DXT_DWORD && strchr(words[w], '.'))
685 {
686 if (w != wordc - 1)
687 aerr("TODO\n");
688
689 if (g_arm_mode && type == DXT_TEN) {
690 fprintf(fout, ".fill 10");
691 snprintf(g_comment, sizeof(g_comment), "%s %s",
692 type_name_float(type), words[w]);
693 }
694 else
695 fprintf(fout, "%s %s", type_name_float(type), words[w]);
696 goto fin;
697 }
698
699 first = 1;
700 fprintf(fout, "%s ", type_name(type));
701 for (; w < wordc; w++)
702 {
703 if (!first)
704 fprintf(fout, ", ");
705
706 is_label = is_bss = 0;
707 if (w <= wordc - 2 && IS(words[w], "offset")) {
708 is_label = 1;
709 w++;
710 }
711 else if (IS(words[w], "?")) {
712 is_bss = 1;
713 }
714 else if (type == DXT_DWORD
715 && !('0' <= words[w][0] && words[w][0] <= '9'))
716 {
717 // assume label
718 is_label = 1;
719 }
720
721 if (is_bss) {
722 fprintf(fout, "0");
723 }
724 else if (is_label) {
725 p = words[w];
726 if (IS_START(p, "loc_") || IS_START(p, "__imp")
727 || strchr(p, '?') || strchr(p, '@')
728 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
729 cmpstringp))
730 {
731 fprintf(fout, "0");
732 snprintf(g_comment, sizeof(g_comment), "%s", p);
733 }
734 else {
735 pp = check_var(fhdr, sym, p);
736 if (pp == NULL) {
737 fprintf(fout, "%s%s",
738 (no_decorations || p[0] == '_') ? "" : "_", p);
739 }
740 else {
741 if (no_decorations)
742 fprintf(fout, "%s", pp->name);
743 else
744 output_decorated_pp(fout, pp);
745 }
746 }
747 }
748 else {
749 val = parse_number(words[w]);
750 if (val < 10)
751 fprintf(fout, "%ld", val);
752 else
753 fprintf(fout, "0x%lx", val);
754 }
755
756 first = 0;
757 }
758
759fin:
760 if (g_comment[0] != 0) {
761 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
762 g_comment[0] = 0;
763 }
764 fprintf(fout, "\n");
765 }
766 }
767
768 fprintf(fout, "\n");
769
770 // dump public syms
771 for (i = 0; i < pub_sym_cnt; i++)
772 fprintf(fout, ".global %s%s\n",
773 no_decorations ? "" : "_", pub_syms[i]);
774
775 fclose(fout);
776 fclose(fasm);
777 fclose(fhdr);
778
779 return 0;
780}
781
782// vim:ts=2:shiftwidth=2:expandtab