cvt_data: allow to skip validation
[ia32rtools.git] / tools / cvt_data.c
CommitLineData
054f95b2 1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
// Number of elements in a true array (not valid for pointers).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Nonzero (1) when the two strings are equal, 0 otherwise.
#define IS(w, y) (strcmp((w), (y)) == 0)
// Nonzero (1) when w begins with y, 0 otherwise; y is evaluated twice.
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
12
13#include "protoparse.h"
14
// name of the .asm input and the number of lines read from it so far;
// both are used as the "file:line:" prefix of anote/awarn/aerr messages
static const char *asmfn = NULL;
static int asmln = 0;

// prototype of the most recent fptr-typed data label (see check_var())
static const struct parsed_proto *g_func_sym_pp = NULL;
// text appended as a "# ..." comment to the output line being built
static char g_comment[256] = "";
// number of warnings issued so far; awarn() gives up at 10
static int g_warn_cnt = 0;
// set by -i: don't warn about unhandled register calling conventions
static int g_cconv_novalidate = 0;
b545ba7c 22
// Element kinds of the dd/dw/db/dq/dt data directives.
// note: must be in ascending order (main() compares with >=)
enum dx_type {
  DXT_UNSPEC = 0,
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
32
// Diagnostics are written to stdout, prefixed with the current
// .asm file name and line number (asmfn / asmln).

// informational note; execution continues
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

// warning; the 10th warning closes all streams and exits with status 1
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt++; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// fatal error; closes all streams and exits with status 1
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
47
48#include "masm_tools.h"
49
// Copy the next word of 's' into 'w' (capacity 'wsize', always
// NUL-terminated). Leading blanks are skipped with sskip(); the word
// ends at a blank or ',' unless that character is inside single quotes,
// so a quoted string with spaces/commas stays one word.
// Prints a warning if the word had to be cut to fit 'w'.
// Returns a pointer to the first character that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int in_quote = 0;
  size_t n = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n] = c;
    n++;
  }
  w[n] = 0;

  // stopped on something that is not a delimiter: ran out of room
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
71
72static void next_section(FILE *fasm, char *name)
73{
74 char words[2][256];
75 char line[256];
76 int wordc;
77 char *p;
78
79 name[0] = 0;
80
81 while (fgets(line, sizeof(line), fasm))
82 {
83 wordc = 0;
84 asmln++;
85
86 p = sskip(line);
87 if (*p == 0)
88 continue;
89
90 if (*p == ';') {
91 while (strlen(line) == sizeof(line) - 1) {
92 // one of those long comment lines..
93 if (!fgets(line, sizeof(line), fasm))
94 break;
95 }
96 continue;
97 }
98
99 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
100 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
101 if (*p == 0 || *p == ';') {
102 wordc++;
103 break;
104 }
105 }
106
107 if (wordc < 2)
108 continue;
109
110 if (!IS(words[1], "segment"))
111 continue;
112
113 strcpy(name, words[0]);
114 break;
115 }
116}
117
118static enum dx_type parse_dx_directive(const char *name)
119{
120 if (IS(name, "dd"))
121 return DXT_DWORD;
122 if (IS(name, "dw"))
123 return DXT_WORD;
124 if (IS(name, "db"))
125 return DXT_BYTE;
126 if (IS(name, "dq"))
127 return DXT_QUAD;
128 if (IS(name, "dt"))
129 return DXT_TEN;
130
131 return DXT_UNSPEC;
132}
133
134static const char *type_name(enum dx_type type)
135{
136 switch (type) {
137 case DXT_BYTE:
138 return ".byte";
139 case DXT_WORD:
140 return ".word";
141 case DXT_DWORD:
142 return ".long";
143 case DXT_QUAD:
144 return ".quad";
145 case DXT_TEN:
146 return ".tfloat";
147 case DXT_UNSPEC:
148 break;
149 }
150 return "<bad>";
151}
152
36595fd2 153static const char *type_name_float(enum dx_type type)
154{
155 switch (type) {
156 case DXT_DWORD:
157 return ".float";
158 case DXT_QUAD:
159 return ".double";
160 case DXT_TEN:
161 return ".tfloat";
162 default:
163 break;
164 }
165 return "<bad_float>";
166}
167
054f95b2 168static int type_size(enum dx_type type)
169{
170 switch (type) {
171 case DXT_BYTE:
172 return 1;
173 case DXT_WORD:
174 return 2;
175 case DXT_DWORD:
176 return 4;
177 case DXT_QUAD:
178 return 8;
179 case DXT_TEN:
180 return 10;
181 case DXT_UNSPEC:
182 break;
183 }
184 return -1;
185}
186
// Escape 's' in place so it can be emitted between double quotes in
// .ascii/.asciz output: '"' becomes \" and '\' becomes \\.
//
// The escaped text can be longer than the input; it is limited to
// 255 characters plus the terminator, so the buffer behind 's' must be
// at least 256 bytes (main() passes a char[256]). If the limit is hit,
// the result is cut at a whole-character boundary and a warning is
// printed.
//
// Returns 's', which now holds the escaped string.
static char *escape_string(char *s)
{
  char buf[256];
  const char *src;
  size_t used = 0;
  size_t need;
  int truncated = 0;

  for (src = s; *src != 0; src++) {
    // both special characters are written as a backslash followed by
    // the character itself
    need = (*src == '"' || *src == '\\') ? 2 : 1;

    // always keep one byte for the terminator, and never split an
    // escape sequence in the middle
    if (used + need >= sizeof(buf)) {
      truncated = 1;
      break;
    }

    if (need == 2)
      buf[used++] = '\\';
    buf[used++] = *src;
  }
  buf[used] = 0;

  if (truncated)
    printf("warning: escaped string truncated\n");

  // 's' still points at the start of the caller's buffer here
  return strcpy(s, buf);
}
208
b545ba7c 209static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
210 size_t buf_size)
211{
212 char *p = buf;
213 size_t l;
214 int i;
215
216 if (pp->ret_type.is_ptr)
217 *p++ = 'p';
218 else if (IS(pp->ret_type.name, "void"))
219 *p++ = 'v';
220 else
221 *p++ = 'i';
222 *p++ = '(';
223 l = 2;
224
225 for (i = 0; i < pp->argc; i++) {
226 if (pp->arg[i].reg != NULL)
227 snprintf(buf + l, buf_size - l, "%s%s",
228 i == 0 ? "" : ",", pp->arg[i].reg);
229 else
230 snprintf(buf + l, buf_size - l, "%sa%d",
231 i == 0 ? "" : ",", i + 1);
232 l = strlen(buf);
233 }
234 snprintf(buf + l, buf_size - l, ")");
235}
236
c0050df6 237static const struct parsed_proto *check_var(FILE *fhdr,
238 const char *sym, const char *varname)
b545ba7c 239{
240 const struct parsed_proto *pp, *pp_sym;
241 char fp_sym[256], fp_var[256];
242 int i, bad = 0;
243
244 pp = proto_parse(fhdr, varname, 1);
36595fd2 245 if (pp == NULL) {
b545ba7c 246 if (IS_START(varname, "sub_"))
247 awarn("sub_ sym missing proto: '%s'\n", varname);
c0050df6 248 return NULL;
36595fd2 249 }
250
251 if (!pp->is_func && !pp->is_fptr)
c0050df6 252 return NULL;
b545ba7c 253
b74c31e3 254 pp_print(fp_var, sizeof(fp_var), pp);
b545ba7c 255
36595fd2 256 if (pp->argc_reg == 0)
b545ba7c 257 goto check_sym;
36595fd2 258 if (pp->argc_reg == 1 && pp->argc_stack == 0
259 && IS(pp->arg[0].reg, "ecx"))
260 {
b545ba7c 261 goto check_sym;
36595fd2 262 }
77f3a833 263 if (!g_cconv_novalidate
264 && (pp->argc_reg != 2
265 || !IS(pp->arg[0].reg, "ecx")
266 || !IS(pp->arg[1].reg, "edx")))
36595fd2 267 {
b545ba7c 268 awarn("unhandled reg call: %s\n", fp_var);
36595fd2 269 }
36595fd2 270
b545ba7c 271check_sym:
272 sprint_pp_short(pp, g_comment, sizeof(g_comment));
273
274 if (sym != NULL) {
275 g_func_sym_pp = NULL;
276 pp_sym = proto_parse(fhdr, sym, 1);
277 if (pp_sym == NULL)
c0050df6 278 return pp;
b545ba7c 279 if (!pp_sym->is_fptr)
280 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
281 g_func_sym_pp = pp_sym;
36595fd2 282 }
b545ba7c 283 else {
284 pp_sym = g_func_sym_pp;
285 if (pp_sym == NULL)
c0050df6 286 return pp;
b545ba7c 287 }
288
289 if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg)
290 bad = 1;
291 else {
292 for (i = 0; i < pp->argc; i++) {
293 if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) {
294 bad = 1;
295 break;
296 }
297 if ((pp->arg[i].reg != NULL)
298 && !IS(pp->arg[i].reg, pp_sym->arg[i].reg))
299 {
300 bad = 1;
301 break;
302 }
303 }
304 }
305
306 if (bad) {
b74c31e3 307 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
b545ba7c 308 anote("var: %s\n", fp_var);
309 anote("sym: %s\n", fp_sym);
310 awarn("^ mismatch\n");
36595fd2 311 }
c0050df6 312
313 return pp;
36595fd2 314}
315
aa1aa2c2 316static void output_decorated_pp(FILE *fout,
317 const struct parsed_proto *pp)
318{
319 if (pp->name[0] != '_')
320 fprintf(fout, pp->is_fastcall ? "@" : "_");
321 fprintf(fout, "%s", pp->name);
322 if (pp->is_stdcall && pp->argc > 0)
323 fprintf(fout, "@%d", pp->argc * 4);
324}
325
// qsort()/bsearch() comparator for arrays of C strings: both arguments
// point to a 'char *' element, which is compared with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char * const *lhs = p1;
  const char * const *rhs = p2;

  return strcmp(*lhs, *rhs);
}
330
054f95b2 331int main(int argc, char *argv[])
332{
36595fd2 333 FILE *fout, *fasm, *fhdr, *frlist;
b545ba7c 334 const struct parsed_proto *pp;
aa1aa2c2 335 int no_decorations = 0;
054f95b2 336 char words[20][256];
054f95b2 337 char word[256];
338 char line[256];
054f95b2 339 unsigned long val;
340 unsigned long cnt;
341 const char *sym;
342 enum dx_type type;
36595fd2 343 char **pub_syms;
344 int pub_sym_cnt = 0;
345 int pub_sym_alloc;
346 char **rlist;
347 int rlist_cnt = 0;
348 int rlist_alloc;
054f95b2 349 int is_label;
36595fd2 350 int is_bss;
054f95b2 351 int wordc;
352 int first;
353 int arg_out;
354 int arg = 1;
355 int len;
36595fd2 356 int w, i;
054f95b2 357 char *p;
358 char *p2;
359
36595fd2 360 if (argc < 4) {
aa1aa2c2 361 // -nd: no symbol decorations
77f3a833 362 printf("usage:\n%s [-nd] [-i] <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 363 argv[0]);
364 return 1;
365 }
366
aa1aa2c2 367 for (arg = 1; arg < argc; arg++) {
368 if (IS(argv[arg], "-nd"))
369 no_decorations = 1;
77f3a833 370 else if (IS(argv[arg], "-i"))
371 g_cconv_novalidate = 1;
aa1aa2c2 372 else
373 break;
374 }
375
054f95b2 376 arg_out = arg++;
377
378 asmfn = argv[arg++];
379 fasm = fopen(asmfn, "r");
380 my_assert_not(fasm, NULL);
381
382 hdrfn = argv[arg++];
36595fd2 383 fhdr = fopen(hdrfn, "r");
384 my_assert_not(fhdr, NULL);
054f95b2 385
386 fout = fopen(argv[arg_out], "w");
387 my_assert_not(fout, NULL);
388
36595fd2 389 pub_sym_alloc = 64;
390 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
391 my_assert_not(pub_syms, NULL);
392
393 rlist_alloc = 64;
394 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
395 my_assert_not(rlist, NULL);
396
397 for (; arg < argc; arg++) {
398 frlist = fopen(argv[arg], "r");
399 my_assert_not(frlist, NULL);
400
401 while (fgets(line, sizeof(line), frlist)) {
402 p = sskip(line);
403 if (*p == 0 || *p == ';')
404 continue;
405
406 p = next_word(words[0], sizeof(words[0]), p);
407 if (words[0][0] == 0)
408 continue;
409
410 if (rlist_cnt >= rlist_alloc) {
411 rlist_alloc = rlist_alloc * 2 + 64;
412 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
413 my_assert_not(rlist, NULL);
414 }
415 rlist[rlist_cnt++] = strdup(words[0]);
416 }
417
418 fclose(frlist);
419 frlist = NULL;
420 }
421
422 if (rlist_cnt > 0)
423 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
424
425 while (1) {
054f95b2 426 next_section(fasm, line);
36595fd2 427 if (feof(fasm))
428 break;
054f95b2 429 if (IS(line + 1, "text"))
430 continue;
431
432 if (IS(line + 1, "rdata"))
433 fprintf(fout, "\n.section .rodata\n");
434 else if (IS(line + 1, "data"))
435 fprintf(fout, "\n.data\n");
436 else
437 aerr("unhandled section: '%s'\n", line);
438
439 fprintf(fout, ".align 4\n");
440
441 while (fgets(line, sizeof(line), fasm))
442 {
443 sym = NULL;
444 asmln++;
445
446 p = sskip(line);
447 if (*p == 0 || *p == ';')
448 continue;
449
450 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 451 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
452 if (*p == 0 || *p == ';') {
453 wordc++;
454 break;
455 }
456 if (*p == ',') {
054f95b2 457 p = sskip(p + 1);
458 }
459 }
460
b545ba7c 461 if (*p == ';') {
462 p = sskip(p + 1);
463 if (IS_START(p, "sctclrtype"))
464 g_func_sym_pp = NULL;
465 }
466
054f95b2 467 if (wordc == 2 && IS(words[1], "ends"))
468 break;
36595fd2 469 if (wordc <= 2 && IS(words[0], "end"))
470 break;
054f95b2 471 if (wordc < 2)
472 aerr("unhandled: '%s'\n", words[0]);
473
474 // don't cares
475 if (IS(words[0], "assume"))
476 continue;
477
478 if (IS(words[0], "align")) {
479 val = parse_number(words[1]);
480 fprintf(fout, "\t\t .align %ld", val);
481 goto fin;
482 }
483
484 w = 1;
485 type = parse_dx_directive(words[0]);
486 if (type == DXT_UNSPEC) {
487 type = parse_dx_directive(words[1]);
488 sym = words[0];
489 w = 2;
490 }
491 if (type == DXT_UNSPEC)
492 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
493
494 if (sym != NULL) {
36595fd2 495 // public/global name
496 if (pub_sym_cnt >= pub_sym_alloc) {
497 pub_sym_alloc *= 2;
498 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
499 my_assert_not(pub_syms, NULL);
500 }
501 pub_syms[pub_sym_cnt++] = strdup(sym);
502
b545ba7c 503 pp = proto_parse(fhdr, sym, 1);
504 if (pp != NULL)
505 g_func_sym_pp = NULL;
506
054f95b2 507 len = strlen(sym);
aa1aa2c2 508 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
054f95b2 509
510 len += 2;
511 if (len < 8)
512 fprintf(fout, "\t");
513 if (len < 16)
514 fprintf(fout, "\t");
515 if (len <= 16)
516 fprintf(fout, " ");
517 else
518 fprintf(fout, " ");
519 }
520 else {
521 fprintf(fout, "\t\t ");
522 }
523
efea2951 524 if (type == DXT_BYTE
525 && (words[w][0] == '\''
526 || (w + 1 < wordc && words[w + 1][0] == '\'')))
527 {
054f95b2 528 // string; use asciz for most common case
529 if (w == wordc - 2 && IS(words[w + 1], "0")) {
530 fprintf(fout, ".asciz \"");
531 wordc--;
532 }
533 else
534 fprintf(fout, ".ascii \"");
535
536 for (; w < wordc; w++) {
537 if (words[w][0] == '\'') {
538 p = words[w] + 1;
539 p2 = strchr(p, '\'');
540 if (p2 == NULL)
541 aerr("unterminated string? '%s'\n", p);
542 memcpy(word, p, p2 - p);
543 word[p2 - p] = 0;
544 fprintf(fout, "%s", escape_string(word));
545 }
546 else {
547 val = parse_number(words[w]);
548 if (val & ~0xff)
549 aerr("bad string trailing byte?\n");
550 fprintf(fout, "\\x%02lx", val);
551 }
552 }
553 fprintf(fout, "\"");
554 goto fin;
555 }
556
557 if (w == wordc - 2) {
558 if (IS_START(words[w + 1], "dup(")) {
559 cnt = parse_number(words[w]);
560 p = words[w + 1] + 4;
561 p2 = strchr(p, ')');
562 if (p2 == NULL)
563 aerr("bad dup?\n");
564 memmove(word, p, p2 - p);
565 word[p2 - p] = 0;
36595fd2 566
567 val = 0;
568 if (!IS(word, "?"))
569 val = parse_number(word);
054f95b2 570
571 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
572 cnt, type_size(type), val);
573 goto fin;
574 }
575 }
576
577 if (type == DXT_DWORD && words[w][0] == '\''
578 && words[w][5] == '\'' && strlen(words[w]) == 6)
579 {
580 if (w != wordc - 1)
581 aerr("TODO\n");
582
583 p = words[w];
584 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
585 fprintf(fout, ".long 0x%lx", val);
b545ba7c 586 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
054f95b2 587 goto fin;
588 }
589
36595fd2 590 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 591 {
592 if (w != wordc - 1)
593 aerr("TODO\n");
594
36595fd2 595 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 596 goto fin;
597 }
598
599 first = 1;
600 fprintf(fout, "%s ", type_name(type));
601 for (; w < wordc; w++)
602 {
603 if (!first)
604 fprintf(fout, ", ");
605
36595fd2 606 is_label = is_bss = 0;
607 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 608 is_label = 1;
609 w++;
610 }
36595fd2 611 else if (IS(words[w], "?")) {
612 is_bss = 1;
613 }
054f95b2 614 else if (type == DXT_DWORD
615 && !('0' <= words[w][0] && words[w][0] <= '9'))
616 {
617 // assume label
618 is_label = 1;
619 }
620
36595fd2 621 if (is_bss) {
622 fprintf(fout, "0");
623 }
624 else if (is_label) {
054f95b2 625 p = words[w];
ddaf8bd7 626 if (IS_START(p, "loc_") || IS_START(p, "__imp")
627 || strchr(p, '?') || strchr(p, '@')
36595fd2 628 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
629 cmpstringp))
054f95b2 630 {
631 fprintf(fout, "0");
b545ba7c 632 snprintf(g_comment, sizeof(g_comment), "%s", p);
36595fd2 633 }
634 else {
c0050df6 635 pp = check_var(fhdr, sym, p);
aa1aa2c2 636 if (pp == NULL) {
637 fprintf(fout, "%s%s",
638 (no_decorations || p[0] == '_') ? "" : "_", p);
639 }
640 else {
641 if (no_decorations)
642 fprintf(fout, "%s", pp->name);
643 else
644 output_decorated_pp(fout, pp);
645 }
054f95b2 646 }
054f95b2 647 }
648 else {
649 val = parse_number(words[w]);
650 if (val < 10)
651 fprintf(fout, "%ld", val);
652 else
653 fprintf(fout, "0x%lx", val);
654 }
655
656 first = 0;
657 }
658
659fin:
b545ba7c 660 if (g_comment[0] != 0) {
661 fprintf(fout, "\t\t# %s", g_comment);
662 g_comment[0] = 0;
054f95b2 663 }
664 fprintf(fout, "\n");
054f95b2 665 }
666 }
667
36595fd2 668 fprintf(fout, "\n");
669
670 // dump public syms
671 for (i = 0; i < pub_sym_cnt; i++)
aa1aa2c2 672 fprintf(fout, ".global %s%s\n",
673 no_decorations ? "" : "_", pub_syms[i]);
36595fd2 674
054f95b2 675 fclose(fout);
676 fclose(fasm);
36595fd2 677 fclose(fhdr);
054f95b2 678
679 return 0;
680}
681
682// vim:ts=2:shiftwidth=2:expandtab