cvt_data: use .hword (consistent 2 bytes), offsets from ;.org
[ia32rtools.git] / tools / cvt_data.c
... / ...
CommitLineData
1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
/* Number of elements in a true array (not valid for pointers). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when the strings w and y are equal. */
#define IS(w, y) (!strcmp((w), (y)))
/* Non-zero when the string w begins with the string y. */
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
12
13#include "protoparse.h"
14
// name of the .asm input (taken from argv in main); prefix of all diagnostics
15static const char *asmfn;
// line counter for the .asm input, incremented as lines are read
16static int asmln;
17
// prototype of the last function-pointer data label seen by check_var(),
// or NULL; later unlabeled entries are compared against it
18static const struct parsed_proto *g_func_sym_pp;
// pending end-of-line comment; main() prints it after the directive and clears it
19static char g_comment[256];
// warnings printed so far; awarn() exits once this reaches 10
20static int g_warn_cnt;
// set by -i: check_var() skips the "unhandled reg call" warning
21static int g_cconv_novalidate;
// set by -a (presumably ARM target): '@' comment character, align_value()
// returns a bit index, 10-byte floats are emitted as .fill
22static int g_arm_mode;
23
// Data definition kinds recognised by parse_dx_directive().
// main() compares these with `type >= DXT_DWORD`, hence the ordering rule:
24// note: must be in ascending order
25enum dx_type {
26 DXT_UNSPEC, // not a recognised data directive
27 DXT_BYTE, // "db", 1 byte
28 DXT_WORD, // "dw", 2 bytes
29 DXT_DWORD, // "dd", 4 bytes
30 DXT_QUAD, // "dq", 8 bytes
31 DXT_TEN, // "dt", 10 bytes
32};
33
// Diagnostics, all printed to stdout as "<asmfn>:<asmln>: <level>: ...".
// anote: informational message only.
34#define anote(fmt, ...) \
35 printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)
// awarn: prints a warning and counts it; on the 10th warning all open
// streams are closed (fcloseall is a GNU extension) and the process exits
// with status 1.
36#define awarn(fmt, ...) do { \
37 printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
38 if (++g_warn_cnt == 10) { \
39 fcloseall(); \
40 exit(1); \
41 } \
42} while (0)
// aerr: prints an error, closes all open streams and exits with status 1;
// it never returns to the caller.
43#define aerr(fmt, ...) do { \
44 printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
45 fcloseall(); \
46 exit(1); \
47} while (0)
48
49#include "masm_tools.h"
50
/*
 * Copy the next operand from s into w (at most wsize - 1 characters plus
 * the terminator).  Leading blanks are skipped with sskip().  An operand
 * ends at a blank or a comma, except inside single quotes, where both are
 * kept as part of the word.  A warning is printed when the copy stopped
 * in the middle of a word.
 *
 * Returns a pointer to the first character that was not consumed.
 */
static char *next_word_s(char *w, size_t wsize, char *s)
{
  char *src = sskip(s);
  int in_quotes = 0;
  size_t n = 0;
  char c;

  while (n < wsize - 1) {
    c = src[n];
    if (c == '\'')
      in_quotes = !in_quotes;
    if (c == 0)
      break;
    if (!in_quotes && (my_isblank(c) || c == ','))
      break;
    w[n] = c;
    n++;
  }
  w[n] = 0;

  // stopped on something that is not a separator: the word did not fit
  c = src[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return src + n;
}
72
73static void next_section(FILE *fasm, char *name)
74{
75 char words[2][256];
76 char line[256];
77 int wordc;
78 char *p;
79
80 name[0] = 0;
81
82 while (fgets(line, sizeof(line), fasm))
83 {
84 wordc = 0;
85 asmln++;
86
87 p = sskip(line);
88 if (*p == 0)
89 continue;
90
91 if (*p == ';') {
92 while (strlen(line) == sizeof(line) - 1) {
93 // one of those long comment lines..
94 if (!fgets(line, sizeof(line), fasm))
95 break;
96 }
97 continue;
98 }
99
100 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
101 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
102 if (*p == 0 || *p == ';') {
103 wordc++;
104 break;
105 }
106 }
107
108 if (wordc < 2)
109 continue;
110
111 if (!IS(words[1], "segment"))
112 continue;
113
114 strcpy(name, words[0]);
115 break;
116 }
117}
118
119static enum dx_type parse_dx_directive(const char *name)
120{
121 if (IS(name, "dd"))
122 return DXT_DWORD;
123 if (IS(name, "dw"))
124 return DXT_WORD;
125 if (IS(name, "db"))
126 return DXT_BYTE;
127 if (IS(name, "dq"))
128 return DXT_QUAD;
129 if (IS(name, "dt"))
130 return DXT_TEN;
131
132 return DXT_UNSPEC;
133}
134
135static const char *type_name(enum dx_type type)
136{
137 switch (type) {
138 case DXT_BYTE:
139 return ".byte";
140 case DXT_WORD:
141 return ".hword";
142 case DXT_DWORD:
143 return ".long";
144 case DXT_QUAD:
145 return ".quad";
146 case DXT_TEN:
147 return ".tfloat";
148 case DXT_UNSPEC:
149 break;
150 }
151 return "<bad>";
152}
153
154static const char *type_name_float(enum dx_type type)
155{
156 switch (type) {
157 case DXT_DWORD:
158 return ".float";
159 case DXT_QUAD:
160 return ".double";
161 case DXT_TEN:
162 return ".tfloat";
163 default:
164 break;
165 }
166 return "<bad_float>";
167}
168
169static int type_size(enum dx_type type)
170{
171 switch (type) {
172 case DXT_BYTE:
173 return 1;
174 case DXT_WORD:
175 return 2;
176 case DXT_DWORD:
177 return 4;
178 case DXT_QUAD:
179 return 8;
180 case DXT_TEN:
181 return 10;
182 case DXT_UNSPEC:
183 break;
184 }
185 return -1;
186}
187
/*
 * Escape s in place for use inside a double-quoted assembler string:
 * '"' becomes \" and '\' becomes \\ ; every other character is copied
 * unchanged.
 *
 * The result is written back over s and s is returned.  Because escaping
 * can grow the text, the caller's buffer must have room for the expanded
 * string.  The result is capped at 255 characters plus the terminator
 * (the size of the scratch buffer); input that would expand beyond that
 * is cut off, never in the middle of an escape sequence.
 */
static char *escape_string(char *s)
{
  char buf[256];
  char *const start = s;
  size_t used = 0;

  for (; *s != 0; s++) {
    int need_escape = (*s == '"' || *s == '\\');
    size_t need = need_escape ? 2 : 1;

    // keep one byte for the terminator
    if (used + need >= sizeof(buf))
      break;

    if (need_escape)
      buf[used++] = '\\';
    buf[used++] = *s;
  }
  buf[used] = 0;

  // copy back to the beginning of the caller's buffer, not to wherever
  // the scan stopped
  return strcpy(start, buf);
}
209
210static void sprint_pp_short(const struct parsed_proto *pp, char *buf,
211 size_t buf_size)
212{
213 char *p = buf;
214 size_t l;
215 int i;
216
217 if (pp->ret_type.is_ptr)
218 *p++ = 'p';
219 else if (IS(pp->ret_type.name, "void"))
220 *p++ = 'v';
221 else
222 *p++ = 'i';
223 *p++ = '(';
224 l = 2;
225
226 for (i = 0; i < pp->argc; i++) {
227 if (pp->arg[i].reg != NULL)
228 snprintf(buf + l, buf_size - l, "%s%s",
229 i == 0 ? "" : ",", pp->arg[i].reg);
230 else
231 snprintf(buf + l, buf_size - l, "%sa%d",
232 i == 0 ? "" : ",", i + 1);
233 l = strlen(buf);
234 }
235 snprintf(buf + l, buf_size - l, ")");
236}
237
238static const struct parsed_proto *check_var(FILE *fhdr,
239 const char *sym, const char *varname)
240{
241 const struct parsed_proto *pp, *pp_sym;
242 char fp_sym[256], fp_var[256];
243 int i, bad = 0;
244
245 pp = proto_parse(fhdr, varname, 1);
246 if (pp == NULL) {
247 if (IS_START(varname, "sub_"))
248 awarn("sub_ sym missing proto: '%s'\n", varname);
249 return NULL;
250 }
251
252 if (!pp->is_func && !pp->is_fptr)
253 return NULL;
254
255 pp_print(fp_var, sizeof(fp_var), pp);
256
257 if (pp->argc_reg == 0)
258 goto check_sym;
259 if (pp->argc_reg == 1 && pp->argc_stack == 0
260 && IS(pp->arg[0].reg, "ecx"))
261 {
262 goto check_sym;
263 }
264 if (!g_cconv_novalidate
265 && (pp->argc_reg != 2
266 || !IS(pp->arg[0].reg, "ecx")
267 || !IS(pp->arg[1].reg, "edx")))
268 {
269 awarn("unhandled reg call: %s\n", fp_var);
270 }
271
272check_sym:
273 sprint_pp_short(pp, g_comment, sizeof(g_comment));
274
275 if (sym != NULL) {
276 g_func_sym_pp = NULL;
277 pp_sym = proto_parse(fhdr, sym, 1);
278 if (pp_sym == NULL)
279 return pp;
280 if (!pp_sym->is_fptr)
281 aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name);
282 g_func_sym_pp = pp_sym;
283 }
284 else {
285 pp_sym = g_func_sym_pp;
286 if (pp_sym == NULL)
287 return pp;
288 }
289
290 if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg)
291 bad = 1;
292 else {
293 for (i = 0; i < pp->argc; i++) {
294 if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) {
295 bad = 1;
296 break;
297 }
298 if ((pp->arg[i].reg != NULL)
299 && !IS(pp->arg[i].reg, pp_sym->arg[i].reg))
300 {
301 bad = 1;
302 break;
303 }
304 }
305 }
306
307 if (bad) {
308 pp_print(fp_sym, sizeof(fp_sym), pp_sym);
309 anote("var: %s\n", fp_var);
310 anote("sym: %s\n", fp_sym);
311 awarn("^ mismatch\n");
312 }
313
314 return pp;
315}
316
317static void output_decorated_pp(FILE *fout,
318 const struct parsed_proto *pp)
319{
320 if (pp->name[0] != '_')
321 fprintf(fout, pp->is_fastcall ? "@" : "_");
322 fprintf(fout, "%s", pp->name);
323 if (pp->is_stdcall && pp->argc > 0)
324 fprintf(fout, "@%d", pp->argc * 4);
325}
326
327static int align_value(int src_val)
328{
329 if (src_val <= 0) {
330 awarn("bad align: %d\n", src_val);
331 src_val = 1;
332 }
333 if (!g_arm_mode)
334 return src_val;
335
336 return __builtin_ffs(src_val) - 1;
337}
338
/*
 * qsort()/bsearch() comparator for arrays of C string pointers:
 * each argument points at a `char *` element, and the strings they
 * refer to are ordered with strcmp().
 */
static int cmpstringp(const void *p1, const void *p2)
{
  const char * const *lhs = p1;
  const char * const *rhs = p2;

  return strcmp(*lhs, *rhs);
}
343
// Labels whose string data main() replaces with a zero-filled ".skip"
// instead of emitting the text (presumably auto-generated names of C
// runtime message strings - confirm).
// main() qsort()s this table at startup so that is_unwanted_sym() can
// bsearch() it; the array therefore has to stay writable and the order
// written here does not matter.
344/* XXX: maybe move to external file? */
345static const char *unwanted_syms[] = {
346 "aRuntimeError",
347 "aTlossError",
348 "aSingError",
349 "aDomainError",
350 "aR6029ThisAppli",
351 "aR6028UnableToI",
352 "aR6027NotEnough",
353 "aR6026NotEnough",
354 "aR6025PureVirtu",
355 "aR6024NotEnough",
356 "aR6019UnableToO",
357 "aR6018Unexpecte",
358 "aR6017Unexpecte",
359 "aR6016NotEnough",
360 "aAbnormalProgra",
361 "aR6009NotEnough",
362 "aR6008NotEnough",
363 "aR6002FloatingP",
364 "aMicrosoftVisua",
365 "aRuntimeErrorPr",
366 "aThisApplicatio",
367 "aMicrosoftFindF",
368 "aMicrosoftOffic",
369};
370
371static int is_unwanted_sym(const char *sym)
372{
373 return bsearch(&sym, unwanted_syms, ARRAY_SIZE(unwanted_syms),
374 sizeof(unwanted_syms[0]), cmpstringp) != NULL;
375}
376
// Entry point.
// usage: cvt_data [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*
//   -nd    do not prefix output symbols with '_'
//   -i     do not warn about unhandled register calling conventions
//   -a     use '@' as comment character and bit-index .align values
//   <.s>   output file, <.asm> input listing, <hdrf> header with prototypes
//   rlist  files listing symbols whose references are emitted as 0
// Every segment of the .asm input except "text" is converted, line by
// line, into data directives in the output file; labels that have a
// prototype in the header are additionally exported with .global.
// Returns 0 on success, 1 on bad usage; errors inside the conversion exit
// the process through aerr()/awarn().
377int main(int argc, char *argv[])
378{
379 FILE *fout, *fasm, *fhdr, *frlist;
380 const struct parsed_proto *pp;
381 int no_decorations = 0;
382 char comment_char = '#';
383 char words[20][256];
384 char word[256];
385 char line[256];
386 char last_sym[32];
387 unsigned long val;
388 unsigned long cnt;
389 const char *sym;
390 enum dx_type type;
391 char **pub_syms;
392 int pub_sym_cnt = 0;
393 int pub_sym_alloc;
394 char **rlist;
395 int rlist_cnt = 0;
396 int rlist_alloc;
397 int is_label;
398 int is_bss;
399 int wordc;
400 int first;
401 int arg_out;
402 int arg = 1;
403 int len;
404 int w, i;
405 char *p;
406 char *p2;
407
 // NOTE(review): this check runs before the options are consumed, so with
 // options present fewer than three file arguments can still get past it
 // and the argv[] reads below may then pick up NULL.
408 if (argc < 4) {
409 // -nd: no symbol decorations
410 printf("usage:\n%s [-nd] [-i] [-a] <.s> <.asm> <hdrf> [rlist]*\n",
411 argv[0]);
412 return 1;
413 }
414
 // leading options; the first unrecognised argument ends option parsing
415 for (arg = 1; arg < argc; arg++) {
416 if (IS(argv[arg], "-nd"))
417 no_decorations = 1;
418 else if (IS(argv[arg], "-i"))
419 g_cconv_novalidate = 1;
420 else if (IS(argv[arg], "-a")) {
421 comment_char = '@';
422 g_arm_mode = 1;
423 }
424 else
425 break;
426 }
427
 // remember the output file position, it is opened after the inputs
428 arg_out = arg++;
429
430 asmfn = argv[arg++];
431 fasm = fopen(asmfn, "r");
432 my_assert_not(fasm, NULL);
433
 // NOTE(review): hdrfn is not declared in this file; presumably it comes
 // from protoparse.h - confirm.
434 hdrfn = argv[arg++];
435 fhdr = fopen(hdrfn, "r");
436 my_assert_not(fhdr, NULL);
437
438 fout = fopen(argv[arg_out], "w");
439 my_assert_not(fout, NULL);
440
441 pub_sym_alloc = 64;
442 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
443 my_assert_not(pub_syms, NULL);
444
445 rlist_alloc = 64;
446 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
447 my_assert_not(rlist, NULL);
448
 // remaining arguments are rlist files: collect the first word of every
 // line that is not empty and does not start with ';'
449 for (; arg < argc; arg++) {
450 frlist = fopen(argv[arg], "r");
451 my_assert_not(frlist, NULL);
452
453 while (fgets(line, sizeof(line), frlist)) {
454 p = sskip(line);
455 if (*p == 0 || *p == ';')
456 continue;
457
458 p = next_word(words[0], sizeof(words[0]), p);
459 if (words[0][0] == 0)
460 continue;
461
462 if (rlist_cnt >= rlist_alloc) {
463 rlist_alloc = rlist_alloc * 2 + 64;
464 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
465 my_assert_not(rlist, NULL);
466 }
467 rlist[rlist_cnt++] = strdup(words[0]);
468 }
469
470 fclose(frlist);
471 frlist = NULL;
472 }
473
 // both tables are searched with bsearch() later, so sort them first
474 if (rlist_cnt > 0)
475 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
476
477 qsort(unwanted_syms, ARRAY_SIZE(unwanted_syms),
478 sizeof(unwanted_syms[0]), cmpstringp);
479
480 last_sym[0] = 0;
481
 // one iteration per segment found in the .asm input
482 while (1) {
483 next_section(fasm, line);
484 if (feof(fasm))
485 break;
 // line + 1 skips the first character of the segment name
 // (presumably a leading '.' or '_' - confirm)
486 if (IS(line + 1, "text"))
487 continue;
488
489 if (IS(line + 1, "rdata"))
490 fprintf(fout, "\n.section .rodata\n");
491 else if (IS(line + 1, "data"))
492 fprintf(fout, "\n.data\n");
493 else
494 aerr("unhandled section: '%s'\n", line);
495
496 fprintf(fout, ".align %d\n", align_value(4));
497
 // one iteration per line of the current segment
498 while (fgets(line, sizeof(line), fasm))
499 {
500 sym = NULL;
501 asmln++;
502
503 p = sskip(line);
504 if (*p == 0)
505 continue;
506
 // comment lines are dropped, except ";org <hex>h" which becomes a
 // .skip of the low 12 bits of the address
 // NOTE(review): %X expects an unsigned int *, but i is an int.
507 if (*p == ';') {
508 if (IS_START(p, ";org") && sscanf(p + 5, "%Xh", &i) == 1) {
509 // ;org is only seen at section start, so assume . addr 0
510 i &= 0xfff;
511 if (i != 0)
512 fprintf(fout, "\t\t .skip 0x%x\n", i);
513 }
514 continue;
515 }
516
 // split the line into up to ARRAY_SIZE(words) operands; commas and
 // blanks separate them, a ';' starts the trailing comment
517 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
518 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
519 if (*p == 0 || *p == ';') {
520 wordc++;
521 break;
522 }
523 if (*p == ',') {
524 p = sskip(p + 1);
525 }
526 }
527
 // a trailing comment starting with "sctclrtype" forgets the
 // remembered function-pointer label prototype
528 if (*p == ';') {
529 p = sskip(p + 1);
530 if (IS_START(p, "sctclrtype"))
531 g_func_sym_pp = NULL;
532 }
533
 // "<name> ends" or "end" finishes the current segment
534 if (wordc == 2 && IS(words[1], "ends"))
535 break;
536 if (wordc <= 2 && IS(words[0], "end"))
537 break;
538 if (wordc < 2)
539 aerr("unhandled: '%s'\n", words[0]);
540
541 // don't cares
542 if (IS(words[0], "assume"))
543 continue;
544
545 if (IS(words[0], "align")) {
546 val = parse_number(words[1]);
547 fprintf(fout, "\t\t .align %d", align_value(val));
548 goto fin;
549 }
550
 // either "<directive> operands..." or "<label> <directive> operands...";
 // w ends up as the index of the first operand
551 w = 1;
552 type = parse_dx_directive(words[0]);
553 if (type == DXT_UNSPEC) {
554 type = parse_dx_directive(words[1]);
555 sym = words[0];
556 w = 2;
557 }
558 if (type == DXT_UNSPEC)
559 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
560
561 if (sym != NULL) {
 // last_sym holds at most 31 characters of the label
562 snprintf(last_sym, sizeof(last_sym), "%s", sym);
563
 // labels that have a prototype in the header are exported later
564 pp = proto_parse(fhdr, sym, 1);
565 if (pp != NULL) {
566 g_func_sym_pp = NULL;
567
568 // public/global name
569 if (pub_sym_cnt >= pub_sym_alloc) {
570 pub_sym_alloc *= 2;
571 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
572 my_assert_not(pub_syms, NULL);
573 }
574 pub_syms[pub_sym_cnt++] = strdup(sym);
575 }
576
577 len = strlen(sym);
578 fprintf(fout, "%s%s:", no_decorations ? "" : "_", sym);
579
 // pad after the label so the directives line up in columns
580 len += 2;
581 if (len < 8)
582 fprintf(fout, "\t");
583 if (len < 16)
584 fprintf(fout, "\t");
585 if (len <= 16)
586 fprintf(fout, " ");
587 else
588 fprintf(fout, " ");
589 }
590 else {
591 fprintf(fout, "\t\t ");
592 }
593
594 // fill out some unwanted strings with zeroes..
595 if (type == DXT_BYTE && words[w][0] == '\''
596 && is_unwanted_sym(last_sym))
597 {
 // count the bytes the string would have taken and reserve them
598 len = 0;
599 for (; w < wordc; w++) {
600 if (words[w][0] == '\'') {
601 p = words[w] + 1;
602 for (; *p && *p != '\''; p++)
603 len++;
604 }
605 else {
606 // assume encoded byte
607 len++;
608 }
609 }
610 fprintf(fout, ".skip %d", len);
611 goto fin;
612 }
613 else if (type == DXT_BYTE
614 && (words[w][0] == '\''
615 || (w + 1 < wordc && words[w + 1][0] == '\'')))
616 {
617 // string; use asciz for most common case
618 if (w == wordc - 2 && IS(words[w + 1], "0")) {
619 fprintf(fout, ".asciz \"");
620 wordc--;
621 }
622 else
623 fprintf(fout, ".ascii \"");
624
 // quoted pieces are escaped and copied; everything else must be
 // a byte value and is written as a \x escape
625 for (; w < wordc; w++) {
626 if (words[w][0] == '\'') {
627 p = words[w] + 1;
628 p2 = strchr(p, '\'');
629 if (p2 == NULL)
630 aerr("unterminated string? '%s'\n", p);
631 memcpy(word, p, p2 - p);
632 word[p2 - p] = 0;
633 fprintf(fout, "%s", escape_string(word));
634 }
635 else {
636 val = parse_number(words[w]);
637 if (val & ~0xff)
638 aerr("bad string trailing byte?\n");
639 fprintf(fout, "\\x%02lx", val);
640 }
641 }
642 fprintf(fout, "\"");
643 goto fin;
644 }
645
 // "<count> dup(<value>)" becomes .fill; a '?' value is written as 0
646 if (w == wordc - 2) {
647 if (IS_START(words[w + 1], "dup(")) {
648 cnt = parse_number(words[w]);
649 p = words[w + 1] + 4;
650 p2 = strchr(p, ')');
651 if (p2 == NULL)
652 aerr("bad dup?\n");
653 memmove(word, p, p2 - p);
654 word[p2 - p] = 0;
655
656 val = 0;
657 if (!IS(word, "?"))
658 val = parse_number(word);
659
660 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
661 cnt, type_size(type), val);
662 goto fin;
663 }
664 }
665
 // a quoted four character constant in a dd: the first character
 // goes into the most significant byte
666 if (type == DXT_DWORD && words[w][0] == '\''
667 && words[w][5] == '\'' && strlen(words[w]) == 6)
668 {
669 if (w != wordc - 1)
670 aerr("TODO\n");
671
672 p = words[w];
673 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
674 fprintf(fout, ".long 0x%lx", val);
675 snprintf(g_comment, sizeof(g_comment), "%s", words[w]);
676 goto fin;
677 }
678
 // an operand containing '.' in a dd/dq/dt is a floating point
 // constant (this is the comparison that needs the enum ordering)
679 if (type >= DXT_DWORD && strchr(words[w], '.'))
680 {
681 if (w != wordc - 1)
682 aerr("TODO\n");
683
 // with -a, a 10-byte float is not emitted as a value: only
 // ".fill 10" is written and the value goes into the comment
684 if (g_arm_mode && type == DXT_TEN) {
685 fprintf(fout, ".fill 10");
686 snprintf(g_comment, sizeof(g_comment), "%s %s",
687 type_name_float(type), words[w]);
688 }
689 else
690 fprintf(fout, "%s %s", type_name_float(type), words[w]);
691 goto fin;
692 }
693
 // general case: a comma separated list of numbers, '?' and labels
694 first = 1;
695 fprintf(fout, "%s ", type_name(type));
696 for (; w < wordc; w++)
697 {
698 if (!first)
699 fprintf(fout, ", ");
700
701 is_label = is_bss = 0;
702 if (w <= wordc - 2 && IS(words[w], "offset")) {
703 is_label = 1;
704 w++;
705 }
706 else if (IS(words[w], "?")) {
707 is_bss = 1;
708 }
709 else if (type == DXT_DWORD
710 && !('0' <= words[w][0] && words[w][0] <= '9'))
711 {
712 // assume label
713 is_label = 1;
714 }
715
716 if (is_bss) {
717 fprintf(fout, "0");
718 }
719 else if (is_label) {
720 p = words[w];
 // references to loc_*, __imp*, names containing '?' or '@' and
 // names from the rlist files are written as 0, with the
 // original name kept in the comment
721 if (IS_START(p, "loc_") || IS_START(p, "__imp")
722 || strchr(p, '?') || strchr(p, '@')
723 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
724 cmpstringp))
725 {
726 fprintf(fout, "0");
727 snprintf(g_comment, sizeof(g_comment), "%s", p);
728 }
729 else {
 // other names are written out; function names get their
 // decoration from the prototype unless -nd was given
730 pp = check_var(fhdr, sym, p);
731 if (pp == NULL) {
732 fprintf(fout, "%s%s",
733 (no_decorations || p[0] == '_') ? "" : "_", p);
734 }
735 else {
736 if (no_decorations)
737 fprintf(fout, "%s", pp->name);
738 else
739 output_decorated_pp(fout, pp);
740 }
741 }
742 }
743 else {
 // plain number: single digits in decimal, the rest in hex
744 val = parse_number(words[w]);
745 if (val < 10)
746 fprintf(fout, "%ld", val);
747 else
748 fprintf(fout, "0x%lx", val);
749 }
750
751 first = 0;
752 }
753
 // common line ending: append the pending comment, if any
754fin:
755 if (g_comment[0] != 0) {
756 fprintf(fout, "\t\t%c %s", comment_char, g_comment);
757 g_comment[0] = 0;
758 }
759 fprintf(fout, "\n");
760 }
761 }
762
763 fprintf(fout, "\n");
764
765 // dump public syms
766 for (i = 0; i < pub_sym_cnt; i++)
767 fprintf(fout, ".global %s%s\n",
768 no_decorations ? "" : "_", pub_syms[i]);
769
770 fclose(fout);
771 fclose(fasm);
772 fclose(fhdr);
773
774 return 0;
775}
776
777// vim:ts=2:shiftwidth=2:expandtab