more work on cvt_data, works mostly
[ia32rtools.git] / tools / cvt_data.c
CommitLineData
054f95b2 1#define _GNU_SOURCE
2#include <stdio.h>
3#include <stdlib.h>
4#include <string.h>
5
6#include "my_assert.h"
7#include "my_str.h"
8
// number of elements in a true array (not a pointer)
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// non-zero when strings w and y are equal
#define IS(w, y) (strcmp((w), (y)) == 0)
// non-zero when string w begins with string y
#define IS_START(w, y) (strncmp((w), (y), strlen(y)) == 0)
12
13#include "protoparse.h"
14
// name of the .asm input file; set in main() and printed by anote()/aerr()
static const char *asmfn;
// count of .asm lines read so far; printed by anote()/aerr() as the line number
static int asmln;
054f95b2 17
// Data directive kinds. The values must stay in ascending order:
// main() compares them (type >= DXT_DWORD).
enum dx_type {
  DXT_UNSPEC = 0, // not a recognised data directive
  DXT_BYTE   = 1,
  DXT_WORD   = 2,
  DXT_DWORD  = 3,
  DXT_QUAD   = 4,
  DXT_TEN    = 5,
};
27
// common "<file>:<line>: <kind>: <message>" printer for the macros below
#define amsg_(kind, fmt, ...) \
  printf("%s:%d: " kind ": " fmt, asmfn, asmln, ##__VA_ARGS__)

// print a note tagged with the current .asm file name and line
#define anote(fmt, ...) \
  amsg_("note", fmt, ##__VA_ARGS__)

// print an error tagged with the current .asm position, close all
// streams and terminate with exit status 1
#define aerr(fmt, ...) do { \
  amsg_("error", fmt, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
35
36#include "masm_tools.h"
37
// Copy the next word of s into w (at most wsize - 1 characters plus a
// terminating 0) and return a pointer to the first character that was not
// copied.
//
// Leading characters are skipped with sskip(). A word ends at the end of
// the string, or at a blank or ',' that is outside single quotes, so a
// quoted string such as 'a, b' is kept together as one word.
// A warning is printed when the word did not fit into w.
// With wsize == 0 nothing is written to w.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  int quote = 0;
  size_t i;

  s = sskip(s);

  // no room even for the terminator; also keeps wsize - 1 from wrapping
  if (wsize == 0)
    return s;

  for (i = 0; i < wsize - 1; i++) {
    if (s[i] == '\'')
      quote ^= 1;
    if (s[i] == 0 || (!quote && (my_isblank(s[i]) || s[i] == ',')))
      break;
    w[i] = s[i];
  }
  w[i] = 0;

  // Reaching here with input left means either a normal delimiter stop
  // (quote closed, s[i] is blank or ',') or truncation. An open quote
  // always means truncation, even if s[i] happens to be a blank or ','.
  if (s[i] != 0 && (quote || (!my_isblank(s[i]) && s[i] != ',')))
    printf("warning: '%s' truncated\n", w);

  return s + i;
}
59
60static void next_section(FILE *fasm, char *name)
61{
62 char words[2][256];
63 char line[256];
64 int wordc;
65 char *p;
66
67 name[0] = 0;
68
69 while (fgets(line, sizeof(line), fasm))
70 {
71 wordc = 0;
72 asmln++;
73
74 p = sskip(line);
75 if (*p == 0)
76 continue;
77
78 if (*p == ';') {
79 while (strlen(line) == sizeof(line) - 1) {
80 // one of those long comment lines..
81 if (!fgets(line, sizeof(line), fasm))
82 break;
83 }
84 continue;
85 }
86
87 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
88 p = sskip(next_word(words[wordc], sizeof(words[0]), p));
89 if (*p == 0 || *p == ';') {
90 wordc++;
91 break;
92 }
93 }
94
95 if (wordc < 2)
96 continue;
97
98 if (!IS(words[1], "segment"))
99 continue;
100
101 strcpy(name, words[0]);
102 break;
103 }
104}
105
106static enum dx_type parse_dx_directive(const char *name)
107{
108 if (IS(name, "dd"))
109 return DXT_DWORD;
110 if (IS(name, "dw"))
111 return DXT_WORD;
112 if (IS(name, "db"))
113 return DXT_BYTE;
114 if (IS(name, "dq"))
115 return DXT_QUAD;
116 if (IS(name, "dt"))
117 return DXT_TEN;
118
119 return DXT_UNSPEC;
120}
121
122static const char *type_name(enum dx_type type)
123{
124 switch (type) {
125 case DXT_BYTE:
126 return ".byte";
127 case DXT_WORD:
128 return ".word";
129 case DXT_DWORD:
130 return ".long";
131 case DXT_QUAD:
132 return ".quad";
133 case DXT_TEN:
134 return ".tfloat";
135 case DXT_UNSPEC:
136 break;
137 }
138 return "<bad>";
139}
140
36595fd2 141static const char *type_name_float(enum dx_type type)
142{
143 switch (type) {
144 case DXT_DWORD:
145 return ".float";
146 case DXT_QUAD:
147 return ".double";
148 case DXT_TEN:
149 return ".tfloat";
150 default:
151 break;
152 }
153 return "<bad_float>";
154}
155
054f95b2 156static int type_size(enum dx_type type)
157{
158 switch (type) {
159 case DXT_BYTE:
160 return 1;
161 case DXT_WORD:
162 return 2;
163 case DXT_DWORD:
164 return 4;
165 case DXT_QUAD:
166 return 8;
167 case DXT_TEN:
168 return 10;
169 case DXT_UNSPEC:
170 break;
171 }
172 return -1;
173}
174
// Escape s in place so it can be emitted between double quotes in an
// .ascii/.asciz directive: '"' becomes \" and '\' becomes \\.
//
// Returns s. The buffer behind s must have room for
// strlen(s) + (number of '"' and '\' characters) + 1 bytes.
//
// Note: '"' used to be written as \22. An assembler that reads
// backslash-digits as an octal code (as GNU as does) takes that as
// byte 0x12, not as a double quote, so \" is used instead.
static char *escape_string(char *s)
{
  size_t len = strlen(s);
  size_t extra = 0;
  size_t src, dst;

  for (src = 0; src < len; src++) {
    if (s[src] == '"' || s[src] == '\\')
      extra++;
  }
  if (extra == 0)
    return s;

  // Expand from the end towards the start so that no input byte is
  // overwritten before it has been read; no temporary buffer is needed.
  src = len;
  dst = len + extra;
  s[dst] = 0;
  while (src > 0) {
    char c = s[--src];

    s[--dst] = c;
    if (c == '"' || c == '\\')
      s[--dst] = '\\';
  }

  return s;
}
196
36595fd2 197static void check_sym(FILE *fhdr, const char *name)
198{
199 const struct parsed_proto *pp;
200 char buf[256];
201 int i, l;
202
203 pp = proto_parse(fhdr, name, 1);
204 if (pp == NULL) {
205 if (IS_START(name, "sub_"))
206 aerr("sub_ sym missing proto: '%s'\n", name);
207 return;
208 }
209
210 if (!pp->is_func && !pp->is_fptr)
211 return;
212 if (pp->argc_reg == 0)
213 return;
214 if (pp->argc_reg == 1 && pp->argc_stack == 0
215 && IS(pp->arg[0].reg, "ecx"))
216 {
217 return;
218 }
219 if (pp->argc_reg == 2
220 && IS(pp->arg[0].reg, "ecx")
221 && IS(pp->arg[1].reg, "edx"))
222 {
223 return;
224 }
225 snprintf(buf, sizeof(buf), "%s %s(",
226 pp->ret_type.name, name);
227 l = strlen(buf);
228
229 for (i = 0; i < pp->argc_reg; i++) {
230 snprintf(buf + l, sizeof(buf) - l, "%s%s",
231 i == 0 ? "" : ", ", pp->arg[i].reg);
232 l = strlen(buf);
233 }
234 if (pp->argc_stack > 0) {
235 snprintf(buf + l, sizeof(buf) - l, ", {%d stack}", pp->argc_stack);
236 l = strlen(buf);
237 }
238 snprintf(buf + l, sizeof(buf) - l, ")");
239
240 aerr("unhandled reg call: %s\n", buf);
241}
242
// qsort()/bsearch() comparator for arrays of C strings: both arguments
// point to a "char *" element, the strings are ordered with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(const char * const *)p1;
  const char *rhs = *(const char * const *)p2;

  return strcmp(lhs, rhs);
}
247
054f95b2 248int main(int argc, char *argv[])
249{
36595fd2 250 FILE *fout, *fasm, *fhdr, *frlist;
054f95b2 251 char words[20][256];
054f95b2 252 char word[256];
253 char line[256];
254 char comment[256];
255 unsigned long val;
256 unsigned long cnt;
257 const char *sym;
258 enum dx_type type;
36595fd2 259 char **pub_syms;
260 int pub_sym_cnt = 0;
261 int pub_sym_alloc;
262 char **rlist;
263 int rlist_cnt = 0;
264 int rlist_alloc;
054f95b2 265 int is_label;
36595fd2 266 int is_bss;
054f95b2 267 int wordc;
268 int first;
269 int arg_out;
270 int arg = 1;
271 int len;
36595fd2 272 int w, i;
054f95b2 273 char *p;
274 char *p2;
275
36595fd2 276 if (argc < 4) {
277 printf("usage:\n%s <.s> <.asm> <hdrf> [rlist]*\n",
054f95b2 278 argv[0]);
279 return 1;
280 }
281
282 arg_out = arg++;
283
284 asmfn = argv[arg++];
285 fasm = fopen(asmfn, "r");
286 my_assert_not(fasm, NULL);
287
288 hdrfn = argv[arg++];
36595fd2 289 fhdr = fopen(hdrfn, "r");
290 my_assert_not(fhdr, NULL);
054f95b2 291
292 fout = fopen(argv[arg_out], "w");
293 my_assert_not(fout, NULL);
294
295 comment[0] = 0;
296
36595fd2 297 pub_sym_alloc = 64;
298 pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0]));
299 my_assert_not(pub_syms, NULL);
300
301 rlist_alloc = 64;
302 rlist = malloc(rlist_alloc * sizeof(rlist[0]));
303 my_assert_not(rlist, NULL);
304
305 for (; arg < argc; arg++) {
306 frlist = fopen(argv[arg], "r");
307 my_assert_not(frlist, NULL);
308
309 while (fgets(line, sizeof(line), frlist)) {
310 p = sskip(line);
311 if (*p == 0 || *p == ';')
312 continue;
313
314 p = next_word(words[0], sizeof(words[0]), p);
315 if (words[0][0] == 0)
316 continue;
317
318 if (rlist_cnt >= rlist_alloc) {
319 rlist_alloc = rlist_alloc * 2 + 64;
320 rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0]));
321 my_assert_not(rlist, NULL);
322 }
323 rlist[rlist_cnt++] = strdup(words[0]);
324 }
325
326 fclose(frlist);
327 frlist = NULL;
328 }
329
330 if (rlist_cnt > 0)
331 qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp);
332
333 while (1) {
054f95b2 334 next_section(fasm, line);
36595fd2 335 if (feof(fasm))
336 break;
054f95b2 337 if (IS(line + 1, "text"))
338 continue;
339
340 if (IS(line + 1, "rdata"))
341 fprintf(fout, "\n.section .rodata\n");
342 else if (IS(line + 1, "data"))
343 fprintf(fout, "\n.data\n");
344 else
345 aerr("unhandled section: '%s'\n", line);
346
347 fprintf(fout, ".align 4\n");
348
349 while (fgets(line, sizeof(line), fasm))
350 {
351 sym = NULL;
352 asmln++;
353
354 p = sskip(line);
355 if (*p == 0 || *p == ';')
356 continue;
357
358 for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) {
054f95b2 359 p = sskip(next_word_s(words[wordc], sizeof(words[0]), p));
360 if (*p == 0 || *p == ';') {
361 wordc++;
362 break;
363 }
364 if (*p == ',') {
054f95b2 365 p = sskip(p + 1);
366 }
367 }
368
369 if (wordc == 2 && IS(words[1], "ends"))
370 break;
36595fd2 371 if (wordc <= 2 && IS(words[0], "end"))
372 break;
054f95b2 373 if (wordc < 2)
374 aerr("unhandled: '%s'\n", words[0]);
375
376 // don't cares
377 if (IS(words[0], "assume"))
378 continue;
379
380 if (IS(words[0], "align")) {
381 val = parse_number(words[1]);
382 fprintf(fout, "\t\t .align %ld", val);
383 goto fin;
384 }
385
386 w = 1;
387 type = parse_dx_directive(words[0]);
388 if (type == DXT_UNSPEC) {
389 type = parse_dx_directive(words[1]);
390 sym = words[0];
391 w = 2;
392 }
393 if (type == DXT_UNSPEC)
394 aerr("unhandled decl: '%s %s'\n", words[0], words[1]);
395
396 if (sym != NULL) {
36595fd2 397 // public/global name
398 if (pub_sym_cnt >= pub_sym_alloc) {
399 pub_sym_alloc *= 2;
400 pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0]));
401 my_assert_not(pub_syms, NULL);
402 }
403 pub_syms[pub_sym_cnt++] = strdup(sym);
404
054f95b2 405 len = strlen(sym);
406 fprintf(fout, "_%s:", sym);
407
408 len += 2;
409 if (len < 8)
410 fprintf(fout, "\t");
411 if (len < 16)
412 fprintf(fout, "\t");
413 if (len <= 16)
414 fprintf(fout, " ");
415 else
416 fprintf(fout, " ");
417 }
418 else {
419 fprintf(fout, "\t\t ");
420 }
421
422 if (type == DXT_BYTE && words[w][0] == '\'') {
423 // string; use asciz for most common case
424 if (w == wordc - 2 && IS(words[w + 1], "0")) {
425 fprintf(fout, ".asciz \"");
426 wordc--;
427 }
428 else
429 fprintf(fout, ".ascii \"");
430
431 for (; w < wordc; w++) {
432 if (words[w][0] == '\'') {
433 p = words[w] + 1;
434 p2 = strchr(p, '\'');
435 if (p2 == NULL)
436 aerr("unterminated string? '%s'\n", p);
437 memcpy(word, p, p2 - p);
438 word[p2 - p] = 0;
439 fprintf(fout, "%s", escape_string(word));
440 }
441 else {
442 val = parse_number(words[w]);
443 if (val & ~0xff)
444 aerr("bad string trailing byte?\n");
445 fprintf(fout, "\\x%02lx", val);
446 }
447 }
448 fprintf(fout, "\"");
449 goto fin;
450 }
451
452 if (w == wordc - 2) {
453 if (IS_START(words[w + 1], "dup(")) {
454 cnt = parse_number(words[w]);
455 p = words[w + 1] + 4;
456 p2 = strchr(p, ')');
457 if (p2 == NULL)
458 aerr("bad dup?\n");
459 memmove(word, p, p2 - p);
460 word[p2 - p] = 0;
36595fd2 461
462 val = 0;
463 if (!IS(word, "?"))
464 val = parse_number(word);
054f95b2 465
466 fprintf(fout, ".fill 0x%02lx,%d,0x%02lx",
467 cnt, type_size(type), val);
468 goto fin;
469 }
470 }
471
472 if (type == DXT_DWORD && words[w][0] == '\''
473 && words[w][5] == '\'' && strlen(words[w]) == 6)
474 {
475 if (w != wordc - 1)
476 aerr("TODO\n");
477
478 p = words[w];
479 val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4];
480 fprintf(fout, ".long 0x%lx", val);
481 snprintf(comment, sizeof(comment), "%s", words[w]);
482 goto fin;
483 }
484
36595fd2 485 if (type >= DXT_DWORD && strchr(words[w], '.'))
054f95b2 486 {
487 if (w != wordc - 1)
488 aerr("TODO\n");
489
36595fd2 490 fprintf(fout, "%s %s", type_name_float(type), words[w]);
054f95b2 491 goto fin;
492 }
493
494 first = 1;
495 fprintf(fout, "%s ", type_name(type));
496 for (; w < wordc; w++)
497 {
498 if (!first)
499 fprintf(fout, ", ");
500
36595fd2 501 is_label = is_bss = 0;
502 if (w <= wordc - 2 && IS(words[w], "offset")) {
054f95b2 503 is_label = 1;
504 w++;
505 }
36595fd2 506 else if (IS(words[w], "?")) {
507 is_bss = 1;
508 }
054f95b2 509 else if (type == DXT_DWORD
510 && !('0' <= words[w][0] && words[w][0] <= '9'))
511 {
512 // assume label
513 is_label = 1;
514 }
515
36595fd2 516 if (is_bss) {
517 fprintf(fout, "0");
518 }
519 else if (is_label) {
054f95b2 520 p = words[w];
36595fd2 521 if (IS_START(p, "loc_") || strchr(p, '?') || strchr(p, '@')
522 || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]),
523 cmpstringp))
054f95b2 524 {
525 fprintf(fout, "0");
36595fd2 526 snprintf(comment, sizeof(comment), "%s", p);
527 }
528 else {
529 check_sym(fhdr, p);
530 fprintf(fout, "_%s", p);
054f95b2 531 }
054f95b2 532 }
533 else {
534 val = parse_number(words[w]);
535 if (val < 10)
536 fprintf(fout, "%ld", val);
537 else
538 fprintf(fout, "0x%lx", val);
539 }
540
541 first = 0;
542 }
543
544fin:
545 if (comment[0] != 0) {
546 fprintf(fout, "\t\t# %s", comment);
547 comment[0] = 0;
548 }
549 fprintf(fout, "\n");
550 (void)proto_parse;
551 }
552 }
553
36595fd2 554 fprintf(fout, "\n");
555
556 // dump public syms
557 for (i = 0; i < pub_sym_cnt; i++)
558 fprintf(fout, ".global _%s\n", pub_syms[i]);
559
054f95b2 560 fclose(fout);
561 fclose(fasm);
36595fd2 562 fclose(fhdr);
054f95b2 563
564 return 0;
565}
566
567// vim:ts=2:shiftwidth=2:expandtab