054f95b2 |
1 | #define _GNU_SOURCE |
2 | #include <stdio.h> |
3 | #include <stdlib.h> |
4 | #include <string.h> |
5 | |
6 | #include "my_assert.h" |
7 | #include "my_str.h" |
8 | |
// Element count of a true array (must not be used on a pointer).
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
// Non-zero when the strings w and y are equal.
#define IS(w, y) (!strcmp((w), (y)))
// Non-zero when the string w begins with the string y.
#define IS_START(w, y) (!strncmp((w), (y), strlen(y)))
12 | |
13 | #include "protoparse.h" |
14 | |
15 | static const char *asmfn; |
16 | static int asmln; |
054f95b2 |
17 | |
b545ba7c |
18 | static const struct parsed_proto *g_func_sym_pp; |
19 | static char g_comment[256]; |
20 | static int g_warn_cnt; |
21 | |
36595fd2 |
// note: must be in ascending order
// (the enumerators are compared with relational operators)
enum dx_type {
  DXT_UNSPEC = 0, // not a recognized data directive
  DXT_BYTE,       // db
  DXT_WORD,       // dw
  DXT_DWORD,      // dd
  DXT_QUAD,       // dq
  DXT_TEN,        // dt
};
31 | |
36595fd2 |
// Diagnostics helpers. Every message is prefixed with the current
// asm file name (asmfn) and line number (asmln) and goes to stdout.

// anote: print an informational note.
#define anote(fmt, ...) \
  printf("%s:%d: note: " fmt, asmfn, asmln, ##__VA_ARGS__)

// awarn: print a warning; on the 10th warning all streams are closed
// and the program exits with status 1.
#define awarn(fmt, ...) do { \
  printf("%s:%d: warning: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  g_warn_cnt += 1; \
  if (g_warn_cnt == 10) { \
    fcloseall(); \
    exit(1); \
  } \
} while (0)

// aerr: print an error, close all streams and exit with status 1.
#define aerr(fmt, ...) do { \
  printf("%s:%d: error: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  fcloseall(); \
  exit(1); \
} while (0)
46 | |
47 | #include "masm_tools.h" |
48 | |
// Copy the next word of s into w (at most wsize - 1 chars, always
// NUL-terminated). Leading blanks are skipped with sskip(); a word ends
// at a blank or ',' unless that character sits inside a '...' quote.
// Prints a warning when the word did not fit into w.
// Returns a pointer to the first character that was not consumed.
static char *next_word_s(char *w, size_t wsize, char *s)
{
  size_t n = 0;
  int in_quote = 0;
  char c;

  s = sskip(s);

  while (n < wsize - 1) {
    c = s[n];
    if (c == '\'')
      in_quote = !in_quote;
    if (c == 0)
      break;
    if (!in_quote && (my_isblank(c) || c == ','))
      break;
    w[n++] = c;
  }
  w[n] = 0;

  // stopping on anything but a terminator means we ran out of room
  c = s[n];
  if (c != 0 && !my_isblank(c) && c != ',')
    printf("warning: '%s' truncated\n", w);

  return s + n;
}
70 | |
71 | static void next_section(FILE *fasm, char *name) |
72 | { |
73 | char words[2][256]; |
74 | char line[256]; |
75 | int wordc; |
76 | char *p; |
77 | |
78 | name[0] = 0; |
79 | |
80 | while (fgets(line, sizeof(line), fasm)) |
81 | { |
82 | wordc = 0; |
83 | asmln++; |
84 | |
85 | p = sskip(line); |
86 | if (*p == 0) |
87 | continue; |
88 | |
89 | if (*p == ';') { |
90 | while (strlen(line) == sizeof(line) - 1) { |
91 | // one of those long comment lines.. |
92 | if (!fgets(line, sizeof(line), fasm)) |
93 | break; |
94 | } |
95 | continue; |
96 | } |
97 | |
98 | for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) { |
99 | p = sskip(next_word(words[wordc], sizeof(words[0]), p)); |
100 | if (*p == 0 || *p == ';') { |
101 | wordc++; |
102 | break; |
103 | } |
104 | } |
105 | |
106 | if (wordc < 2) |
107 | continue; |
108 | |
109 | if (!IS(words[1], "segment")) |
110 | continue; |
111 | |
112 | strcpy(name, words[0]); |
113 | break; |
114 | } |
115 | } |
116 | |
117 | static enum dx_type parse_dx_directive(const char *name) |
118 | { |
119 | if (IS(name, "dd")) |
120 | return DXT_DWORD; |
121 | if (IS(name, "dw")) |
122 | return DXT_WORD; |
123 | if (IS(name, "db")) |
124 | return DXT_BYTE; |
125 | if (IS(name, "dq")) |
126 | return DXT_QUAD; |
127 | if (IS(name, "dt")) |
128 | return DXT_TEN; |
129 | |
130 | return DXT_UNSPEC; |
131 | } |
132 | |
133 | static const char *type_name(enum dx_type type) |
134 | { |
135 | switch (type) { |
136 | case DXT_BYTE: |
137 | return ".byte"; |
138 | case DXT_WORD: |
139 | return ".word"; |
140 | case DXT_DWORD: |
141 | return ".long"; |
142 | case DXT_QUAD: |
143 | return ".quad"; |
144 | case DXT_TEN: |
145 | return ".tfloat"; |
146 | case DXT_UNSPEC: |
147 | break; |
148 | } |
149 | return "<bad>"; |
150 | } |
151 | |
36595fd2 |
152 | static const char *type_name_float(enum dx_type type) |
153 | { |
154 | switch (type) { |
155 | case DXT_DWORD: |
156 | return ".float"; |
157 | case DXT_QUAD: |
158 | return ".double"; |
159 | case DXT_TEN: |
160 | return ".tfloat"; |
161 | default: |
162 | break; |
163 | } |
164 | return "<bad_float>"; |
165 | } |
166 | |
054f95b2 |
167 | static int type_size(enum dx_type type) |
168 | { |
169 | switch (type) { |
170 | case DXT_BYTE: |
171 | return 1; |
172 | case DXT_WORD: |
173 | return 2; |
174 | case DXT_DWORD: |
175 | return 4; |
176 | case DXT_QUAD: |
177 | return 8; |
178 | case DXT_TEN: |
179 | return 10; |
180 | case DXT_UNSPEC: |
181 | break; |
182 | } |
183 | return -1; |
184 | } |
185 | |
// Escape s for use inside a double-quoted assembler string:
// '"' becomes \" and '\' becomes \\.
//
// The result lives in a static buffer that is overwritten by the next
// call; s itself is not modified. The buffer holds the worst case (every
// character doubled) for inputs shorter than 256 characters; longer
// results are truncated with a warning instead of overflowing.
//
// Fixes over the previous version:
// - the escaped text was copied to the END of s (s had been advanced by
//   the scan loop), corrupting and possibly overflowing the caller's buffer
// - the expansion could overflow the local 256 byte scratch buffer
// - '"' was emitted as \22, which is an octal escape (character 0x12)
//   and not a double quote
static char *escape_string(char *s)
{
  static char buf[2 * 256];
  const char *in;
  size_t out = 0;
  size_t need;
  int special;

  for (in = s; *in != 0; in++) {
    special = (*in == '"' || *in == '\\');
    need = special ? 2 : 1;

    // always keep one byte for the terminator
    if (out + need >= sizeof(buf)) {
      printf("warning: escaped string truncated\n");
      break;
    }

    if (special)
      buf[out++] = '\\';
    buf[out++] = *in;
  }
  buf[out] = 0;

  return buf;
}
207 | |
b545ba7c |
208 | static void sprint_pp_short(const struct parsed_proto *pp, char *buf, |
209 | size_t buf_size) |
210 | { |
211 | char *p = buf; |
212 | size_t l; |
213 | int i; |
214 | |
215 | if (pp->ret_type.is_ptr) |
216 | *p++ = 'p'; |
217 | else if (IS(pp->ret_type.name, "void")) |
218 | *p++ = 'v'; |
219 | else |
220 | *p++ = 'i'; |
221 | *p++ = '('; |
222 | l = 2; |
223 | |
224 | for (i = 0; i < pp->argc; i++) { |
225 | if (pp->arg[i].reg != NULL) |
226 | snprintf(buf + l, buf_size - l, "%s%s", |
227 | i == 0 ? "" : ",", pp->arg[i].reg); |
228 | else |
229 | snprintf(buf + l, buf_size - l, "%sa%d", |
230 | i == 0 ? "" : ",", i + 1); |
231 | l = strlen(buf); |
232 | } |
233 | snprintf(buf + l, buf_size - l, ")"); |
234 | } |
235 | |
c0050df6 |
236 | static const struct parsed_proto *check_var(FILE *fhdr, |
237 | const char *sym, const char *varname) |
b545ba7c |
238 | { |
239 | const struct parsed_proto *pp, *pp_sym; |
240 | char fp_sym[256], fp_var[256]; |
241 | int i, bad = 0; |
242 | |
243 | pp = proto_parse(fhdr, varname, 1); |
36595fd2 |
244 | if (pp == NULL) { |
b545ba7c |
245 | if (IS_START(varname, "sub_")) |
246 | awarn("sub_ sym missing proto: '%s'\n", varname); |
c0050df6 |
247 | return NULL; |
36595fd2 |
248 | } |
249 | |
250 | if (!pp->is_func && !pp->is_fptr) |
c0050df6 |
251 | return NULL; |
b545ba7c |
252 | |
b74c31e3 |
253 | pp_print(fp_var, sizeof(fp_var), pp); |
b545ba7c |
254 | |
36595fd2 |
255 | if (pp->argc_reg == 0) |
b545ba7c |
256 | goto check_sym; |
36595fd2 |
257 | if (pp->argc_reg == 1 && pp->argc_stack == 0 |
258 | && IS(pp->arg[0].reg, "ecx")) |
259 | { |
b545ba7c |
260 | goto check_sym; |
36595fd2 |
261 | } |
b545ba7c |
262 | if (pp->argc_reg != 2 |
263 | || !IS(pp->arg[0].reg, "ecx") |
264 | || !IS(pp->arg[1].reg, "edx")) |
36595fd2 |
265 | { |
b545ba7c |
266 | awarn("unhandled reg call: %s\n", fp_var); |
36595fd2 |
267 | } |
36595fd2 |
268 | |
b545ba7c |
269 | check_sym: |
270 | sprint_pp_short(pp, g_comment, sizeof(g_comment)); |
271 | |
272 | if (sym != NULL) { |
273 | g_func_sym_pp = NULL; |
274 | pp_sym = proto_parse(fhdr, sym, 1); |
275 | if (pp_sym == NULL) |
c0050df6 |
276 | return pp; |
b545ba7c |
277 | if (!pp_sym->is_fptr) |
278 | aerr("func ptr data, but label '%s' !is_fptr\n", pp_sym->name); |
279 | g_func_sym_pp = pp_sym; |
36595fd2 |
280 | } |
b545ba7c |
281 | else { |
282 | pp_sym = g_func_sym_pp; |
283 | if (pp_sym == NULL) |
c0050df6 |
284 | return pp; |
b545ba7c |
285 | } |
286 | |
287 | if (pp->argc != pp_sym->argc || pp->argc_reg != pp_sym->argc_reg) |
288 | bad = 1; |
289 | else { |
290 | for (i = 0; i < pp->argc; i++) { |
291 | if ((pp->arg[i].reg != NULL) != (pp_sym->arg[i].reg != NULL)) { |
292 | bad = 1; |
293 | break; |
294 | } |
295 | if ((pp->arg[i].reg != NULL) |
296 | && !IS(pp->arg[i].reg, pp_sym->arg[i].reg)) |
297 | { |
298 | bad = 1; |
299 | break; |
300 | } |
301 | } |
302 | } |
303 | |
304 | if (bad) { |
b74c31e3 |
305 | pp_print(fp_sym, sizeof(fp_sym), pp_sym); |
b545ba7c |
306 | anote("var: %s\n", fp_var); |
307 | anote("sym: %s\n", fp_sym); |
308 | awarn("^ mismatch\n"); |
36595fd2 |
309 | } |
c0050df6 |
310 | |
311 | return pp; |
36595fd2 |
312 | } |
313 | |
// qsort()/bsearch() comparator for arrays of C string pointers:
// each argument points at a char pointer, the strings are compared
// with strcmp().
static int cmpstringp(const void *p1, const void *p2)
{
  const char *lhs = *(char * const *)p1;
  const char *rhs = *(char * const *)p2;

  return strcmp(lhs, rhs);
}
318 | |
054f95b2 |
319 | int main(int argc, char *argv[]) |
320 | { |
36595fd2 |
321 | FILE *fout, *fasm, *fhdr, *frlist; |
b545ba7c |
322 | const struct parsed_proto *pp; |
054f95b2 |
323 | char words[20][256]; |
054f95b2 |
324 | char word[256]; |
325 | char line[256]; |
054f95b2 |
326 | unsigned long val; |
327 | unsigned long cnt; |
328 | const char *sym; |
329 | enum dx_type type; |
36595fd2 |
330 | char **pub_syms; |
331 | int pub_sym_cnt = 0; |
332 | int pub_sym_alloc; |
333 | char **rlist; |
334 | int rlist_cnt = 0; |
335 | int rlist_alloc; |
054f95b2 |
336 | int is_label; |
36595fd2 |
337 | int is_bss; |
054f95b2 |
338 | int wordc; |
339 | int first; |
340 | int arg_out; |
341 | int arg = 1; |
342 | int len; |
36595fd2 |
343 | int w, i; |
054f95b2 |
344 | char *p; |
345 | char *p2; |
346 | |
36595fd2 |
347 | if (argc < 4) { |
348 | printf("usage:\n%s <.s> <.asm> <hdrf> [rlist]*\n", |
054f95b2 |
349 | argv[0]); |
350 | return 1; |
351 | } |
352 | |
353 | arg_out = arg++; |
354 | |
355 | asmfn = argv[arg++]; |
356 | fasm = fopen(asmfn, "r"); |
357 | my_assert_not(fasm, NULL); |
358 | |
359 | hdrfn = argv[arg++]; |
36595fd2 |
360 | fhdr = fopen(hdrfn, "r"); |
361 | my_assert_not(fhdr, NULL); |
054f95b2 |
362 | |
363 | fout = fopen(argv[arg_out], "w"); |
364 | my_assert_not(fout, NULL); |
365 | |
36595fd2 |
366 | pub_sym_alloc = 64; |
367 | pub_syms = malloc(pub_sym_alloc * sizeof(pub_syms[0])); |
368 | my_assert_not(pub_syms, NULL); |
369 | |
370 | rlist_alloc = 64; |
371 | rlist = malloc(rlist_alloc * sizeof(rlist[0])); |
372 | my_assert_not(rlist, NULL); |
373 | |
374 | for (; arg < argc; arg++) { |
375 | frlist = fopen(argv[arg], "r"); |
376 | my_assert_not(frlist, NULL); |
377 | |
378 | while (fgets(line, sizeof(line), frlist)) { |
379 | p = sskip(line); |
380 | if (*p == 0 || *p == ';') |
381 | continue; |
382 | |
383 | p = next_word(words[0], sizeof(words[0]), p); |
384 | if (words[0][0] == 0) |
385 | continue; |
386 | |
387 | if (rlist_cnt >= rlist_alloc) { |
388 | rlist_alloc = rlist_alloc * 2 + 64; |
389 | rlist = realloc(rlist, rlist_alloc * sizeof(rlist[0])); |
390 | my_assert_not(rlist, NULL); |
391 | } |
392 | rlist[rlist_cnt++] = strdup(words[0]); |
393 | } |
394 | |
395 | fclose(frlist); |
396 | frlist = NULL; |
397 | } |
398 | |
399 | if (rlist_cnt > 0) |
400 | qsort(rlist, rlist_cnt, sizeof(rlist[0]), cmpstringp); |
401 | |
402 | while (1) { |
054f95b2 |
403 | next_section(fasm, line); |
36595fd2 |
404 | if (feof(fasm)) |
405 | break; |
054f95b2 |
406 | if (IS(line + 1, "text")) |
407 | continue; |
408 | |
409 | if (IS(line + 1, "rdata")) |
410 | fprintf(fout, "\n.section .rodata\n"); |
411 | else if (IS(line + 1, "data")) |
412 | fprintf(fout, "\n.data\n"); |
413 | else |
414 | aerr("unhandled section: '%s'\n", line); |
415 | |
416 | fprintf(fout, ".align 4\n"); |
417 | |
418 | while (fgets(line, sizeof(line), fasm)) |
419 | { |
420 | sym = NULL; |
421 | asmln++; |
422 | |
423 | p = sskip(line); |
424 | if (*p == 0 || *p == ';') |
425 | continue; |
426 | |
427 | for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) { |
054f95b2 |
428 | p = sskip(next_word_s(words[wordc], sizeof(words[0]), p)); |
429 | if (*p == 0 || *p == ';') { |
430 | wordc++; |
431 | break; |
432 | } |
433 | if (*p == ',') { |
054f95b2 |
434 | p = sskip(p + 1); |
435 | } |
436 | } |
437 | |
b545ba7c |
438 | if (*p == ';') { |
439 | p = sskip(p + 1); |
440 | if (IS_START(p, "sctclrtype")) |
441 | g_func_sym_pp = NULL; |
442 | } |
443 | |
054f95b2 |
444 | if (wordc == 2 && IS(words[1], "ends")) |
445 | break; |
36595fd2 |
446 | if (wordc <= 2 && IS(words[0], "end")) |
447 | break; |
054f95b2 |
448 | if (wordc < 2) |
449 | aerr("unhandled: '%s'\n", words[0]); |
450 | |
451 | // don't cares |
452 | if (IS(words[0], "assume")) |
453 | continue; |
454 | |
455 | if (IS(words[0], "align")) { |
456 | val = parse_number(words[1]); |
457 | fprintf(fout, "\t\t .align %ld", val); |
458 | goto fin; |
459 | } |
460 | |
461 | w = 1; |
462 | type = parse_dx_directive(words[0]); |
463 | if (type == DXT_UNSPEC) { |
464 | type = parse_dx_directive(words[1]); |
465 | sym = words[0]; |
466 | w = 2; |
467 | } |
468 | if (type == DXT_UNSPEC) |
469 | aerr("unhandled decl: '%s %s'\n", words[0], words[1]); |
470 | |
471 | if (sym != NULL) { |
36595fd2 |
472 | // public/global name |
473 | if (pub_sym_cnt >= pub_sym_alloc) { |
474 | pub_sym_alloc *= 2; |
475 | pub_syms = realloc(pub_syms, pub_sym_alloc * sizeof(pub_syms[0])); |
476 | my_assert_not(pub_syms, NULL); |
477 | } |
478 | pub_syms[pub_sym_cnt++] = strdup(sym); |
479 | |
b545ba7c |
480 | pp = proto_parse(fhdr, sym, 1); |
481 | if (pp != NULL) |
482 | g_func_sym_pp = NULL; |
483 | |
054f95b2 |
484 | len = strlen(sym); |
485 | fprintf(fout, "_%s:", sym); |
486 | |
487 | len += 2; |
488 | if (len < 8) |
489 | fprintf(fout, "\t"); |
490 | if (len < 16) |
491 | fprintf(fout, "\t"); |
492 | if (len <= 16) |
493 | fprintf(fout, " "); |
494 | else |
495 | fprintf(fout, " "); |
496 | } |
497 | else { |
498 | fprintf(fout, "\t\t "); |
499 | } |
500 | |
501 | if (type == DXT_BYTE && words[w][0] == '\'') { |
502 | // string; use asciz for most common case |
503 | if (w == wordc - 2 && IS(words[w + 1], "0")) { |
504 | fprintf(fout, ".asciz \""); |
505 | wordc--; |
506 | } |
507 | else |
508 | fprintf(fout, ".ascii \""); |
509 | |
510 | for (; w < wordc; w++) { |
511 | if (words[w][0] == '\'') { |
512 | p = words[w] + 1; |
513 | p2 = strchr(p, '\''); |
514 | if (p2 == NULL) |
515 | aerr("unterminated string? '%s'\n", p); |
516 | memcpy(word, p, p2 - p); |
517 | word[p2 - p] = 0; |
518 | fprintf(fout, "%s", escape_string(word)); |
519 | } |
520 | else { |
521 | val = parse_number(words[w]); |
522 | if (val & ~0xff) |
523 | aerr("bad string trailing byte?\n"); |
524 | fprintf(fout, "\\x%02lx", val); |
525 | } |
526 | } |
527 | fprintf(fout, "\""); |
528 | goto fin; |
529 | } |
530 | |
531 | if (w == wordc - 2) { |
532 | if (IS_START(words[w + 1], "dup(")) { |
533 | cnt = parse_number(words[w]); |
534 | p = words[w + 1] + 4; |
535 | p2 = strchr(p, ')'); |
536 | if (p2 == NULL) |
537 | aerr("bad dup?\n"); |
538 | memmove(word, p, p2 - p); |
539 | word[p2 - p] = 0; |
36595fd2 |
540 | |
541 | val = 0; |
542 | if (!IS(word, "?")) |
543 | val = parse_number(word); |
054f95b2 |
544 | |
545 | fprintf(fout, ".fill 0x%02lx,%d,0x%02lx", |
546 | cnt, type_size(type), val); |
547 | goto fin; |
548 | } |
549 | } |
550 | |
551 | if (type == DXT_DWORD && words[w][0] == '\'' |
552 | && words[w][5] == '\'' && strlen(words[w]) == 6) |
553 | { |
554 | if (w != wordc - 1) |
555 | aerr("TODO\n"); |
556 | |
557 | p = words[w]; |
558 | val = (p[1] << 24) | (p[2] << 16) | (p[3] << 8) | p[4]; |
559 | fprintf(fout, ".long 0x%lx", val); |
b545ba7c |
560 | snprintf(g_comment, sizeof(g_comment), "%s", words[w]); |
054f95b2 |
561 | goto fin; |
562 | } |
563 | |
36595fd2 |
564 | if (type >= DXT_DWORD && strchr(words[w], '.')) |
054f95b2 |
565 | { |
566 | if (w != wordc - 1) |
567 | aerr("TODO\n"); |
568 | |
36595fd2 |
569 | fprintf(fout, "%s %s", type_name_float(type), words[w]); |
054f95b2 |
570 | goto fin; |
571 | } |
572 | |
573 | first = 1; |
574 | fprintf(fout, "%s ", type_name(type)); |
575 | for (; w < wordc; w++) |
576 | { |
577 | if (!first) |
578 | fprintf(fout, ", "); |
579 | |
36595fd2 |
580 | is_label = is_bss = 0; |
581 | if (w <= wordc - 2 && IS(words[w], "offset")) { |
054f95b2 |
582 | is_label = 1; |
583 | w++; |
584 | } |
36595fd2 |
585 | else if (IS(words[w], "?")) { |
586 | is_bss = 1; |
587 | } |
054f95b2 |
588 | else if (type == DXT_DWORD |
589 | && !('0' <= words[w][0] && words[w][0] <= '9')) |
590 | { |
591 | // assume label |
592 | is_label = 1; |
593 | } |
594 | |
36595fd2 |
595 | if (is_bss) { |
596 | fprintf(fout, "0"); |
597 | } |
598 | else if (is_label) { |
054f95b2 |
599 | p = words[w]; |
ddaf8bd7 |
600 | if (IS_START(p, "loc_") || IS_START(p, "__imp") |
601 | || strchr(p, '?') || strchr(p, '@') |
36595fd2 |
602 | || bsearch(&p, rlist, rlist_cnt, sizeof(rlist[0]), |
603 | cmpstringp)) |
054f95b2 |
604 | { |
605 | fprintf(fout, "0"); |
b545ba7c |
606 | snprintf(g_comment, sizeof(g_comment), "%s", p); |
36595fd2 |
607 | } |
608 | else { |
c0050df6 |
609 | pp = check_var(fhdr, sym, p); |
ddaf8bd7 |
610 | if (p[0] != '_') |
c0050df6 |
611 | fprintf(fout, (pp && pp->is_fastcall) ? "@" : "_"); |
ddaf8bd7 |
612 | fprintf(fout, "%s", p); |
c0050df6 |
613 | if (pp && pp->is_stdcall && pp->argc > 0) |
614 | fprintf(fout, "@%d", pp->argc * 4); |
054f95b2 |
615 | } |
054f95b2 |
616 | } |
617 | else { |
618 | val = parse_number(words[w]); |
619 | if (val < 10) |
620 | fprintf(fout, "%ld", val); |
621 | else |
622 | fprintf(fout, "0x%lx", val); |
623 | } |
624 | |
625 | first = 0; |
626 | } |
627 | |
628 | fin: |
b545ba7c |
629 | if (g_comment[0] != 0) { |
630 | fprintf(fout, "\t\t# %s", g_comment); |
631 | g_comment[0] = 0; |
054f95b2 |
632 | } |
633 | fprintf(fout, "\n"); |
054f95b2 |
634 | } |
635 | } |
636 | |
36595fd2 |
637 | fprintf(fout, "\n"); |
638 | |
639 | // dump public syms |
640 | for (i = 0; i < pub_sym_cnt; i++) |
641 | fprintf(fout, ".global _%s\n", pub_syms[i]); |
642 | |
054f95b2 |
643 | fclose(fout); |
644 | fclose(fasm); |
36595fd2 |
645 | fclose(fhdr); |
054f95b2 |
646 | |
647 | return 0; |
648 | } |
649 | |
650 | // vim:ts=2:shiftwidth=2:expandtab |