c36e914d |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <string.h> |
4 | |
5 | #include "my_assert.h" |
6 | #include "my_str.h" |
7 | |
/* Element count of a true array (not valid for pointers or array parameters). */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* Non-zero when the NUL-terminated strings w and y compare equal. */
#define IS(w, y) (!strcmp((w), (y)))
10 | |
11 | #include "protoparse.h" |
12 | |
/* Path of the .asm input (set in main); used as the prefix of diagnostics. */
const char *asmfn;
/* Number of the .asm line being processed; incremented per line read in main. */
static int asmln;

/* Print a "warning:<file>:<line>: " prefixed message to stdout and carry on. */
#define awarn(fmt, ...) \
  printf("warning:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__)

/* Print an "error:<file>:<line>: " prefixed message to stdout, then exit(1). */
#define aerr(fmt, ...) do { \
  printf("error:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  exit(1); \
} while (0)
22 | |
/* Coarse class of a parsed instruction; decides how gen_func treats it. */
enum op_class {
  OPC_UNSPEC = 0,
  OPC_RMD,        /* removed or optimized out */
  OPC_DATA,       /* data processing */
  OPC_DATA_FLAGS, /* data processing + sets flags */
  OPC_JMP,        /* unconditional transfer (jmp, call, ret) */
  OPC_JCC,        /* conditional jump */
  OPC_SCC,        /* conditional set */
};
32 | |
/*
 * Instruction identifiers.
 * OP_JO .. OP_JG must stay contiguous and in this order: gen_func matches
 * them with a single case range.
 */
enum op_op {
  OP_INVAL = 0,

  /* data movement / arithmetic */
  OP_PUSH,
  OP_POP,
  OP_MOV,
  OP_RET,
  OP_ADD,
  OP_SUB,
  OP_TEST,
  OP_CMP,

  /* unconditional control transfer */
  OP_CALL,
  OP_JMP,

  /* conditional jumps */
  OP_JO,
  OP_JNO,
  OP_JC,
  OP_JNC,
  OP_JZ,
  OP_JNZ,
  OP_JBE,
  OP_JA,
  OP_JS,
  OP_JNS,
  OP_JP,
  OP_JNP,
  OP_JL,
  OP_JGE,
  OP_JLE,
  OP_JG,
};
62 | |
/* Kind of operand produced by parse_operand. */
enum opr_type {
  OPT_UNSPEC = 0,
  OPT_REG,    /* plain register */
  OPT_REGMEM, /* bracketed memory reference, e.g. [ebp+arg_0] */
  OPT_LABEL,  /* jump/call target or "offset <name>" */
  OPT_CONST,  /* numeric immediate */
};
70 | |
/* Operand width ("length modifier"); OPLM_UNSPEC means not yet known. */
enum opr_lenmod {
  OPLM_UNSPEC = 0,
  OPLM_BYTE,
  OPLM_WORD,
  OPLM_DWORD,
};
77 | |
78 | #define MAX_OPERANDS 2 |
79 | |
80 | struct parsed_opr { |
91977a1c |
81 | enum opr_type type; |
82 | enum opr_lenmod lmod; |
83 | int reg; |
84 | unsigned int val; |
85 | char name[256]; |
c36e914d |
86 | }; |
87 | |
88 | struct parsed_op { |
91977a1c |
89 | enum op_class cls; |
90 | enum op_op op; |
91 | struct parsed_opr operand[MAX_OPERANDS]; |
92 | int operand_cnt; |
93 | int regmask; // all referensed regs |
94 | void *datap; |
95 | }; |
96 | |
97 | struct parsed_equ { |
98 | char name[64]; |
99 | enum opr_lenmod lmod; |
100 | int offset; |
c36e914d |
101 | }; |
102 | |
103 | #define MAX_OPS 1024 |
104 | |
105 | static struct parsed_op ops[MAX_OPS]; |
91977a1c |
106 | static struct parsed_equ *g_eqs; |
107 | static int g_eqcnt; |
108 | static char g_labels[MAX_OPS][32]; |
109 | static struct parsed_proto g_func_pp; |
110 | static char g_func[256]; |
111 | static char g_comment[256]; |
112 | static int g_bp_frame; |
113 | static int g_bp_stack; |
/*
 * Report a fatal error for instruction op_ (a pointer into ops[]) as
 * "error:<function>:#<op index>: ..." on stdout, then exit(1).
 * The pointer difference is a ptrdiff_t, so it is cast to long to match %ld.
 */
#define ferr(op_, fmt, ...) do { \
  printf("error:%s:#%ld: " fmt, g_func, (long)((op_) - ops), ##__VA_ARGS__); \
  exit(1); \
} while (0)
118 | |
/* Number of general purpose registers tracked (entries in regs_r32). */
#define MAX_REGS 8

/*
 * Register name tables. An entry's position equals its enum x86_regs value,
 * so a 16-bit or 8-bit name maps to the same index as its 32-bit register.
 * The 8-bit tables only cover the first four registers.
 */
const char *regs_r32[] = {
  "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp"
};
const char *regs_r16[] = {
  "ax", "bx", "cx", "dx", "si", "di", "bp", "sp"
};
const char *regs_r8l[] = {
  "al", "bl", "cl", "dl"
};
const char *regs_r8h[] = {
  "ah", "bh", "ch", "dh"
};

/* Register indices into the tables above; xUNSPEC means "no register". */
enum x86_regs {
  xUNSPEC = -1,
  xAX, xBX, xCX, xDX, xSI, xDI, xBP, xSP
};
127 | |
/*
 * Find string s in array[0..len-1].
 * Returns the index of the first exact match, or -1 when s is not present.
 * The index is kept as size_t so it is compared against len without a
 * signed/unsigned conversion.
 */
static int char_array_i(const char *array[], size_t len, const char *s)
{
  size_t i;

  for (i = 0; i < len; i++) {
    if (strcmp(s, array[i]) == 0)
      return (int)i;
  }

  return -1;
}
138 | |
139 | static void parse_reg(struct parsed_opr *opr, int *regmask, |
140 | char *s, int strict) |
141 | { |
142 | char w[16]; |
143 | int reg = xUNSPEC; |
144 | int c = 0; |
145 | |
146 | while (*s != 0) { |
147 | while (my_isblank(*s) || my_issep(*s)) |
148 | s++; |
149 | s = next_idt(w, sizeof(w), s); |
150 | if (w[0] == 0) |
151 | break; |
152 | c++; |
153 | reg = char_array_i(regs_r32, ARRAY_SIZE(regs_r32), w); |
154 | if (reg >= 0) { |
155 | opr->lmod = OPLM_DWORD; |
156 | *regmask |= 1 << reg; |
157 | continue; |
158 | } |
159 | reg = char_array_i(regs_r16, ARRAY_SIZE(regs_r16), w); |
160 | if (reg >= 0) { |
161 | opr->lmod = OPLM_WORD; |
162 | *regmask |= 1 << reg; |
163 | continue; |
164 | } |
165 | reg = char_array_i(regs_r8h, ARRAY_SIZE(regs_r8h), w); |
166 | if (reg >= 0) { |
167 | opr->lmod = OPLM_BYTE; |
168 | *regmask |= 1 << reg; |
169 | continue; |
170 | } |
171 | reg = char_array_i(regs_r8l, ARRAY_SIZE(regs_r8l), w); |
172 | if (reg >= 0) { |
173 | opr->lmod = OPLM_BYTE; |
174 | *regmask |= 1 << reg; |
175 | continue; |
176 | } |
177 | |
178 | if (strict) |
179 | aerr("bad reg: '%s'\n", w); |
180 | } |
181 | |
182 | if (c == 1) |
183 | opr->reg = reg; |
184 | } |
185 | |
/*
 * Parse an assembler numeric literal: optional leading '-', then either
 * decimal digits or hex digits followed by a trailing 'h' (e.g. "0FFh").
 *
 * Returns the (possibly negated) value. Any malformed input is fatal via
 * aerr(): an empty string, a literal without digits ("-", "h"), or
 * characters other than the single final 'h' after the digits.
 */
static long parse_number(const char *number)
{
  size_t len = strlen(number);
  const char *p = number;
  char *endp = NULL;
  int skipped_zero = 0;
  int neg = 0;
  int bad;
  long ret;

  /* number[len - 1] below must not read before the buffer */
  if (len == 0)
    aerr("number parsing failed\n");

  if (*p == '-') {
    neg = 1;
    p++;
  }
  /* one leading zero is dropped; remember it, since it counts as a digit */
  if (len > 1 && *p == '0') {
    skipped_zero = 1;
    p++;
  }
  if (number[len - 1] == 'h') {
    ret = strtol(p, &endp, 16);
    /* digits must run right up to the final 'h' */
    bad = (endp != number + len - 1);
  }
  else {
    ret = strtol(p, &endp, 10);
    bad = (*endp != 0);
  }
  /* strtol consumed nothing and there was no leading zero: no digits at all */
  if (endp == p && !skipped_zero)
    bad = 1;
  if (bad)
    aerr("number parsing failed\n");
  if (neg)
    ret = -ret;
  return ret;
}
215 | |
216 | static int parse_operand(struct parsed_opr *opr, int *regmask, |
c36e914d |
217 | char words[16][256], int wordc, int w, enum op_class cls) |
218 | { |
c36e914d |
219 | int ret, len; |
220 | int i; |
221 | |
222 | if (w >= wordc) |
223 | aerr("parse_operand w %d, wordc %d\n", w, wordc); |
224 | |
91977a1c |
225 | opr->reg = xUNSPEC; |
226 | |
c36e914d |
227 | for (i = w; i < wordc; i++) { |
228 | len = strlen(words[i]); |
229 | if (words[i][len - 1] == ',') { |
230 | words[i][len - 1] = 0; |
231 | wordc = i + 1; |
232 | break; |
233 | } |
234 | } |
235 | |
236 | if (cls == OPC_JMP || cls == OPC_JCC) { |
237 | const char *label; |
238 | |
239 | if (wordc - w == 3 && IS(words[w + 1], "ptr")) |
240 | label = words[w + 2]; |
241 | else if (wordc - w == 2 && IS(words[w], "short")) |
242 | label = words[w + 1]; |
243 | else if (wordc - w == 1) |
244 | label = words[w]; |
245 | else |
246 | aerr("jump parse error"); |
247 | |
248 | opr->type = OPT_LABEL; |
249 | strcpy(opr->name, label); |
250 | return wordc; |
251 | } |
252 | |
253 | if (wordc - w >= 3) { |
254 | if (IS(words[w + 1], "ptr")) { |
255 | if (IS(words[w], "dword")) |
91977a1c |
256 | opr->lmod = OPLM_DWORD; |
c36e914d |
257 | else if (IS(words[w], "word")) |
91977a1c |
258 | opr->lmod = OPLM_WORD; |
c36e914d |
259 | else if (IS(words[w], "byte")) |
91977a1c |
260 | opr->lmod = OPLM_BYTE; |
c36e914d |
261 | else |
262 | aerr("type parsing failed\n"); |
263 | w += 2; |
264 | } |
265 | } |
266 | |
267 | if (wordc - w == 2 && IS(words[w], "offset")) { |
268 | opr->type = OPT_LABEL; |
269 | strcpy(opr->name, words[w + 1]); |
270 | return wordc; |
271 | } |
272 | |
273 | if (wordc - w != 1) |
274 | aerr("parse_operand 1 word expected\n"); |
275 | |
276 | len = strlen(words[w]); |
277 | |
278 | if (words[w][0] == '[') { |
279 | opr->type = OPT_REGMEM; |
91977a1c |
280 | ret = sscanf(words[w], "[%[^]]]", opr->name); |
c36e914d |
281 | if (ret != 1) |
282 | aerr("[] parse failure\n"); |
91977a1c |
283 | parse_reg(opr, regmask, opr->name, 0); |
c36e914d |
284 | return wordc; |
285 | } |
286 | else if (('0' <= words[w][0] && words[w][0] <= '9') |
287 | || words[w][0] == '-') |
288 | { |
91977a1c |
289 | opr->type = OPT_CONST; |
290 | opr->val = (unsigned int)parse_number(words[w]); |
291 | return wordc; |
c36e914d |
292 | } |
293 | |
c36e914d |
294 | opr->type = OPT_REG; |
91977a1c |
295 | strcpy(opr->name, words[w]); |
296 | parse_reg(opr, regmask, opr->name, 1); |
297 | |
c36e914d |
298 | return wordc; |
299 | } |
300 | |
301 | static const struct { |
302 | const char *name; |
303 | enum op_op op; |
304 | enum op_class cls; |
305 | int minopr; |
306 | int maxopr; |
307 | } op_table[] = { |
308 | { "push", OP_PUSH, OPC_DATA, 1, 1 }, |
309 | { "pop", OP_POP, OPC_DATA, 1, 1 }, |
310 | { "mov" , OP_MOV, OPC_DATA, 2, 2 }, |
311 | { "add", OP_ADD, OPC_DATA_FLAGS, 2, 2 }, |
312 | { "test", OP_TEST, OPC_DATA_FLAGS, 2, 2 }, |
313 | { "cmp", OP_CMP, OPC_DATA_FLAGS, 2, 2 }, |
314 | { "retn", OP_RET, OPC_JMP, 0, 1 }, |
315 | { "call", OP_CALL, OPC_JMP, 1, 1 }, |
316 | { "jmp", OP_JMP, OPC_JMP, 1, 1 }, |
317 | { "jo", OP_JO, OPC_JCC, 1, 1 }, // 70 OF=1 |
318 | { "jno", OP_JNO, OPC_JCC, 1, 1 }, // 71 OF=0 |
319 | { "jc", OP_JC, OPC_JCC, 1, 1 }, // 72 CF=1 |
320 | { "jb", OP_JC, OPC_JCC, 1, 1 }, // 72 |
321 | { "jnc", OP_JNC, OPC_JCC, 1, 1 }, // 73 CF=0 |
322 | { "jae", OP_JNC, OPC_JCC, 1, 1 }, // 73 |
323 | { "jz", OP_JZ, OPC_JCC, 1, 1 }, // 74 ZF=1 |
324 | { "je", OP_JZ, OPC_JCC, 1, 1 }, // 74 |
325 | { "jnz", OP_JNZ, OPC_JCC, 1, 1 }, // 75 ZF=0 |
326 | { "jne", OP_JNZ, OPC_JCC, 1, 1 }, // 75 |
327 | { "jbe", OP_JBE, OPC_JCC, 1, 1 }, // 76 CF=1 || ZF=1 |
328 | { "jna", OP_JBE, OPC_JCC, 1, 1 }, // 76 |
329 | { "ja", OP_JA, OPC_JCC, 1, 1 }, // 77 CF=0 && ZF=0 |
330 | { "jnbe", OP_JA, OPC_JCC, 1, 1 }, // 77 |
331 | { "js", OP_JS, OPC_JCC, 1, 1 }, // 78 SF=1 |
332 | { "jns", OP_JNS, OPC_JCC, 1, 1 }, // 79 SF=0 |
333 | { "jp", OP_JP, OPC_JCC, 1, 1 }, // 7a PF=1 |
334 | { "jpe", OP_JP, OPC_JCC, 1, 1 }, // 7a |
335 | { "jnp", OP_JNP, OPC_JCC, 1, 1 }, // 7b PF=0 |
336 | { "jpo", OP_JNP, OPC_JCC, 1, 1 }, // 7b |
337 | { "jl", OP_JL, OPC_JCC, 1, 1 }, // 7c SF!=OF |
338 | { "jnge", OP_JL, OPC_JCC, 1, 1 }, // 7c |
339 | { "jge", OP_JGE, OPC_JCC, 1, 1 }, // 7d SF=OF |
340 | { "jnl", OP_JGE, OPC_JCC, 1, 1 }, // 7d |
341 | { "jle", OP_JLE, OPC_JCC, 1, 1 }, // 7e ZF=1 || SF!=OF |
342 | { "jng", OP_JLE, OPC_JCC, 1, 1 }, // 7e |
343 | { "jg", OP_JG, OPC_JCC, 1, 1 }, // 7f ZF=0 && SF=OF |
344 | { "jnle", OP_JG, OPC_JCC, 1, 1 }, // 7f |
345 | }; |
346 | |
347 | static void parse_op(struct parsed_op *op, char words[16][256], int wordc) |
348 | { |
91977a1c |
349 | int opr = 0; |
350 | int w = 1; |
351 | int i; |
c36e914d |
352 | |
91977a1c |
353 | for (i = 0; i < ARRAY_SIZE(op_table); i++) { |
354 | if (!IS(words[0], op_table[i].name)) |
355 | continue; |
c36e914d |
356 | |
91977a1c |
357 | op->regmask = 0; |
c36e914d |
358 | |
91977a1c |
359 | for (opr = 0; opr < op_table[i].minopr; opr++) { |
360 | w = parse_operand(&op->operand[opr], &op->regmask, |
361 | words, wordc, w, op_table[i].cls); |
362 | } |
c36e914d |
363 | |
91977a1c |
364 | for (; w < wordc && opr < op_table[i].maxopr; opr++) { |
365 | w = parse_operand(&op->operand[opr], &op->regmask, |
366 | words, wordc, w, op_table[i].cls); |
367 | } |
c36e914d |
368 | |
91977a1c |
369 | goto done; |
370 | } |
c36e914d |
371 | |
91977a1c |
372 | aerr("unhandled op: '%s'\n", words[0]); |
c36e914d |
373 | |
91977a1c |
374 | done: |
375 | if (w < wordc) |
376 | aerr("parse_op %s incomplete: %d/%d\n", |
377 | words[0], w, wordc); |
378 | |
379 | op->cls = op_table[i].cls; |
380 | op->op = op_table[i].op; |
381 | op->operand_cnt = opr; |
382 | return; |
c36e914d |
383 | } |
384 | |
91977a1c |
385 | static const char *opr_name(struct parsed_op *po, int opr_num) |
c36e914d |
386 | { |
91977a1c |
387 | if (opr_num >= po->operand_cnt) |
388 | ferr(po, "opr OOR: %d/%d\n", opr_num, po->operand_cnt); |
389 | return po->operand[opr_num].name; |
c36e914d |
390 | } |
391 | |
91977a1c |
392 | static unsigned int opr_const(struct parsed_op *po, int opr_num) |
c36e914d |
393 | { |
91977a1c |
394 | if (opr_num >= po->operand_cnt) |
395 | ferr(po, "opr OOR: %d/%d\n", opr_num, po->operand_cnt); |
396 | if (po->operand[opr_num].type != OPT_CONST) |
397 | ferr(po, "opr %d: const expected\n", opr_num); |
398 | return po->operand[opr_num].val; |
399 | } |
c36e914d |
400 | |
91977a1c |
401 | static const char *opr_reg_p(struct parsed_op *po, struct parsed_opr *popr) |
402 | { |
403 | if ((unsigned int)popr->reg >= MAX_REGS) |
404 | ferr(po, "invalid reg: %d\n", popr->reg); |
405 | return regs_r32[popr->reg]; |
406 | } |
c36e914d |
407 | |
91977a1c |
408 | static void bg_frame_access(struct parsed_op *po, char *buf, |
409 | size_t buf_size, const char *bp_arg, int is_src) |
410 | { |
411 | struct parsed_equ *eq; |
412 | int i, arg_i, arg_s; |
413 | |
414 | snprintf(g_comment, sizeof(g_comment), "%s", bp_arg); |
415 | |
416 | for (i = 0; i < g_eqcnt; i++) |
417 | if (IS(g_eqs[i].name, bp_arg)) |
418 | break; |
419 | if (i >= g_eqcnt) |
420 | ferr(po, "unresolved bp_arg: '%s'\n", bp_arg); |
421 | eq = &g_eqs[i]; |
422 | |
423 | if (eq->offset >= 0) { |
424 | arg_i = eq->offset / 4 - 2; |
425 | if (arg_i < 0 || arg_i >= g_func_pp.argc_stack) |
426 | ferr(po, "offset %d doesn't map to any arg\n", eq->offset); |
427 | |
428 | for (i = arg_s = 0; i < g_func_pp.argc; i++) { |
429 | if (g_func_pp.arg[i].reg != NULL) |
430 | continue; |
431 | if (arg_s == arg_i) |
432 | break; |
433 | arg_s++; |
434 | } |
435 | if (i == g_func_pp.argc) |
436 | ferr(po, "arg %d not in prototype?\n", arg_i); |
437 | snprintf(buf, buf_size, "%sa%d", is_src ? "(u32)" : "", i + 1); |
438 | } |
439 | else { |
440 | if (g_bp_stack == 0) |
441 | ferr(po, "bp_stack access after it was not detected\n"); |
442 | ferr(po, "TODO\n"); |
443 | } |
444 | } |
c36e914d |
445 | |
91977a1c |
446 | static char *out_src_opr(char *buf, size_t buf_size, |
447 | struct parsed_op *po, struct parsed_opr *popr) |
448 | { |
449 | switch (popr->type) { |
450 | case OPT_REG: |
451 | switch (popr->lmod) { |
452 | case OPLM_DWORD: |
453 | snprintf(buf, buf_size, "%s", opr_reg_p(po, popr)); |
454 | break; |
455 | default: |
456 | ferr(po, "invalid src lmod: %d\n", popr->lmod); |
457 | } |
458 | break; |
459 | case OPT_REGMEM: |
460 | if (g_bp_frame && !strncmp(popr->name, "ebp+", 4)) { |
461 | bg_frame_access(po, buf, buf_size, popr->name + 4, 1); |
462 | break; |
463 | } |
464 | ferr(po, "unhandled OPT_REGMEM variation\n"); |
465 | break; |
466 | case OPT_LABEL: |
467 | snprintf(buf, buf_size, "%s", popr->name); |
468 | break; |
469 | case OPT_CONST: |
470 | snprintf(buf, buf_size, popr->val < 10 ? "%u" : "0x%02x", popr->val); |
471 | break; |
472 | default: |
473 | ferr(po, "invalid src type: %d\n", popr->type); |
474 | } |
475 | |
476 | return buf; |
477 | } |
c36e914d |
478 | |
91977a1c |
479 | static char *out_dst_opr(char *buf, size_t buf_size, |
480 | struct parsed_op *po, struct parsed_opr *popr) |
481 | { |
482 | switch (popr->type) { |
483 | case OPT_REG: |
484 | switch (popr->lmod) { |
485 | case OPLM_DWORD: |
486 | snprintf(buf, buf_size, "%s", opr_reg_p(po, popr)); |
487 | break; |
488 | default: |
489 | ferr(po, "invalid dst lmod: %d\n", popr->lmod); |
490 | } |
491 | break; |
492 | default: |
493 | ferr(po, "invalid dst type: %d\n", popr->type); |
494 | } |
495 | |
496 | return buf; |
497 | } |
c36e914d |
498 | |
91977a1c |
499 | static void split_cond(struct parsed_op *po, enum op_op *op, int *is_neg) |
500 | { |
501 | *is_neg = 0; |
502 | |
503 | switch (*op) { |
504 | case OP_JNO: |
505 | *op = OP_JO; |
506 | *is_neg = 1; |
507 | break; |
508 | case OP_JNC: |
509 | *op = OP_JC; |
510 | *is_neg = 1; |
511 | break; |
512 | case OP_JNZ: |
513 | *op = OP_JZ; |
514 | *is_neg = 1; |
515 | break; |
516 | case OP_JNS: |
517 | *op = OP_JS; |
518 | *is_neg = 1; |
519 | break; |
520 | case OP_JNP: |
521 | *op = OP_JP; |
522 | *is_neg = 1; |
523 | break; |
524 | case OP_JO: |
525 | case OP_JC: |
526 | case OP_JZ: |
527 | case OP_JS: |
528 | case OP_JP: |
529 | // |
530 | case OP_JBE: |
531 | case OP_JA: |
532 | case OP_JL: |
533 | case OP_JGE: |
534 | case OP_JLE: |
535 | case OP_JG: |
536 | break; |
537 | default: |
538 | ferr(po, "split_cond: bad op %d\n", *op); |
539 | break; |
540 | } |
541 | } |
c36e914d |
542 | |
91977a1c |
543 | static void out_test_for_cc(char *buf, size_t buf_size, |
544 | struct parsed_op *po, enum opr_lenmod lmod, const char *expr) |
545 | { |
546 | enum op_op op = po->op; |
547 | int is_neg = 0; |
548 | |
549 | split_cond(po, &op, &is_neg); |
550 | switch (op) { |
551 | case OP_JZ: |
552 | switch (lmod) { |
553 | case OPLM_DWORD: |
554 | snprintf(buf, buf_size, "(%s %s 0)", expr, is_neg ? "!=" : "=="); |
555 | break; |
556 | default: |
557 | ferr(po, "%s: unhandled lmod for JZ: %d\n", __func__, lmod); |
558 | } |
559 | break; |
560 | default: |
561 | ferr(po, "%s: unhandled op: %d\n", __func__, op); |
562 | } |
563 | } |
c36e914d |
564 | |
91977a1c |
565 | static void propagete_lmod(struct parsed_op *po, struct parsed_opr *popr1, |
566 | struct parsed_opr *popr2) |
567 | { |
568 | if (popr1->lmod == OPLM_UNSPEC && popr2->lmod == OPLM_UNSPEC) |
569 | ferr(po, "missing lmod for both operands\n"); |
570 | |
571 | if (popr1->lmod == OPLM_UNSPEC) |
572 | popr1->lmod = popr2->lmod; |
573 | else if (popr2->lmod == OPLM_UNSPEC) |
574 | popr2->lmod = popr1->lmod; |
575 | else if (popr1->lmod != popr2->lmod) |
576 | ferr(po, "conflicting lmods: %d vs %d\n", popr1->lmod, popr2->lmod); |
577 | } |
c36e914d |
578 | |
91977a1c |
579 | static void gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) |
580 | { |
581 | struct parsed_op *delayed_op = NULL, *tmp_op; |
582 | char buf1[256], buf2[256], buf3[256]; |
583 | struct parsed_proto *pp; |
584 | const char *tmpname; |
585 | int had_decl = 0; |
586 | int regmask_arg = 0; |
587 | int regmask = 0; |
588 | int no_output; |
589 | int arg; |
590 | int i, j; |
591 | int reg; |
592 | int ret; |
593 | |
594 | g_bp_frame = g_bp_stack = 0; |
595 | |
596 | ret = proto_parse(fhdr, funcn, &g_func_pp); |
597 | if (ret) |
598 | ferr(ops, "proto_parse failed for '%s'\n", funcn); |
599 | |
600 | fprintf(fout, "%s %s(", g_func_pp.ret_type, funcn); |
601 | for (i = 0; i < g_func_pp.argc; i++) { |
602 | if (i > 0) |
603 | fprintf(fout, ", "); |
604 | fprintf(fout, "%s a%d", g_func_pp.arg[i].type, i + 1); |
605 | } |
606 | fprintf(fout, ")\n{\n"); |
607 | |
608 | // pass1: |
609 | // - handle ebp frame, remove ops related to it |
610 | if (ops[0].op == OP_PUSH && IS(opr_name(&ops[0], 0), "ebp") |
611 | && ops[1].op == OP_MOV |
612 | && IS(opr_name(&ops[1], 0), "ebp") |
613 | && IS(opr_name(&ops[1], 1), "esp")) |
614 | { |
615 | g_bp_frame = 1; |
616 | ops[0].cls = OPC_RMD; |
617 | ops[1].cls = OPC_RMD; |
618 | |
619 | if (ops[2].op == OP_SUB && IS(opr_name(&ops[2], 0), "esp")) { |
620 | g_bp_stack = opr_const(&ops[2], 1); |
621 | ops[2].cls = OPC_RMD; |
622 | } |
623 | |
624 | i = 2; |
625 | do { |
626 | for (; i < opcnt; i++) |
627 | if (ops[i].op == OP_RET) |
628 | break; |
629 | if (ops[i - 1].op != OP_POP || !IS(opr_name(&ops[i - 1], 0), "ebp")) |
630 | ferr(&ops[i - 1], "'pop ebp' expected\n"); |
631 | ops[i - 1].cls = OPC_RMD; |
632 | |
633 | if (g_bp_stack != 0) { |
634 | if (ops[i - 2].op != OP_MOV |
635 | || !IS(opr_name(&ops[i - 2], 0), "esp") |
636 | || !IS(opr_name(&ops[i - 2], 1), "ebp")) |
637 | { |
638 | ferr(&ops[i - 2], "esp restore expected\n"); |
639 | } |
640 | ops[i - 2].cls = OPC_RMD; |
641 | } |
642 | i++; |
643 | } while (i < opcnt); |
644 | } |
645 | |
646 | // pass2: |
647 | // - scan for all used registers |
648 | // - process calls |
649 | for (i = 0; i < opcnt; i++) { |
650 | if (ops[i].cls == OPC_RMD) |
651 | continue; |
652 | regmask |= ops[i].regmask; |
653 | |
654 | if (ops[i].op == OP_CALL) { |
655 | pp = malloc(sizeof(*pp)); |
656 | my_assert_not(pp, NULL); |
657 | tmpname = opr_name(&ops[i], 0); |
658 | ret = proto_parse(fhdr, tmpname, pp); |
659 | if (ret) |
660 | ferr(&ops[i], "proto_parse failed for '%s'\n", tmpname); |
661 | |
662 | for (arg = 0; arg < pp->argc; arg++) |
663 | if (pp->arg[arg].reg == NULL) |
664 | break; |
665 | |
666 | for (j = i - 1; j >= 0 && arg < pp->argc; j--) { |
667 | if (ops[j].cls == OPC_RMD) |
668 | continue; |
669 | if (ops[j].op != OP_PUSH) |
670 | continue; |
671 | |
672 | pp->arg[arg].datap = &ops[j]; |
673 | ops[j].cls = OPC_RMD; |
674 | for (arg++; arg < pp->argc; arg++) |
675 | if (pp->arg[arg].reg == NULL) |
676 | break; |
677 | } |
678 | if (arg < pp->argc) |
679 | ferr(&ops[i], "arg collect failed for '%s'\n", tmpname); |
680 | ops[i].datap = pp; |
681 | } |
682 | } |
683 | |
684 | // instantiate arg-registers |
685 | for (i = 0; i < g_func_pp.argc; i++) { |
686 | if (g_func_pp.arg[i].reg != NULL) { |
687 | reg = char_array_i(regs_r32, |
688 | ARRAY_SIZE(regs_r32), g_func_pp.arg[i].reg); |
689 | if (reg < 0) |
690 | ferr(ops, "arg '%s' is not a reg?\n", g_func_pp.arg[i].reg); |
691 | |
692 | regmask_arg |= 1 << reg; |
693 | fprintf(fout, " u32 %s = (u32)a%d;\n", |
694 | g_func_pp.arg[i].reg, i); |
695 | had_decl = 1; |
696 | } |
697 | } |
698 | |
699 | // instantiate other regs - special case for eax |
700 | if (!((regmask | regmask_arg) & 1) && !IS(g_func_pp.ret_type, "void")) { |
701 | fprintf(fout, " u32 eax = 0;\n"); |
702 | had_decl = 1; |
703 | } |
704 | |
705 | regmask &= ~regmask_arg; |
706 | if (g_bp_frame) |
707 | regmask &= ~(1 << xBP); |
708 | if (regmask) { |
709 | for (reg = 0; reg < 8; reg++) { |
710 | if (regmask & (1 << reg)) { |
711 | fprintf(fout, " u32 %s;\n", regs_r32[reg]); |
712 | had_decl = 1; |
713 | } |
714 | } |
715 | } |
716 | |
717 | if (had_decl) |
718 | fprintf(fout, "\n"); |
719 | |
720 | // output ops |
721 | for (i = 0; i < opcnt; i++) { |
722 | if (g_labels[i][0] != 0) |
723 | fprintf(fout, "\n%s:\n", g_labels[i]); |
724 | |
725 | if (ops[i].cls == OPC_RMD) |
726 | continue; |
727 | |
728 | no_output = 0; |
729 | |
730 | #define internal_error() \ |
731 | ferr(&ops[i], "%s:%d: ICE\n", __FILE__, __LINE__) |
732 | #define assert_operand_cnt(n_) \ |
733 | if (ops[i].operand_cnt != n_) \ |
734 | ferr(&ops[i], "operand_cnt is %d/%d\n", ops[i].operand_cnt, n_) |
735 | |
736 | // conditional op? |
737 | if (ops[i].cls == OPC_JCC || ops[i].cls == OPC_SCC) { |
738 | if (delayed_op == NULL) |
739 | ferr(&ops[i], "no delayed_op before cond op\n"); |
740 | if (delayed_op->op == OP_TEST) { |
741 | if (IS(opr_name(delayed_op, 0), opr_name(delayed_op, 1))) { |
742 | out_dst_opr(buf3, sizeof(buf3), delayed_op, |
743 | &delayed_op->operand[0]); |
744 | } |
745 | else { |
746 | out_dst_opr(buf1, sizeof(buf1), delayed_op, |
747 | &delayed_op->operand[0]); |
748 | out_src_opr(buf2, sizeof(buf2), delayed_op, |
749 | &delayed_op->operand[1]); |
750 | snprintf(buf3, sizeof(buf3), "(%s & %s)", buf1, buf2); |
751 | } |
752 | out_test_for_cc(buf1, sizeof(buf1), &ops[i], |
753 | delayed_op->operand[0].lmod, buf3); |
754 | if (ops[i].cls == OPC_JCC) |
755 | fprintf(fout, " if %s\n", buf1); |
756 | else { |
757 | out_dst_opr(buf2, sizeof(buf2), &ops[i], &ops[i].operand[0]); |
758 | fprintf(fout, " %s = %s;", buf2, buf1); |
759 | } |
760 | } |
761 | else { |
762 | ferr(&ops[i], "TODO\n"); |
763 | } |
764 | } |
765 | |
766 | switch (ops[i].op) |
767 | { |
768 | case OP_MOV: |
769 | assert_operand_cnt(2); |
770 | propagete_lmod(&ops[i], &ops[i].operand[0], &ops[i].operand[1]); |
771 | fprintf(fout, " %s = %s;", |
772 | out_dst_opr(buf1, sizeof(buf1), &ops[i], &ops[i].operand[0]), |
773 | out_src_opr(buf2, sizeof(buf2), &ops[i], &ops[i].operand[1])); |
774 | break; |
775 | |
776 | case OP_TEST: |
777 | case OP_CMP: |
778 | propagete_lmod(&ops[i], &ops[i].operand[0], &ops[i].operand[1]); |
779 | delayed_op = &ops[i]; |
780 | no_output = 1; |
781 | break; |
782 | |
783 | // note: we reuse OP_Jcc for SETcc, only cls differs |
784 | case OP_JO ... OP_JG: |
785 | if (ops[i].cls == OPC_JCC) |
786 | fprintf(fout, " goto %s;", ops[i].operand[0].name); |
787 | break; |
788 | |
789 | case OP_CALL: |
790 | pp = ops[i].datap; |
791 | if (pp == NULL) |
792 | ferr(&ops[i], "NULL pp\n"); |
793 | |
794 | fprintf(fout, " "); |
795 | if (!IS(pp->ret_type, "void")) { |
796 | fprintf(fout, "eax = "); |
797 | if (strchr(pp->ret_type, '*')) |
798 | fprintf(fout, "(u32)"); |
799 | } |
800 | fprintf(fout, "%s(", opr_name(&ops[i], 0)); |
801 | for (arg = 0; arg < pp->argc; arg++) { |
802 | if (arg > 0) |
803 | fprintf(fout, ", "); |
804 | if (pp->arg[arg].reg != NULL) { |
805 | fprintf(fout, "%s", pp->arg[i].reg); |
806 | continue; |
807 | } |
808 | |
809 | // stack arg |
810 | tmp_op = pp->arg[arg].datap; |
811 | if (tmp_op == NULL) |
812 | ferr(&ops[i], "parsed_op missing for arg%d\n", arg); |
813 | fprintf(fout, "%s", |
814 | out_src_opr(buf1, sizeof(buf1), tmp_op, &tmp_op->operand[0])); |
815 | } |
816 | fprintf(fout, ");"); |
817 | break; |
818 | |
819 | case OP_RET: |
820 | if (IS(g_func_pp.ret_type, "void")) |
821 | fprintf(fout, " return;"); |
822 | else |
823 | fprintf(fout, " return eax;"); |
824 | break; |
825 | |
826 | case OP_PUSH: |
827 | ferr(&ops[i], "push encountered\n"); |
828 | break; |
829 | |
830 | case OP_POP: |
831 | ferr(&ops[i], "pop encountered\n"); |
832 | break; |
833 | |
834 | default: |
835 | no_output = 1; |
836 | ferr(&ops[i], "unhandled op type %d, cls %d\n", |
837 | ops[i].op, ops[i].cls); |
838 | break; |
839 | } |
840 | |
841 | if (g_comment[0] != 0) { |
842 | fprintf(fout, " // %s", g_comment); |
843 | g_comment[0] = 0; |
844 | no_output = 0; |
845 | } |
846 | if (!no_output) |
847 | fprintf(fout, "\n"); |
848 | } |
849 | |
850 | fprintf(fout, "}\n\n"); |
851 | |
852 | // cleanup |
853 | for (i = 0; i < opcnt; i++) { |
854 | if (ops[i].op == OP_CALL) { |
855 | pp = ops[i].datap; |
856 | if (pp) { |
857 | proto_release(pp); |
858 | free(pp); |
859 | } |
860 | } |
861 | } |
862 | proto_release(&g_func_pp); |
863 | } |
c36e914d |
864 | |
91977a1c |
865 | int main(int argc, char *argv[]) |
866 | { |
867 | FILE *fout, *fasm, *fhdr; |
868 | char line[256]; |
869 | char words[16][256]; |
870 | int in_func = 0; |
871 | int eq_alloc; |
872 | int pi = 0; |
873 | int len; |
874 | char *p; |
875 | int wordc; |
876 | |
877 | if (argc != 4) { |
878 | printf("usage:\n%s <.c> <.asm> <hdrf>\n", |
879 | argv[0]); |
880 | return 1; |
881 | } |
882 | |
883 | hdrfn = argv[3]; |
884 | fhdr = fopen(hdrfn, "r"); |
885 | my_assert_not(fhdr, NULL); |
886 | |
887 | asmfn = argv[2]; |
888 | fasm = fopen(asmfn, "r"); |
889 | my_assert_not(fasm, NULL); |
890 | |
891 | fout = fopen(argv[1], "w"); |
892 | my_assert_not(fout, NULL); |
893 | |
894 | eq_alloc = 128; |
895 | g_eqs = malloc(eq_alloc * sizeof(g_eqs[0])); |
896 | my_assert_not(g_eqs, NULL); |
897 | |
898 | while (fgets(line, sizeof(line), fasm)) |
899 | { |
900 | asmln++; |
901 | |
902 | p = sskip(line); |
903 | if (*p == 0 || *p == ';') |
904 | continue; |
905 | |
906 | memset(words, 0, sizeof(words)); |
907 | for (wordc = 0; wordc < 16; wordc++) { |
908 | p = sskip(next_word(words[wordc], sizeof(words[0]), p)); |
909 | if (*p == 0 || *p == ';') { |
910 | wordc++; |
911 | break; |
912 | } |
913 | } |
914 | |
915 | if (wordc == 0) { |
916 | // shouldn't happen |
917 | awarn("wordc == 0?\n"); |
918 | continue; |
919 | } |
920 | |
921 | // don't care about this: |
922 | if (words[0][0] == '.' |
923 | || IS(words[0], "include") |
924 | || IS(words[0], "assume") || IS(words[1], "segment") |
925 | || IS(words[0], "align")) |
926 | { |
927 | continue; |
928 | } |
929 | |
930 | if (IS(words[1], "proc")) { |
931 | if (in_func) |
932 | aerr("proc '%s' while in_func '%s'?\n", |
933 | words[0], g_func); |
934 | strcpy(g_func, words[0]); |
935 | in_func = 1; |
936 | continue; |
937 | } |
938 | |
939 | if (IS(words[1], "endp")) { |
940 | if (!in_func) |
941 | aerr("endp '%s' while not in_func?\n", words[0]); |
942 | if (!IS(g_func, words[0])) |
943 | aerr("endp '%s' while in_func '%s'?\n", |
944 | words[0], g_func); |
945 | gen_func(fout, fhdr, g_func, pi); |
946 | in_func = 0; |
947 | g_func[0] = 0; |
948 | if (pi != 0) { |
949 | memset(&ops, 0, pi * sizeof(ops[0])); |
950 | memset(g_labels, 0, pi * sizeof(g_labels[0])); |
951 | pi = 0; |
952 | } |
953 | g_eqcnt = 0; |
954 | exit(1); |
955 | continue; |
956 | } |
957 | |
958 | if (IS(words[1], "=")) { |
959 | if (wordc != 5) |
960 | aerr("unhandled equ, wc=%d\n", wordc); |
961 | if (g_eqcnt >= eq_alloc) { |
962 | eq_alloc *= 2; |
963 | g_eqs = realloc(g_eqs, eq_alloc * sizeof(g_eqs[0])); |
964 | my_assert_not(g_eqs, NULL); |
965 | } |
966 | |
967 | len = strlen(words[0]); |
968 | if (len > sizeof(g_eqs[0].name) - 1) |
969 | aerr("equ name too long: %d\n", len); |
970 | strcpy(g_eqs[g_eqcnt].name, words[0]); |
971 | |
972 | if (!IS(words[3], "ptr")) |
973 | aerr("unhandled equ\n"); |
974 | if (IS(words[2], "dword")) |
975 | g_eqs[g_eqcnt].lmod = OPLM_DWORD; |
976 | else if (IS(words[2], "word")) |
977 | g_eqs[g_eqcnt].lmod = OPLM_WORD; |
978 | else if (IS(words[2], "byte")) |
979 | g_eqs[g_eqcnt].lmod = OPLM_BYTE; |
980 | else |
981 | aerr("bad lmod: '%s'\n", words[2]); |
982 | |
983 | g_eqs[g_eqcnt].offset = parse_number(words[4]); |
984 | g_eqcnt++; |
985 | continue; |
986 | } |
987 | |
988 | if (pi >= ARRAY_SIZE(ops)) |
989 | aerr("too many ops\n"); |
990 | |
991 | p = strchr(words[0], ':'); |
992 | if (p != NULL) { |
993 | len = p - words[0]; |
994 | if (len > sizeof(g_labels[0]) - 1) |
995 | aerr("label too long: %d\n", len); |
996 | if (g_labels[pi][0] != 0) |
997 | aerr("dupe label?\n"); |
998 | memcpy(g_labels[pi], words[0], len); |
999 | g_labels[pi][len] = 0; |
1000 | continue; |
1001 | } |
1002 | |
1003 | parse_op(&ops[pi], words, wordc); |
1004 | pi++; |
1005 | |
1006 | (void)proto_parse; |
1007 | } |
1008 | |
1009 | fclose(fout); |
1010 | fclose(fasm); |
1011 | fclose(fhdr); |
1012 | |
1013 | return 0; |
c36e914d |
1014 | } |
91977a1c |
1015 | |
1016 | // vim:ts=2:shiftwidth=2:expandtab |