c36e914d |
1 | #include <stdio.h> |
2 | #include <stdlib.h> |
3 | #include <string.h> |
4 | |
5 | #include "my_assert.h" |
6 | #include "my_str.h" |
7 | |
/* Element count of a true array; gives a wrong answer for a pointer. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* 1 when the two NUL-terminated strings are equal, 0 otherwise. */
#define IS(w, y) (strcmp((w), (y)) == 0)
10 | |
11 | #include "protoparse.h" |
12 | |
/* Path of the .asm input (set from argv[2] in main); file name shown in diagnostics. */
const char *asmfn;
/* Number of lines read from the .asm input so far; line number shown in diagnostics. */
static int asmln;

/*
 * Print a printf-style warning to stdout, prefixed with "warning:<file>:<line>: ".
 * fmt must be a string literal because it is pasted onto the prefix.
 * The ", ##__VA_ARGS__" form is a GNU extension that drops the comma when
 * no extra arguments are given.
 */
#define awarn(fmt, ...) \
  printf("warning:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__)
/*
 * Print a printf-style error to stdout, prefixed with "error:<file>:<line>: ",
 * then terminate the process with exit status 1. Never returns.
 */
#define aerr(fmt, ...) do { \
  printf("error:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__); \
  exit(1); \
} while (0)
22 | |
/* Coarse category of an instruction. */
enum op_class {
  OPC_UNSPEC = 0,   /* not classified */
  OPC_RMD,          /* removed or optimized out */
  OPC_DATA,         /* data processing */
  OPC_DATA_FLAGS,   /* data processing + sets flags */
  OPC_JMP,          /* unconditional control transfer (.. and call) */
  OPC_JCC,          /* conditional jump */
};
31 | |
/*
 * Instruction identifiers. Mnemonic aliases (e.g. "jb"/"jc") share one value.
 * The conditional jumps are listed in x86 condition-code order (jo .. jg).
 */
enum op_op {
  OP_INVAL = 0,

  /* stack / data movement */
  OP_PUSH,
  OP_POP,
  OP_MOV,

  OP_RET,

  /* arithmetic and comparisons */
  OP_ADD,
  OP_TEST,
  OP_CMP,

  /* unconditional control transfer */
  OP_CALL,
  OP_JMP,

  /* conditional jumps */
  OP_JO,  OP_JNO,
  OP_JC,  OP_JNC,
  OP_JZ,  OP_JNZ,
  OP_JBE, OP_JA,
  OP_JS,  OP_JNS,
  OP_JP,  OP_JNP,
  OP_JL,  OP_JGE,
  OP_JLE, OP_JG,
};
60 | |
/* Kind of a parsed operand. */
enum opr_type {
  OPT_UNSPEC = 0,   /* not parsed yet */
  OPT_REG,          /* bare name, treated as a register */
  OPT_REGMEM,       /* bracketed memory reference: [..] */
  OPT_LABEL,        /* jump/call target or "offset <label>" */
  OPT_CONST,        /* numeric constant */
};
68 | |
/* Operand size given by a "byte/word/dword ptr" prefix. */
enum opr_lenmod {
  OPRM_UNSPEC = 0,  /* no size prefix seen */
  OPRM_BYTE,        /* "byte ptr" */
  OPRM_WORD,        /* "word ptr" */
  OPRM_DWORD,       /* "dword ptr" */
};
75 | |
/* Maximum number of operands stored per instruction. */
#define MAX_OPERANDS 2

/* One instruction operand, as filled in by parse_operand(). */
struct parsed_opr {
  enum opr_type type;     /* what kind of operand this is */
  enum opr_lenmod lmod;   /* size from a "byte/word/dword ptr" prefix, if one was given */
  unsigned int val;       /* numeric value; written for OPT_CONST operands */
  char name[256];         /* operand text; written for OPT_LABEL, OPT_REGMEM and OPT_REG */
};

/* One parsed instruction. */
struct parsed_op {
  enum op_class cls;      /* class copied from the matching op_table entry */
  enum op_op op;          /* operation copied from the matching op_table entry */
  struct parsed_opr operand[MAX_OPERANDS];
  int operand_cnt;        /* intended: number of entries of operand[] in use */
};
91 | |
/* Capacity of the per-function instruction buffer. */
#define MAX_OPS 1024

/* Instructions of the function currently being read, in source order. */
static struct parsed_op ops[MAX_OPS];
/*
 * labels[i] holds the label (without the ':') seen just before ops[i] was
 * parsed; an empty string means ops[i] has no label.
 */
static char labels[MAX_OPS][32];

/* x86 32-bit general-purpose register names; not referenced in the visible part of this file. */
const char *main_regs[] = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp" };
98 | |
99 | static int parse_operand(struct parsed_opr *opr, |
100 | char words[16][256], int wordc, int w, enum op_class cls) |
101 | { |
102 | char *endp = NULL; |
103 | int ret, len; |
104 | int i; |
105 | |
106 | if (w >= wordc) |
107 | aerr("parse_operand w %d, wordc %d\n", w, wordc); |
108 | |
109 | for (i = w; i < wordc; i++) { |
110 | len = strlen(words[i]); |
111 | if (words[i][len - 1] == ',') { |
112 | words[i][len - 1] = 0; |
113 | wordc = i + 1; |
114 | break; |
115 | } |
116 | } |
117 | |
118 | if (cls == OPC_JMP || cls == OPC_JCC) { |
119 | const char *label; |
120 | |
121 | if (wordc - w == 3 && IS(words[w + 1], "ptr")) |
122 | label = words[w + 2]; |
123 | else if (wordc - w == 2 && IS(words[w], "short")) |
124 | label = words[w + 1]; |
125 | else if (wordc - w == 1) |
126 | label = words[w]; |
127 | else |
128 | aerr("jump parse error"); |
129 | |
130 | opr->type = OPT_LABEL; |
131 | strcpy(opr->name, label); |
132 | return wordc; |
133 | } |
134 | |
135 | if (wordc - w >= 3) { |
136 | if (IS(words[w + 1], "ptr")) { |
137 | if (IS(words[w], "dword")) |
138 | opr->lmod = OPRM_DWORD; |
139 | else if (IS(words[w], "word")) |
140 | opr->lmod = OPRM_WORD; |
141 | else if (IS(words[w], "byte")) |
142 | opr->lmod = OPRM_BYTE; |
143 | else |
144 | aerr("type parsing failed\n"); |
145 | w += 2; |
146 | } |
147 | } |
148 | |
149 | if (wordc - w == 2 && IS(words[w], "offset")) { |
150 | opr->type = OPT_LABEL; |
151 | strcpy(opr->name, words[w + 1]); |
152 | return wordc; |
153 | } |
154 | |
155 | if (wordc - w != 1) |
156 | aerr("parse_operand 1 word expected\n"); |
157 | |
158 | len = strlen(words[w]); |
159 | |
160 | if (words[w][0] == '[') { |
161 | opr->type = OPT_REGMEM; |
162 | ret = sscanf(words[w], "[%256s]", opr->name); |
163 | if (ret != 1) |
164 | aerr("[] parse failure\n"); |
165 | return wordc; |
166 | } |
167 | else if (('0' <= words[w][0] && words[w][0] <= '9') |
168 | || words[w][0] == '-') |
169 | { |
170 | opr->type = OPT_CONST; |
171 | i = 0; |
172 | if (len > 1 && words[w][0] == '0') |
173 | i = 1; |
174 | if (words[w][len - 1] == 'h') { |
175 | words[w][len - 1] = 0; |
176 | opr->val = strtoul(&words[w][i], &endp, 16); |
177 | } |
178 | else { |
179 | opr->val = strtoul(&words[w][i], &endp, 10); |
180 | } |
181 | if (*endp != 0) |
182 | aerr("const parse failed\n"); |
183 | return wordc; |
184 | } |
185 | |
186 | strcpy(opr->name, words[w]); |
187 | opr->type = OPT_REG; |
188 | return wordc; |
189 | } |
190 | |
/*
 * Mnemonic lookup table used by parse_op().
 * Several mnemonics may map to the same op (aliases such as "jz"/"je").
 * For the conditional jumps the trailing comment gives the short-jump
 * opcode byte (hex) and, on the first alias, the flag condition tested.
 */
static const struct {
  const char *name;   /* mnemonic as it appears in the asm text */
  enum op_op op;      /* operation stored in parsed_op.op */
  enum op_class cls;  /* class stored in parsed_op.cls; also selects operand parsing */
  int minopr;         /* number of operands that must be present */
  int maxopr;         /* maximum number of operands accepted */
} op_table[] = {
  { "push", OP_PUSH, OPC_DATA, 1, 1 },
  { "pop", OP_POP, OPC_DATA, 1, 1 },
  { "mov" , OP_MOV, OPC_DATA, 2, 2 },
  { "add", OP_ADD, OPC_DATA_FLAGS, 2, 2 },
  { "test", OP_TEST, OPC_DATA_FLAGS, 2, 2 },
  { "cmp", OP_CMP, OPC_DATA_FLAGS, 2, 2 },
  { "retn", OP_RET, OPC_JMP, 0, 1 },
  { "call", OP_CALL, OPC_JMP, 1, 1 },
  { "jmp", OP_JMP, OPC_JMP, 1, 1 },
  { "jo", OP_JO, OPC_JCC, 1, 1 }, // 70 OF=1
  { "jno", OP_JNO, OPC_JCC, 1, 1 }, // 71 OF=0
  { "jc", OP_JC, OPC_JCC, 1, 1 }, // 72 CF=1
  { "jb", OP_JC, OPC_JCC, 1, 1 }, // 72
  { "jnc", OP_JNC, OPC_JCC, 1, 1 }, // 73 CF=0
  { "jae", OP_JNC, OPC_JCC, 1, 1 }, // 73
  { "jz", OP_JZ, OPC_JCC, 1, 1 }, // 74 ZF=1
  { "je", OP_JZ, OPC_JCC, 1, 1 }, // 74
  { "jnz", OP_JNZ, OPC_JCC, 1, 1 }, // 75 ZF=0
  { "jne", OP_JNZ, OPC_JCC, 1, 1 }, // 75
  { "jbe", OP_JBE, OPC_JCC, 1, 1 }, // 76 CF=1 || ZF=1
  { "jna", OP_JBE, OPC_JCC, 1, 1 }, // 76
  { "ja", OP_JA, OPC_JCC, 1, 1 }, // 77 CF=0 && ZF=0
  { "jnbe", OP_JA, OPC_JCC, 1, 1 }, // 77
  { "js", OP_JS, OPC_JCC, 1, 1 }, // 78 SF=1
  { "jns", OP_JNS, OPC_JCC, 1, 1 }, // 79 SF=0
  { "jp", OP_JP, OPC_JCC, 1, 1 }, // 7a PF=1
  { "jpe", OP_JP, OPC_JCC, 1, 1 }, // 7a
  { "jnp", OP_JNP, OPC_JCC, 1, 1 }, // 7b PF=0
  { "jpo", OP_JNP, OPC_JCC, 1, 1 }, // 7b
  { "jl", OP_JL, OPC_JCC, 1, 1 }, // 7c SF!=OF
  { "jnge", OP_JL, OPC_JCC, 1, 1 }, // 7c
  { "jge", OP_JGE, OPC_JCC, 1, 1 }, // 7d SF=OF
  { "jnl", OP_JGE, OPC_JCC, 1, 1 }, // 7d
  { "jle", OP_JLE, OPC_JCC, 1, 1 }, // 7e ZF=1 || SF!=OF
  { "jng", OP_JLE, OPC_JCC, 1, 1 }, // 7e
  { "jg", OP_JG, OPC_JCC, 1, 1 }, // 7f ZF=0 && SF=OF
  { "jnle", OP_JG, OPC_JCC, 1, 1 }, // 7f
};
236 | |
237 | static void parse_op(struct parsed_op *op, char words[16][256], int wordc) |
238 | { |
239 | int w = 1; |
240 | int opr; |
241 | int i; |
242 | |
243 | for (i = 0; i < ARRAY_SIZE(op_table); i++) { |
244 | if (!IS(words[0], op_table[i].name)) |
245 | continue; |
246 | |
247 | for (opr = 0; opr < op_table[i].minopr; opr++) { |
248 | w = parse_operand(&op->operand[opr], |
249 | words, wordc, w, op_table[i].cls); |
250 | } |
251 | |
252 | for (; w < wordc && opr < op_table[i].maxopr; opr++) { |
253 | w = parse_operand(&op->operand[opr], |
254 | words, wordc, w, op_table[i].cls); |
255 | } |
256 | |
257 | goto done; |
258 | } |
259 | |
260 | aerr("unhandled op: '%s'\n", words[0]); |
261 | |
262 | done: |
263 | if (w < wordc) |
264 | aerr("parse_op %s incomplete: %d/%d\n", |
265 | words[0], w, wordc); |
266 | |
267 | op->cls = op_table[i].cls; |
268 | op->op = op_table[i].op; |
269 | return; |
270 | } |
271 | |
272 | int gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) |
273 | { |
274 | struct parsed_proto pp; |
275 | int ret; |
276 | int i; |
277 | |
278 | ret = proto_parse(fhdr, funcn, &pp); |
279 | if (ret) |
280 | return ret; |
281 | |
282 | fprintf(fout, "%s %s(", pp.ret_type, funcn); |
283 | for (i = 0; i < pp.argc; i++) { |
284 | if (i > 0) |
285 | fprintf(fout, ", "); |
286 | fprintf(fout, "%s a%d", pp.arg[i].type, i); |
287 | } |
288 | fprintf(fout, ")\n{\n"); |
289 | |
290 | |
291 | |
292 | fprintf(fout, "}\n\n"); |
293 | proto_release(&pp); |
294 | return 0; |
295 | } |
296 | |
297 | int main(int argc, char *argv[]) |
298 | { |
299 | FILE *fout, *fasm, *fhdr; |
300 | char line[256]; |
301 | char words[16][256]; |
302 | char func[256]; |
303 | int in_func = 0; |
304 | int pi = 0; |
305 | int len; |
306 | char *p; |
307 | int wordc; |
308 | |
309 | if (argc != 4) { |
310 | printf("usage:\n%s <.c> <.asm> <hdrf>\n", |
311 | argv[0]); |
312 | return 1; |
313 | } |
314 | |
315 | hdrfn = argv[3]; |
316 | fhdr = fopen(hdrfn, "r"); |
317 | my_assert_not(fhdr, NULL); |
318 | |
319 | asmfn = argv[2]; |
320 | fasm = fopen(asmfn, "r"); |
321 | my_assert_not(fasm, NULL); |
322 | |
323 | fout = fopen(argv[1], "w"); |
324 | my_assert_not(fout, NULL); |
325 | |
326 | |
327 | while (fgets(line, sizeof(line), fasm)) |
328 | { |
329 | asmln++; |
330 | |
331 | p = sskip(line); |
332 | if (*p == 0 || *p == ';') |
333 | continue; |
334 | |
335 | memset(words, 0, sizeof(words)); |
336 | for (wordc = 0; wordc < 16; wordc++) { |
337 | p = sskip(next_word(words[wordc], sizeof(words[0]), p)); |
338 | if (*p == 0 || *p == ';') { |
339 | wordc++; |
340 | break; |
341 | } |
342 | } |
343 | |
344 | if (wordc == 0) { |
345 | // shouldn't happen |
346 | awarn("wordc == 0?\n"); |
347 | continue; |
348 | } |
349 | |
350 | // don't care about this: |
351 | if (words[0][0] == '.' |
352 | || IS(words[0], "include") |
353 | || IS(words[0], "assume") || IS(words[1], "segment") |
354 | || IS(words[0], "align")) |
355 | { |
356 | continue; |
357 | } |
358 | |
359 | if (IS(words[1], "proc")) { |
360 | if (in_func) |
361 | aerr("proc '%s' while in_func '%s'?\n", |
362 | words[0], func); |
363 | strcpy(func, words[0]); |
364 | in_func = 1; |
365 | continue; |
366 | } |
367 | |
368 | if (IS(words[1], "endp")) { |
369 | if (!in_func) |
370 | aerr("endp '%s' while not in_func?\n", words[0]); |
371 | if (!IS(func, words[0])) |
372 | aerr("endp '%s' while in_func '%s'?\n", |
373 | words[0], func); |
374 | gen_func(fout, fhdr, func, pi); |
375 | in_func = 0; |
376 | func[0] = 0; |
377 | if (pi != 0) { |
378 | memset(&ops, 0, pi * sizeof(ops[0])); |
379 | memset(labels, 0, pi * sizeof(labels[0])); |
380 | pi = 0; |
381 | } |
382 | exit(1); |
383 | continue; |
384 | } |
385 | |
386 | if (IS(words[1], "=")) |
387 | // lots of work will be have to be done here, but for now.. |
388 | continue; |
389 | |
390 | if (pi >= ARRAY_SIZE(ops)) |
391 | aerr("too many ops\n"); |
392 | |
393 | p = strchr(words[0], ':'); |
394 | if (p != NULL) { |
395 | len = p - words[0]; |
396 | if (len > sizeof(labels[0]) - 1) |
397 | aerr("label too long: %d\n", len); |
398 | if (labels[pi][0] != 0) |
399 | aerr("dupe label?\n"); |
400 | memcpy(labels[pi], words[0], len); |
401 | labels[pi][len] = 0; |
402 | continue; |
403 | } |
404 | |
405 | parse_op(&ops[pi], words, wordc); |
406 | pi++; |
407 | |
408 | (void)proto_parse; |
409 | } |
410 | |
411 | fclose(fout); |
412 | fclose(fasm); |
413 | fclose(fhdr); |
414 | |
415 | return 0; |
416 | } |