| 1 | #include <stdio.h> |
| 2 | #include <stdlib.h> |
| 3 | #include <string.h> |
| 4 | |
| 5 | #include "my_assert.h" |
| 6 | #include "my_str.h" |
| 7 | |
/* Element count of a true array (not a pointer).  The argument is
 * parenthesized so any array-typed expression can be passed safely. */
#define ARRAY_SIZE(x) (sizeof(x) / sizeof((x)[0]))
/* 1 when the two NUL-terminated strings are equal, 0 otherwise. */
#define IS(w, y) (strcmp((w), (y)) == 0)
| 10 | |
| 11 | #include "protoparse.h" |
| 12 | |
/* Path of the .asm file being translated and the number of the line most
 * recently read from it; both are stamped onto every diagnostic. */
const char *asmfn;
static int asmln;

/* Shared formatter: prints "<kind>:<file>:<line>: <message>" to stdout. */
#define asm_diag(kind, fmt, ...) \
  printf(kind ":%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__)

/* Report a non-fatal problem at the current asm line. */
#define awarn(fmt, ...) \
  asm_diag("warning", fmt, ##__VA_ARGS__)

/* Report a fatal problem at the current asm line, then exit with status 1. */
#define aerr(fmt, ...) do { \
  asm_diag("error", fmt, ##__VA_ARGS__); \
  exit(1); \
} while (0)
| 22 | |
/* Coarse instruction category; parse_op() copies it from op_table. */
enum op_class {
  OPC_UNSPEC     = 0,
  OPC_RMD        = 1, /* removed or optimized out */
  OPC_DATA       = 2, /* data processing */
  OPC_DATA_FLAGS = 3, /* data processing + sets flags */
  OPC_JMP        = 4, /* .. and call */
  OPC_JCC        = 5,
};
| 31 | |
/* Operation identifiers.  Several mnemonics can map onto one value
 * (see op_table). */
enum op_op {
  OP_INVAL = 0,

  OP_PUSH, OP_POP, OP_MOV,
  OP_RET,
  OP_ADD, OP_TEST, OP_CMP,
  OP_CALL, OP_JMP,

  /* conditional jumps, kept in the order of opcodes 70..7f */
  OP_JO,  OP_JNO,
  OP_JC,  OP_JNC,
  OP_JZ,  OP_JNZ,
  OP_JBE, OP_JA,
  OP_JS,  OP_JNS,
  OP_JP,  OP_JNP,
  OP_JL,  OP_JGE,
  OP_JLE, OP_JG,
};
| 60 | |
/* Kind of operand, as classified by parse_operand(). */
enum opr_type {
  OPT_UNSPEC = 0,
  OPT_REG    = 1, /* fallback: plain word, stored in name */
  OPT_REGMEM = 2, /* operand written in [] brackets */
  OPT_LABEL  = 3, /* jump/call target or "offset <label>" */
  OPT_CONST  = 4, /* numeric constant, stored in val */
};
| 68 | |
/* Operand size taken from a "byte ptr" / "word ptr" / "dword ptr" prefix. */
enum opr_lenmod {
  OPRM_UNSPEC = 0, /* no size prefix seen */
  OPRM_BYTE   = 1,
  OPRM_WORD   = 2,
  OPRM_DWORD  = 3,
};
| 75 | |
| 76 | #define MAX_OPERANDS 2 |
| 77 | |
| 78 | struct parsed_opr { |
| 79 | enum opr_type type; |
| 80 | enum opr_lenmod lmod; |
| 81 | unsigned int val; |
| 82 | char name[256]; |
| 83 | }; |
| 84 | |
| 85 | struct parsed_op { |
| 86 | enum op_class cls; |
| 87 | enum op_op op; |
| 88 | struct parsed_opr operand[MAX_OPERANDS]; |
| 89 | int operand_cnt; |
| 90 | }; |
| 91 | |
| 92 | #define MAX_OPS 1024 |
| 93 | |
| 94 | static struct parsed_op ops[MAX_OPS]; |
| 95 | static char labels[MAX_OPS][32]; |
| 96 | |
| 97 | const char *main_regs[] = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp" }; |
| 98 | |
| 99 | static int parse_operand(struct parsed_opr *opr, |
| 100 | char words[16][256], int wordc, int w, enum op_class cls) |
| 101 | { |
| 102 | char *endp = NULL; |
| 103 | int ret, len; |
| 104 | int i; |
| 105 | |
| 106 | if (w >= wordc) |
| 107 | aerr("parse_operand w %d, wordc %d\n", w, wordc); |
| 108 | |
| 109 | for (i = w; i < wordc; i++) { |
| 110 | len = strlen(words[i]); |
| 111 | if (words[i][len - 1] == ',') { |
| 112 | words[i][len - 1] = 0; |
| 113 | wordc = i + 1; |
| 114 | break; |
| 115 | } |
| 116 | } |
| 117 | |
| 118 | if (cls == OPC_JMP || cls == OPC_JCC) { |
| 119 | const char *label; |
| 120 | |
| 121 | if (wordc - w == 3 && IS(words[w + 1], "ptr")) |
| 122 | label = words[w + 2]; |
| 123 | else if (wordc - w == 2 && IS(words[w], "short")) |
| 124 | label = words[w + 1]; |
| 125 | else if (wordc - w == 1) |
| 126 | label = words[w]; |
| 127 | else |
| 128 | aerr("jump parse error"); |
| 129 | |
| 130 | opr->type = OPT_LABEL; |
| 131 | strcpy(opr->name, label); |
| 132 | return wordc; |
| 133 | } |
| 134 | |
| 135 | if (wordc - w >= 3) { |
| 136 | if (IS(words[w + 1], "ptr")) { |
| 137 | if (IS(words[w], "dword")) |
| 138 | opr->lmod = OPRM_DWORD; |
| 139 | else if (IS(words[w], "word")) |
| 140 | opr->lmod = OPRM_WORD; |
| 141 | else if (IS(words[w], "byte")) |
| 142 | opr->lmod = OPRM_BYTE; |
| 143 | else |
| 144 | aerr("type parsing failed\n"); |
| 145 | w += 2; |
| 146 | } |
| 147 | } |
| 148 | |
| 149 | if (wordc - w == 2 && IS(words[w], "offset")) { |
| 150 | opr->type = OPT_LABEL; |
| 151 | strcpy(opr->name, words[w + 1]); |
| 152 | return wordc; |
| 153 | } |
| 154 | |
| 155 | if (wordc - w != 1) |
| 156 | aerr("parse_operand 1 word expected\n"); |
| 157 | |
| 158 | len = strlen(words[w]); |
| 159 | |
| 160 | if (words[w][0] == '[') { |
| 161 | opr->type = OPT_REGMEM; |
| 162 | ret = sscanf(words[w], "[%256s]", opr->name); |
| 163 | if (ret != 1) |
| 164 | aerr("[] parse failure\n"); |
| 165 | return wordc; |
| 166 | } |
| 167 | else if (('0' <= words[w][0] && words[w][0] <= '9') |
| 168 | || words[w][0] == '-') |
| 169 | { |
| 170 | opr->type = OPT_CONST; |
| 171 | i = 0; |
| 172 | if (len > 1 && words[w][0] == '0') |
| 173 | i = 1; |
| 174 | if (words[w][len - 1] == 'h') { |
| 175 | words[w][len - 1] = 0; |
| 176 | opr->val = strtoul(&words[w][i], &endp, 16); |
| 177 | } |
| 178 | else { |
| 179 | opr->val = strtoul(&words[w][i], &endp, 10); |
| 180 | } |
| 181 | if (*endp != 0) |
| 182 | aerr("const parse failed\n"); |
| 183 | return wordc; |
| 184 | } |
| 185 | |
| 186 | strcpy(opr->name, words[w]); |
| 187 | opr->type = OPT_REG; |
| 188 | return wordc; |
| 189 | } |
| 190 | |
| 191 | static const struct { |
| 192 | const char *name; |
| 193 | enum op_op op; |
| 194 | enum op_class cls; |
| 195 | int minopr; |
| 196 | int maxopr; |
| 197 | } op_table[] = { |
| 198 | { "push", OP_PUSH, OPC_DATA, 1, 1 }, |
| 199 | { "pop", OP_POP, OPC_DATA, 1, 1 }, |
| 200 | { "mov" , OP_MOV, OPC_DATA, 2, 2 }, |
| 201 | { "add", OP_ADD, OPC_DATA_FLAGS, 2, 2 }, |
| 202 | { "test", OP_TEST, OPC_DATA_FLAGS, 2, 2 }, |
| 203 | { "cmp", OP_CMP, OPC_DATA_FLAGS, 2, 2 }, |
| 204 | { "retn", OP_RET, OPC_JMP, 0, 1 }, |
| 205 | { "call", OP_CALL, OPC_JMP, 1, 1 }, |
| 206 | { "jmp", OP_JMP, OPC_JMP, 1, 1 }, |
| 207 | { "jo", OP_JO, OPC_JCC, 1, 1 }, // 70 OF=1 |
| 208 | { "jno", OP_JNO, OPC_JCC, 1, 1 }, // 71 OF=0 |
| 209 | { "jc", OP_JC, OPC_JCC, 1, 1 }, // 72 CF=1 |
| 210 | { "jb", OP_JC, OPC_JCC, 1, 1 }, // 72 |
| 211 | { "jnc", OP_JNC, OPC_JCC, 1, 1 }, // 73 CF=0 |
| 212 | { "jae", OP_JNC, OPC_JCC, 1, 1 }, // 73 |
| 213 | { "jz", OP_JZ, OPC_JCC, 1, 1 }, // 74 ZF=1 |
| 214 | { "je", OP_JZ, OPC_JCC, 1, 1 }, // 74 |
| 215 | { "jnz", OP_JNZ, OPC_JCC, 1, 1 }, // 75 ZF=0 |
| 216 | { "jne", OP_JNZ, OPC_JCC, 1, 1 }, // 75 |
| 217 | { "jbe", OP_JBE, OPC_JCC, 1, 1 }, // 76 CF=1 || ZF=1 |
| 218 | { "jna", OP_JBE, OPC_JCC, 1, 1 }, // 76 |
| 219 | { "ja", OP_JA, OPC_JCC, 1, 1 }, // 77 CF=0 && ZF=0 |
| 220 | { "jnbe", OP_JA, OPC_JCC, 1, 1 }, // 77 |
| 221 | { "js", OP_JS, OPC_JCC, 1, 1 }, // 78 SF=1 |
| 222 | { "jns", OP_JNS, OPC_JCC, 1, 1 }, // 79 SF=0 |
| 223 | { "jp", OP_JP, OPC_JCC, 1, 1 }, // 7a PF=1 |
| 224 | { "jpe", OP_JP, OPC_JCC, 1, 1 }, // 7a |
| 225 | { "jnp", OP_JNP, OPC_JCC, 1, 1 }, // 7b PF=0 |
| 226 | { "jpo", OP_JNP, OPC_JCC, 1, 1 }, // 7b |
| 227 | { "jl", OP_JL, OPC_JCC, 1, 1 }, // 7c SF!=OF |
| 228 | { "jnge", OP_JL, OPC_JCC, 1, 1 }, // 7c |
| 229 | { "jge", OP_JGE, OPC_JCC, 1, 1 }, // 7d SF=OF |
| 230 | { "jnl", OP_JGE, OPC_JCC, 1, 1 }, // 7d |
| 231 | { "jle", OP_JLE, OPC_JCC, 1, 1 }, // 7e ZF=1 || SF!=OF |
| 232 | { "jng", OP_JLE, OPC_JCC, 1, 1 }, // 7e |
| 233 | { "jg", OP_JG, OPC_JCC, 1, 1 }, // 7f ZF=0 && SF=OF |
| 234 | { "jnle", OP_JG, OPC_JCC, 1, 1 }, // 7f |
| 235 | }; |
| 236 | |
| 237 | static void parse_op(struct parsed_op *op, char words[16][256], int wordc) |
| 238 | { |
| 239 | int w = 1; |
| 240 | int opr; |
| 241 | int i; |
| 242 | |
| 243 | for (i = 0; i < ARRAY_SIZE(op_table); i++) { |
| 244 | if (!IS(words[0], op_table[i].name)) |
| 245 | continue; |
| 246 | |
| 247 | for (opr = 0; opr < op_table[i].minopr; opr++) { |
| 248 | w = parse_operand(&op->operand[opr], |
| 249 | words, wordc, w, op_table[i].cls); |
| 250 | } |
| 251 | |
| 252 | for (; w < wordc && opr < op_table[i].maxopr; opr++) { |
| 253 | w = parse_operand(&op->operand[opr], |
| 254 | words, wordc, w, op_table[i].cls); |
| 255 | } |
| 256 | |
| 257 | goto done; |
| 258 | } |
| 259 | |
| 260 | aerr("unhandled op: '%s'\n", words[0]); |
| 261 | |
| 262 | done: |
| 263 | if (w < wordc) |
| 264 | aerr("parse_op %s incomplete: %d/%d\n", |
| 265 | words[0], w, wordc); |
| 266 | |
| 267 | op->cls = op_table[i].cls; |
| 268 | op->op = op_table[i].op; |
| 269 | return; |
| 270 | } |
| 271 | |
| 272 | int gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) |
| 273 | { |
| 274 | struct parsed_proto pp; |
| 275 | int ret; |
| 276 | int i; |
| 277 | |
| 278 | ret = proto_parse(fhdr, funcn, &pp); |
| 279 | if (ret) |
| 280 | return ret; |
| 281 | |
| 282 | fprintf(fout, "%s %s(", pp.ret_type, funcn); |
| 283 | for (i = 0; i < pp.argc; i++) { |
| 284 | if (i > 0) |
| 285 | fprintf(fout, ", "); |
| 286 | fprintf(fout, "%s a%d", pp.arg[i].type, i); |
| 287 | } |
| 288 | fprintf(fout, ")\n{\n"); |
| 289 | |
| 290 | |
| 291 | |
| 292 | fprintf(fout, "}\n\n"); |
| 293 | proto_release(&pp); |
| 294 | return 0; |
| 295 | } |
| 296 | |
| 297 | int main(int argc, char *argv[]) |
| 298 | { |
| 299 | FILE *fout, *fasm, *fhdr; |
| 300 | char line[256]; |
| 301 | char words[16][256]; |
| 302 | char func[256]; |
| 303 | int in_func = 0; |
| 304 | int pi = 0; |
| 305 | int len; |
| 306 | char *p; |
| 307 | int wordc; |
| 308 | |
| 309 | if (argc != 4) { |
| 310 | printf("usage:\n%s <.c> <.asm> <hdrf>\n", |
| 311 | argv[0]); |
| 312 | return 1; |
| 313 | } |
| 314 | |
| 315 | hdrfn = argv[3]; |
| 316 | fhdr = fopen(hdrfn, "r"); |
| 317 | my_assert_not(fhdr, NULL); |
| 318 | |
| 319 | asmfn = argv[2]; |
| 320 | fasm = fopen(asmfn, "r"); |
| 321 | my_assert_not(fasm, NULL); |
| 322 | |
| 323 | fout = fopen(argv[1], "w"); |
| 324 | my_assert_not(fout, NULL); |
| 325 | |
| 326 | |
| 327 | while (fgets(line, sizeof(line), fasm)) |
| 328 | { |
| 329 | asmln++; |
| 330 | |
| 331 | p = sskip(line); |
| 332 | if (*p == 0 || *p == ';') |
| 333 | continue; |
| 334 | |
| 335 | memset(words, 0, sizeof(words)); |
| 336 | for (wordc = 0; wordc < 16; wordc++) { |
| 337 | p = sskip(next_word(words[wordc], sizeof(words[0]), p)); |
| 338 | if (*p == 0 || *p == ';') { |
| 339 | wordc++; |
| 340 | break; |
| 341 | } |
| 342 | } |
| 343 | |
| 344 | if (wordc == 0) { |
| 345 | // shouldn't happen |
| 346 | awarn("wordc == 0?\n"); |
| 347 | continue; |
| 348 | } |
| 349 | |
| 350 | // don't care about this: |
| 351 | if (words[0][0] == '.' |
| 352 | || IS(words[0], "include") |
| 353 | || IS(words[0], "assume") || IS(words[1], "segment") |
| 354 | || IS(words[0], "align")) |
| 355 | { |
| 356 | continue; |
| 357 | } |
| 358 | |
| 359 | if (IS(words[1], "proc")) { |
| 360 | if (in_func) |
| 361 | aerr("proc '%s' while in_func '%s'?\n", |
| 362 | words[0], func); |
| 363 | strcpy(func, words[0]); |
| 364 | in_func = 1; |
| 365 | continue; |
| 366 | } |
| 367 | |
| 368 | if (IS(words[1], "endp")) { |
| 369 | if (!in_func) |
| 370 | aerr("endp '%s' while not in_func?\n", words[0]); |
| 371 | if (!IS(func, words[0])) |
| 372 | aerr("endp '%s' while in_func '%s'?\n", |
| 373 | words[0], func); |
| 374 | gen_func(fout, fhdr, func, pi); |
| 375 | in_func = 0; |
| 376 | func[0] = 0; |
| 377 | if (pi != 0) { |
| 378 | memset(&ops, 0, pi * sizeof(ops[0])); |
| 379 | memset(labels, 0, pi * sizeof(labels[0])); |
| 380 | pi = 0; |
| 381 | } |
| 382 | exit(1); |
| 383 | continue; |
| 384 | } |
| 385 | |
| 386 | if (IS(words[1], "=")) |
| 387 | // lots of work will be have to be done here, but for now.. |
| 388 | continue; |
| 389 | |
| 390 | if (pi >= ARRAY_SIZE(ops)) |
| 391 | aerr("too many ops\n"); |
| 392 | |
| 393 | p = strchr(words[0], ':'); |
| 394 | if (p != NULL) { |
| 395 | len = p - words[0]; |
| 396 | if (len > sizeof(labels[0]) - 1) |
| 397 | aerr("label too long: %d\n", len); |
| 398 | if (labels[pi][0] != 0) |
| 399 | aerr("dupe label?\n"); |
| 400 | memcpy(labels[pi], words[0], len); |
| 401 | labels[pi][len] = 0; |
| 402 | continue; |
| 403 | } |
| 404 | |
| 405 | parse_op(&ops[pi], words, wordc); |
| 406 | pi++; |
| 407 | |
| 408 | (void)proto_parse; |
| 409 | } |
| 410 | |
| 411 | fclose(fout); |
| 412 | fclose(fasm); |
| 413 | fclose(fhdr); |
| 414 | |
| 415 | return 0; |
| 416 | } |