From c36e914d6c0dd787c146395192fa14a7a2bd4218 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 29 Nov 2013 23:18:58 +0200 Subject: [PATCH] start work on asm->c translation --- tools/Makefile | 2 +- tools/mkbridge.c | 369 +++++----------------------------------- tools/protoparse.h | 289 +++++++++++++++++++++++++++++++ tools/translate.c | 416 +++++++++++++++++++++++++++++++++++++++++++++ 4 files changed, 745 insertions(+), 331 deletions(-) create mode 100644 tools/protoparse.h create mode 100644 tools/translate.c diff --git a/tools/Makefile b/tools/Makefile index 2f21042..d2fa856 100644 --- a/tools/Makefile +++ b/tools/Makefile @@ -1,6 +1,6 @@ CFLAGS += -Wall -ggdb -T = asmproc cmpmrg_text mkbridge +T = asmproc cmpmrg_text mkbridge translate all: $(T) diff --git a/tools/mkbridge.c b/tools/mkbridge.c index ae2aec3..ac25d38 100644 --- a/tools/mkbridge.c +++ b/tools/mkbridge.c @@ -8,247 +8,7 @@ #define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) #define IS(w, y) !strcmp(w, y) -static int find_protostr(char *dst, size_t dlen, FILE *fhdr, - const char *sym, int *pline) -{ - int line = 0; - char *p; - - rewind(fhdr); - - while (fgets(dst, dlen, fhdr)) - { - line++; - if (strstr(dst, sym) != NULL) - break; - } - *pline = line; - - if (feof(fhdr)) - return -1; - - p = dst + strlen(dst); - for (p--; p > dst && my_isblank(*p); --p) - *p = 0; - - return 0; -} - -static int get_regparm(char *dst, size_t dlen, char *p) -{ - int i, o; - - if (*p != '<') - return 0; - - for (o = 0, i = 1; o < dlen; i++) { - if (p[i] == 0) - return 0; - if (p[i] == '>') - break; - dst[o++] = p[i]; - } - dst[o] = 0; - return i + 1; -} - -// hmh.. -static const char *known_types[] = { - "const void *", - "void *", - "char *", - "FILE *", - "unsigned __int8", - "unsigned __int16", - "unsigned int", - "signed int", - "char", - "__int8", - "__int16", - "int", - "bool", - "void", - "BYTE", - "WORD", - "DWORD", - "HMODULE", - "HANDLE", - "HWND", - "LPCSTR", - "size_t", -}; - -static int check_type(const char *name) -{ - int i, l; - - for (i = 0; i < ARRAY_SIZE(known_types); i++) { - l = strlen(known_types[i]); - if (strncmp(known_types[i], name, l) == 0) - return l; - } - - return 0; -} - -/* args are always expanded to 32bit */ -static const char *map_reg(const char *reg) -{ - const char *regs_f[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" }; - const char *regs_w[] = { "ax", "bx", "cx", "dx", "si", "di" }; - const char *regs_b[] = { "al", "bl", "cl", "dl" }; - int i; - - for (i = 0; i < ARRAY_SIZE(regs_w); i++) - if (IS(reg, regs_w[i])) - return regs_f[i]; - - for (i = 0; i < ARRAY_SIZE(regs_b); i++) - if (IS(reg, regs_b[i])) - return regs_f[i]; - - return reg; -} - -static const char *hdrfn; -static int pline = 0; - -static int parse_protostr(char *protostr, char **reglist, int *cnt_out, - int *is_stdcall) -{ - char regparm[16]; - char buf[256]; - char cconv[32]; - int xarg = 0; - int ret; - char *p; - - p = protostr; - if (p[0] == '/' && p[1] == '/') { - //printf("warning: decl for sym '%s' is commented out\n", sym); - p = sskip(p + 2); - } - - ret = check_type(p); - if (ret <= 0) { - printf("%s:%d:%ld: unhandled return in '%s'\n", - hdrfn, pline, (p - protostr) + 1, protostr); - return 1; - } - p += ret; - p = sskip(p); - - p = next_word(cconv, sizeof(cconv), p); - p = sskip(p); - if (cconv[0] == 0) { - printf("%s:%d:%ld: cconv missing\n", - hdrfn, pline, (p - protostr) + 1); - return 1; - } - if (IS(cconv, "__cdecl")) - *is_stdcall = 0; - else if (IS(cconv, "__stdcall")) - *is_stdcall = 1; - else if (IS(cconv, "__fastcall")) - *is_stdcall = 1; - else if (IS(cconv, "__thiscall")) - *is_stdcall = 1; - else if (IS(cconv, "__userpurge")) - *is_stdcall = 1; // in all cases seen.. - else if (IS(cconv, "__usercall")) - *is_stdcall = 0; // ..or is it? - else { - printf("%s:%d:%ld: unhandled cconv: '%s'\n", - hdrfn, pline, (p - protostr) + 1, cconv); - return 1; - } - - p = next_idt(buf, sizeof(buf), p); - p = sskip(p); - if (buf[0] == 0) { - printf("%s:%d:%ld: func name missing\n", - hdrfn, pline, (p - protostr) + 1); - return 1; - } - - ret = get_regparm(regparm, sizeof(regparm), p); - if (ret > 0) { - if (!IS(regparm, "eax") && !IS(regparm, "ax") - && !IS(regparm, "al")) - { - printf("%s:%d:%ld: bad regparm: %s\n", - hdrfn, pline, (p - protostr) + 1, regparm); - return 1; - } - p += ret; - p = sskip(p); - } - - if (*p != '(') { - printf("%s:%d:%ld: '(' expected, got '%c'\n", - hdrfn, pline, (p - protostr) + 1, *p); - return 1; - } - p++; - - while (1) { - p = sskip(p); - if (*p == ')') - break; - if (*p == ',') - p = sskip(p + 1); - - xarg++; - - ret = check_type(p); - if (ret <= 0) { - printf("%s:%d:%ld: unhandled type for arg%d\n", - hdrfn, pline, (p - protostr) + 1, xarg); - return 1; - } - p += ret; - p = sskip(p); - - p = next_idt(buf, sizeof(buf), p); - p = sskip(p); -#if 0 - if (buf[0] == 0) { - printf("%s:%d:%ld: idt missing for arg%d\n", - hdrfn, pline, (p - protostr) + 1, xarg); - return 1; - } -#endif - reglist[xarg - 1] = NULL; - - ret = get_regparm(regparm, sizeof(regparm), p); - if (ret > 0) { - p += ret; - p = sskip(p); - - reglist[xarg - 1] = strdup(map_reg(regparm)); - } - } - - if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) { - if (reglist[0] != NULL) { - printf("%s:%d: %s with arg1 spec %s?\n", - hdrfn, pline, cconv, reglist[0]); - } - reglist[0] = strdup("ecx"); - } - - if (xarg > 1 && IS(cconv, "__fastcall")) { - if (reglist[1] != NULL) { - printf("%s:%d: %s with arg2 spec %s?\n", - hdrfn, pline, cconv, reglist[1]); - } - reglist[1] = strdup("edx"); - } - - *cnt_out = xarg; - - return 0; -} +#include "protoparse.h" static int is_x86_reg_saved(const char *reg) { @@ -263,55 +23,47 @@ static int is_x86_reg_saved(const char *reg) return !nosave; } -static void out_toasm_x86(FILE *f, char *sym, char *reg_list[], int reg_cnt, - int is_stdcall) +static void out_toasm_x86(FILE *f, char *sym, struct parsed_proto *pp) { - int have_normal = 0; // normal args - int have_regs = 0; int must_save = 0; int sarg_ofs = 1; // stack offset to args, in DWORDs int args_repushed = 0; int i; - for (i = 0; i < reg_cnt; i++) { - if (reg_list[i] == NULL) { - have_normal++; - continue; - } - - have_regs++; - must_save |= is_x86_reg_saved(reg_list[i]); + for (i = 0; i < pp->argc; i++) { + if (pp->arg[i].reg != NULL) + must_save |= is_x86_reg_saved(pp->arg[i].reg); } fprintf(f, ".global _%s\n", sym); fprintf(f, "_%s:\n", sym); - if (!have_regs && !is_stdcall) { + if (pp->argc_reg == 0 && !pp->is_stdcall) { fprintf(f, "\tjmp %s\n\n", sym); return; } - if (!have_normal && !must_save && !is_stdcall) { + if (pp->argc_stack == 0 && !must_save && !pp->is_stdcall) { // load arg regs - for (i = 0; i < reg_cnt; i++) { + for (i = 0; i < pp->argc; i++) { fprintf(f, "\tmovl %d(%%esp), %%%s\n", - (i + sarg_ofs) * 4, reg_list[i]); + (i + sarg_ofs) * 4, pp->arg[i].reg); } fprintf(f, "\tjmp %s\n\n", sym); return; } // save the regs - for (i = 0; i < reg_cnt; i++) { - if (reg_list[i] != NULL && is_x86_reg_saved(reg_list[i])) { - fprintf(f, "\tpushl %%%s\n", reg_list[i]); + for (i = 0; i < pp->argc; i++) { + if (pp->arg[i].reg != NULL && is_x86_reg_saved(pp->arg[i].reg)) { + fprintf(f, "\tpushl %%%s\n", pp->arg[i].reg); sarg_ofs++; } } // reconstruct arg stack - for (i = reg_cnt - 1; i >= 0; i--) { - if (reg_list[i] == NULL) { + for (i = pp->argc - 1; i >= 0; i--) { + if (pp->arg[i].reg == NULL) { fprintf(f, "\tmovl %d(%%esp), %%eax\n", (i + sarg_ofs) * 4); fprintf(f, "\tpushl %%eax\n"); @@ -319,54 +71,42 @@ static void out_toasm_x86(FILE *f, char *sym, char *reg_list[], int reg_cnt, args_repushed++; } } - my_assert(args_repushed, have_normal); + my_assert(args_repushed, pp->argc_stack); // load arg regs - for (i = 0; i < reg_cnt; i++) { - if (reg_list[i] != NULL) { + for (i = 0; i < pp->argc; i++) { + if (pp->arg[i].reg != NULL) { fprintf(f, "\tmovl %d(%%esp), %%%s\n", - (i + sarg_ofs) * 4, reg_list[i]); + (i + sarg_ofs) * 4, pp->arg[i].reg); } } - fprintf(f, "\n\t# %s\n", is_stdcall ? "__stdcall" : "__cdecl"); + fprintf(f, "\n\t# %s\n", pp->is_stdcall ? "__stdcall" : "__cdecl"); fprintf(f, "\tcall %s\n\n", sym); - if (args_repushed && !is_stdcall) + if (args_repushed && !pp->is_stdcall) fprintf(f, "\tadd $%d,%%esp\n", args_repushed * 4); // restore regs - for (i = reg_cnt - 1; i >= 0; i--) { - if (reg_list[i] != NULL && is_x86_reg_saved(reg_list[i])) - fprintf(f, "\tpopl %%%s\n", reg_list[i]); + for (i = pp->argc - 1; i >= 0; i--) { + if (pp->arg[i].reg != NULL && is_x86_reg_saved(pp->arg[i].reg)) + fprintf(f, "\tpopl %%%s\n", pp->arg[i].reg); } fprintf(f, "\tret\n\n"); } -static void out_fromasm_x86(FILE *f, char *sym, char *reg_list[], int reg_cnt, - int is_stdcall) +static void out_fromasm_x86(FILE *f, char *sym, struct parsed_proto *pp) { - int have_normal = 0; // normal args - int have_regs = 0; int sarg_ofs = 1; // stack offset to args, in DWORDs int stack_args; int i; - for (i = 0; i < reg_cnt; i++) { - if (reg_list[i] == NULL) { - have_normal++; - continue; - } - - have_regs++; - } - - fprintf(f, "# %s\n", is_stdcall ? "__stdcall" : "__cdecl"); + fprintf(f, "# %s\n", pp->is_stdcall ? "__stdcall" : "__cdecl"); fprintf(f, ".global %s\n", sym); fprintf(f, "%s:\n", sym); - if (!have_regs && !is_stdcall) { + if (pp->argc_reg == 0 && !pp->is_stdcall) { fprintf(f, "\tjmp _%s\n\n", sym); return; } @@ -375,16 +115,16 @@ static void out_fromasm_x86(FILE *f, char *sym, char *reg_list[], int reg_cnt, sarg_ofs++; // construct arg stack - stack_args = have_normal; - for (i = reg_cnt - 1; i >= 0; i--) { - if (reg_list[i] == NULL) { + stack_args = pp->argc_stack; + for (i = pp->argc - 1; i >= 0; i--) { + if (pp->arg[i].reg == NULL) { fprintf(f, "\tmovl %d(%%esp), %%edx\n", (sarg_ofs + stack_args - 1) * 4); fprintf(f, "\tpushl %%edx\n"); stack_args--; } else { - fprintf(f, "\tpushl %%%s\n", reg_list[i]); + fprintf(f, "\tpushl %%%s\n", pp->arg[i].reg); } sarg_ofs++; } @@ -397,33 +137,18 @@ static void out_fromasm_x86(FILE *f, char *sym, char *reg_list[], int reg_cnt, fprintf(f, "\tpopl %%edx\n"); - if (is_stdcall && have_normal) - fprintf(f, "\tret $%d\n\n", have_normal * 4); + if (pp->is_stdcall && pp->argc_stack) + fprintf(f, "\tret $%d\n\n", pp->argc_stack * 4); else fprintf(f, "\tret\n\n"); } -static void free_reglist(char *reg_list[], int reg_cnt) -{ - int i; - - for (i = 0; i < reg_cnt; i++) { - if (reg_list[i] == NULL) { - free(reg_list[i]); - reg_list[i] = NULL; - } - } -} - int main(int argc, char *argv[]) { FILE *fout, *fsyms_to, *fsyms_from, *fhdr; - char protostr[256]; + struct parsed_proto pp; char line[256]; char sym[256]; - char *reg_list[16]; - int is_stdcall = 0; - int reg_cnt = 0; int ret; if (argc != 5) { @@ -454,20 +179,12 @@ int main(int argc, char *argv[]) if (sym[0] == 0 || sym[0] == ';' || sym[0] == '#') continue; - ret = find_protostr(protostr, sizeof(protostr), fhdr, - sym, &pline); - if (ret != 0) { - printf("%s: sym '%s' is missing\n", - hdrfn, sym); - goto out; - } - - ret = parse_protostr(protostr, reg_list, ®_cnt, &is_stdcall); + ret = proto_parse(fhdr, sym, &pp); if (ret) goto out; - out_toasm_x86(fout, sym, reg_list, reg_cnt, is_stdcall); - free_reglist(reg_list, reg_cnt); + out_toasm_x86(fout, sym, &pp); + proto_release(&pp); } fprintf(fout, "# from asm\n\n"); @@ -478,20 +195,12 @@ int main(int argc, char *argv[]) if (sym[0] == 0 || sym[0] == ';' || sym[0] == '#') continue; - ret = find_protostr(protostr, sizeof(protostr), fhdr, - sym, &pline); - if (ret != 0) { - printf("%s: sym '%s' is missing\n", - hdrfn, sym); - goto out; - } - - ret = parse_protostr(protostr, reg_list, ®_cnt, &is_stdcall); + ret = proto_parse(fhdr, sym, &pp); if (ret) goto out; - out_fromasm_x86(fout, sym, reg_list, reg_cnt, is_stdcall); - free_reglist(reg_list, reg_cnt); + out_fromasm_x86(fout, sym, &pp); + proto_release(&pp); } ret = 0; diff --git a/tools/protoparse.h b/tools/protoparse.h new file mode 100644 index 0000000..a42f38e --- /dev/null +++ b/tools/protoparse.h @@ -0,0 +1,289 @@ + +struct parsed_proto { + struct { + char *reg; + const char *type; + } arg[16]; + const char *ret_type; + int is_stdcall; + int argc; + int argc_stack; + int argc_reg; +}; + +static const char *hdrfn; +static int hdrfline = 0; + +static int find_protostr(char *dst, size_t dlen, FILE *fhdr, const char *sym) +{ + int line = 0; + char *p; + + rewind(fhdr); + + while (fgets(dst, dlen, fhdr)) + { + line++; + if (strstr(dst, sym) != NULL) + break; + } + hdrfline = line; + + if (feof(fhdr)) + return -1; + + p = dst + strlen(dst); + for (p--; p > dst && my_isblank(*p); --p) + *p = 0; + + return 0; +} + +static int get_regparm(char *dst, size_t dlen, char *p) +{ + int i, o; + + if (*p != '<') + return 0; + + for (o = 0, i = 1; o < dlen; i++) { + if (p[i] == 0) + return 0; + if (p[i] == '>') + break; + dst[o++] = p[i]; + } + dst[o] = 0; + return i + 1; +} + +// hmh.. +static const char *known_types[] = { + "const void *", + "void *", + "char *", + "FILE *", + "unsigned __int8", + "unsigned __int16", + "unsigned int", + "signed int", + "char", + "__int8", + "__int16", + "int", + "bool", + "void", + "BYTE", + "WORD", + "DWORD", + "HMODULE", + "HANDLE", + "HWND", + "LPCSTR", + "size_t", +}; + +static const char *check_type(const char *name) +{ + int i, l; + + for (i = 0; i < ARRAY_SIZE(known_types); i++) { + l = strlen(known_types[i]); + if (strncmp(known_types[i], name, l) == 0) + return known_types[i]; + } + + return NULL; +} + +/* args are always expanded to 32bit */ +static const char *map_reg(const char *reg) +{ + const char *regs_f[] = { "eax", "ebx", "ecx", "edx", "esi", "edi" }; + const char *regs_w[] = { "ax", "bx", "cx", "dx", "si", "di" }; + const char *regs_b[] = { "al", "bl", "cl", "dl" }; + int i; + + for (i = 0; i < ARRAY_SIZE(regs_w); i++) + if (IS(reg, regs_w[i])) + return regs_f[i]; + + for (i = 0; i < ARRAY_SIZE(regs_b); i++) + if (IS(reg, regs_b[i])) + return regs_f[i]; + + return reg; +} + +static int parse_protostr(char *protostr, struct parsed_proto *pp) +{ + char regparm[16]; + char buf[256]; + char cconv[32]; + const char *kt; + int xarg = 0; + int ret; + char *p; + int i; + + p = protostr; + if (p[0] == '/' && p[1] == '/') { + //printf("warning: decl for sym '%s' is commented out\n", sym); + p = sskip(p + 2); + } + + kt = check_type(p); + if (kt == NULL) { + printf("%s:%d:%ld: unhandled return in '%s'\n", + hdrfn, hdrfline, (p - protostr) + 1, protostr); + return 1; + } + pp->ret_type = kt; + p += strlen(kt); + p = sskip(p); + + p = next_word(cconv, sizeof(cconv), p); + p = sskip(p); + if (cconv[0] == 0) { + printf("%s:%d:%ld: cconv missing\n", + hdrfn, hdrfline, (p - protostr) + 1); + return 1; + } + if (IS(cconv, "__cdecl")) + pp->is_stdcall = 0; + else if (IS(cconv, "__stdcall")) + pp->is_stdcall = 1; + else if (IS(cconv, "__fastcall")) + pp->is_stdcall = 1; + else if (IS(cconv, "__thiscall")) + pp->is_stdcall = 1; + else if (IS(cconv, "__userpurge")) + pp->is_stdcall = 1; // in all cases seen.. + else if (IS(cconv, "__usercall")) + pp->is_stdcall = 0; // ..or is it? + else { + printf("%s:%d:%ld: unhandled cconv: '%s'\n", + hdrfn, hdrfline, (p - protostr) + 1, cconv); + return 1; + } + + p = next_idt(buf, sizeof(buf), p); + p = sskip(p); + if (buf[0] == 0) { + printf("%s:%d:%ld: func name missing\n", + hdrfn, hdrfline, (p - protostr) + 1); + return 1; + } + + ret = get_regparm(regparm, sizeof(regparm), p); + if (ret > 0) { + if (!IS(regparm, "eax") && !IS(regparm, "ax") + && !IS(regparm, "al")) + { + printf("%s:%d:%ld: bad regparm: %s\n", + hdrfn, hdrfline, (p - protostr) + 1, regparm); + return 1; + } + p += ret; + p = sskip(p); + } + + if (*p != '(') { + printf("%s:%d:%ld: '(' expected, got '%c'\n", + hdrfn, hdrfline, (p - protostr) + 1, *p); + return 1; + } + p++; + + while (1) { + p = sskip(p); + if (*p == ')') + break; + if (*p == ',') + p = sskip(p + 1); + + xarg++; + + kt = check_type(p); + if (kt == NULL) { + printf("%s:%d:%ld: unhandled type for arg%d\n", + hdrfn, hdrfline, (p - protostr) + 1, xarg); + return 1; + } + pp->arg[xarg - 1].type = kt; + p += strlen(kt); + p = sskip(p); + + p = next_idt(buf, sizeof(buf), p); + p = sskip(p); +#if 0 + if (buf[0] == 0) { + printf("%s:%d:%ld: idt missing for arg%d\n", + hdrfn, hdrfline, (p - protostr) + 1, xarg); + return 1; + } +#endif + pp->arg[xarg - 1].reg = NULL; + + ret = get_regparm(regparm, sizeof(regparm), p); + if (ret > 0) { + p += ret; + p = sskip(p); + + pp->arg[xarg - 1].reg = strdup(map_reg(regparm)); + } + } + + if (xarg > 0 && (IS(cconv, "__fastcall") || IS(cconv, "__thiscall"))) { + if (pp->arg[0].reg != NULL) { + printf("%s:%d: %s with arg1 spec %s?\n", + hdrfn, hdrfline, cconv, pp->arg[0].reg); + } + pp->arg[0].reg = strdup("ecx"); + } + + if (xarg > 1 && IS(cconv, "__fastcall")) { + if (pp->arg[1].reg != NULL) { + printf("%s:%d: %s with arg2 spec %s?\n", + hdrfn, hdrfline, cconv, pp->arg[1].reg); + } + pp->arg[1].reg = strdup("edx"); + } + + pp->argc = xarg; + + for (i = 0; i < pp->argc; i++) { + if (pp->arg[i].reg == NULL) + pp->argc_stack++; + else + pp->argc_reg++; + } + + return 0; +} + +static int proto_parse(FILE *fhdr, const char *sym, struct parsed_proto *pp) +{ + char protostr[256]; + int ret; + + memset(pp, 0, sizeof(*pp)); + + ret = find_protostr(protostr, sizeof(protostr), fhdr, sym); + if (ret != 0) { + printf("%s: sym '%s' is missing\n", hdrfn, sym); + return ret; + } + + return parse_protostr(protostr, pp); +} + +static void proto_release(struct parsed_proto *pp) +{ + int i; + + for (i = 0; i < pp->argc; i++) { + if (pp->arg[i].reg == NULL) + free(pp->arg[i].reg); + } +} diff --git a/tools/translate.c b/tools/translate.c new file mode 100644 index 0000000..cc3e4fd --- /dev/null +++ b/tools/translate.c @@ -0,0 +1,416 @@ +#include +#include +#include + +#include "my_assert.h" +#include "my_str.h" + +#define ARRAY_SIZE(x) (sizeof(x) / sizeof(x[0])) +#define IS(w, y) !strcmp(w, y) + +#include "protoparse.h" + +const char *asmfn; +static int asmln; + +#define awarn(fmt, ...) \ + printf("warning:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__) +#define aerr(fmt, ...) do { \ + printf("error:%s:%d: " fmt, asmfn, asmln, ##__VA_ARGS__); \ + exit(1); \ +} while (0) + +enum op_class { + OPC_UNSPEC, + OPC_RMD, /* removed or optimized out */ + OPC_DATA, /* data processing */ + OPC_DATA_FLAGS, /* data processing + sets flags */ + OPC_JMP, /* .. and call */ + OPC_JCC, +}; + +enum op_op { + OP_INVAL, + OP_PUSH, + OP_POP, + OP_MOV, + OP_RET, + OP_ADD, + OP_TEST, + OP_CMP, + OP_CALL, + OP_JMP, + OP_JO, + OP_JNO, + OP_JC, + OP_JNC, + OP_JZ, + OP_JNZ, + OP_JBE, + OP_JA, + OP_JS, + OP_JNS, + OP_JP, + OP_JNP, + OP_JL, + OP_JGE, + OP_JLE, + OP_JG, +}; + +enum opr_type { + OPT_UNSPEC, + OPT_REG, + OPT_REGMEM, + OPT_LABEL, + OPT_CONST, +}; + +enum opr_lenmod { + OPRM_UNSPEC, + OPRM_BYTE, + OPRM_WORD, + OPRM_DWORD, +}; + +#define MAX_OPERANDS 2 + +struct parsed_opr { + enum opr_type type; + enum opr_lenmod lmod; + unsigned int val; + char name[256]; +}; + +struct parsed_op { + enum op_class cls; + enum op_op op; + struct parsed_opr operand[MAX_OPERANDS]; + int operand_cnt; +}; + +#define MAX_OPS 1024 + +static struct parsed_op ops[MAX_OPS]; +static char labels[MAX_OPS][32]; + +const char *main_regs[] = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp" }; + +static int parse_operand(struct parsed_opr *opr, + char words[16][256], int wordc, int w, enum op_class cls) +{ + char *endp = NULL; + int ret, len; + int i; + + if (w >= wordc) + aerr("parse_operand w %d, wordc %d\n", w, wordc); + + for (i = w; i < wordc; i++) { + len = strlen(words[i]); + if (words[i][len - 1] == ',') { + words[i][len - 1] = 0; + wordc = i + 1; + break; + } + } + + if (cls == OPC_JMP || cls == OPC_JCC) { + const char *label; + + if (wordc - w == 3 && IS(words[w + 1], "ptr")) + label = words[w + 2]; + else if (wordc - w == 2 && IS(words[w], "short")) + label = words[w + 1]; + else if (wordc - w == 1) + label = words[w]; + else + aerr("jump parse error"); + + opr->type = OPT_LABEL; + strcpy(opr->name, label); + return wordc; + } + + if (wordc - w >= 3) { + if (IS(words[w + 1], "ptr")) { + if (IS(words[w], "dword")) + opr->lmod = OPRM_DWORD; + else if (IS(words[w], "word")) + opr->lmod = OPRM_WORD; + else if (IS(words[w], "byte")) + opr->lmod = OPRM_BYTE; + else + aerr("type parsing failed\n"); + w += 2; + } + } + + if (wordc - w == 2 && IS(words[w], "offset")) { + opr->type = OPT_LABEL; + strcpy(opr->name, words[w + 1]); + return wordc; + } + + if (wordc - w != 1) + aerr("parse_operand 1 word expected\n"); + + len = strlen(words[w]); + + if (words[w][0] == '[') { + opr->type = OPT_REGMEM; + ret = sscanf(words[w], "[%256s]", opr->name); + if (ret != 1) + aerr("[] parse failure\n"); + return wordc; + } + else if (('0' <= words[w][0] && words[w][0] <= '9') + || words[w][0] == '-') + { + opr->type = OPT_CONST; + i = 0; + if (len > 1 && words[w][0] == '0') + i = 1; + if (words[w][len - 1] == 'h') { + words[w][len - 1] = 0; + opr->val = strtoul(&words[w][i], &endp, 16); + } + else { + opr->val = strtoul(&words[w][i], &endp, 10); + } + if (*endp != 0) + aerr("const parse failed\n"); + return wordc; + } + + strcpy(opr->name, words[w]); + opr->type = OPT_REG; + return wordc; +} + +static const struct { + const char *name; + enum op_op op; + enum op_class cls; + int minopr; + int maxopr; +} op_table[] = { + { "push", OP_PUSH, OPC_DATA, 1, 1 }, + { "pop", OP_POP, OPC_DATA, 1, 1 }, + { "mov" , OP_MOV, OPC_DATA, 2, 2 }, + { "add", OP_ADD, OPC_DATA_FLAGS, 2, 2 }, + { "test", OP_TEST, OPC_DATA_FLAGS, 2, 2 }, + { "cmp", OP_CMP, OPC_DATA_FLAGS, 2, 2 }, + { "retn", OP_RET, OPC_JMP, 0, 1 }, + { "call", OP_CALL, OPC_JMP, 1, 1 }, + { "jmp", OP_JMP, OPC_JMP, 1, 1 }, + { "jo", OP_JO, OPC_JCC, 1, 1 }, // 70 OF=1 + { "jno", OP_JNO, OPC_JCC, 1, 1 }, // 71 OF=0 + { "jc", OP_JC, OPC_JCC, 1, 1 }, // 72 CF=1 + { "jb", OP_JC, OPC_JCC, 1, 1 }, // 72 + { "jnc", OP_JNC, OPC_JCC, 1, 1 }, // 73 CF=0 + { "jae", OP_JNC, OPC_JCC, 1, 1 }, // 73 + { "jz", OP_JZ, OPC_JCC, 1, 1 }, // 74 ZF=1 + { "je", OP_JZ, OPC_JCC, 1, 1 }, // 74 + { "jnz", OP_JNZ, OPC_JCC, 1, 1 }, // 75 ZF=0 + { "jne", OP_JNZ, OPC_JCC, 1, 1 }, // 75 + { "jbe", OP_JBE, OPC_JCC, 1, 1 }, // 76 CF=1 || ZF=1 + { "jna", OP_JBE, OPC_JCC, 1, 1 }, // 76 + { "ja", OP_JA, OPC_JCC, 1, 1 }, // 77 CF=0 && ZF=0 + { "jnbe", OP_JA, OPC_JCC, 1, 1 }, // 77 + { "js", OP_JS, OPC_JCC, 1, 1 }, // 78 SF=1 + { "jns", OP_JNS, OPC_JCC, 1, 1 }, // 79 SF=0 + { "jp", OP_JP, OPC_JCC, 1, 1 }, // 7a PF=1 + { "jpe", OP_JP, OPC_JCC, 1, 1 }, // 7a + { "jnp", OP_JNP, OPC_JCC, 1, 1 }, // 7b PF=0 + { "jpo", OP_JNP, OPC_JCC, 1, 1 }, // 7b + { "jl", OP_JL, OPC_JCC, 1, 1 }, // 7c SF!=OF + { "jnge", OP_JL, OPC_JCC, 1, 1 }, // 7c + { "jge", OP_JGE, OPC_JCC, 1, 1 }, // 7d SF=OF + { "jnl", OP_JGE, OPC_JCC, 1, 1 }, // 7d + { "jle", OP_JLE, OPC_JCC, 1, 1 }, // 7e ZF=1 || SF!=OF + { "jng", OP_JLE, OPC_JCC, 1, 1 }, // 7e + { "jg", OP_JG, OPC_JCC, 1, 1 }, // 7f ZF=0 && SF=OF + { "jnle", OP_JG, OPC_JCC, 1, 1 }, // 7f +}; + +static void parse_op(struct parsed_op *op, char words[16][256], int wordc) +{ + int w = 1; + int opr; + int i; + + for (i = 0; i < ARRAY_SIZE(op_table); i++) { + if (!IS(words[0], op_table[i].name)) + continue; + + for (opr = 0; opr < op_table[i].minopr; opr++) { + w = parse_operand(&op->operand[opr], + words, wordc, w, op_table[i].cls); + } + + for (; w < wordc && opr < op_table[i].maxopr; opr++) { + w = parse_operand(&op->operand[opr], + words, wordc, w, op_table[i].cls); + } + + goto done; + } + + aerr("unhandled op: '%s'\n", words[0]); + +done: + if (w < wordc) + aerr("parse_op %s incomplete: %d/%d\n", + words[0], w, wordc); + + op->cls = op_table[i].cls; + op->op = op_table[i].op; + return; +} + +int gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) +{ + struct parsed_proto pp; + int ret; + int i; + + ret = proto_parse(fhdr, funcn, &pp); + if (ret) + return ret; + + fprintf(fout, "%s %s(", pp.ret_type, funcn); + for (i = 0; i < pp.argc; i++) { + if (i > 0) + fprintf(fout, ", "); + fprintf(fout, "%s a%d", pp.arg[i].type, i); + } + fprintf(fout, ")\n{\n"); + + + + fprintf(fout, "}\n\n"); + proto_release(&pp); + return 0; +} + +int main(int argc, char *argv[]) +{ + FILE *fout, *fasm, *fhdr; + char line[256]; + char words[16][256]; + char func[256]; + int in_func = 0; + int pi = 0; + int len; + char *p; + int wordc; + + if (argc != 4) { + printf("usage:\n%s <.c> <.asm> \n", + argv[0]); + return 1; + } + + hdrfn = argv[3]; + fhdr = fopen(hdrfn, "r"); + my_assert_not(fhdr, NULL); + + asmfn = argv[2]; + fasm = fopen(asmfn, "r"); + my_assert_not(fasm, NULL); + + fout = fopen(argv[1], "w"); + my_assert_not(fout, NULL); + + + while (fgets(line, sizeof(line), fasm)) + { + asmln++; + + p = sskip(line); + if (*p == 0 || *p == ';') + continue; + + memset(words, 0, sizeof(words)); + for (wordc = 0; wordc < 16; wordc++) { + p = sskip(next_word(words[wordc], sizeof(words[0]), p)); + if (*p == 0 || *p == ';') { + wordc++; + break; + } + } + + if (wordc == 0) { + // shouldn't happen + awarn("wordc == 0?\n"); + continue; + } + + // don't care about this: + if (words[0][0] == '.' + || IS(words[0], "include") + || IS(words[0], "assume") || IS(words[1], "segment") + || IS(words[0], "align")) + { + continue; + } + + if (IS(words[1], "proc")) { + if (in_func) + aerr("proc '%s' while in_func '%s'?\n", + words[0], func); + strcpy(func, words[0]); + in_func = 1; + continue; + } + + if (IS(words[1], "endp")) { + if (!in_func) + aerr("endp '%s' while not in_func?\n", words[0]); + if (!IS(func, words[0])) + aerr("endp '%s' while in_func '%s'?\n", + words[0], func); + gen_func(fout, fhdr, func, pi); + in_func = 0; + func[0] = 0; + if (pi != 0) { + memset(&ops, 0, pi * sizeof(ops[0])); + memset(labels, 0, pi * sizeof(labels[0])); + pi = 0; + } + exit(1); + continue; + } + + if (IS(words[1], "=")) + // lots of work will be have to be done here, but for now.. + continue; + + if (pi >= ARRAY_SIZE(ops)) + aerr("too many ops\n"); + + p = strchr(words[0], ':'); + if (p != NULL) { + len = p - words[0]; + if (len > sizeof(labels[0]) - 1) + aerr("label too long: %d\n", len); + if (labels[pi][0] != 0) + aerr("dupe label?\n"); + memcpy(labels[pi], words[0], len); + labels[pi][len] = 0; + continue; + } + + parse_op(&ops[pi], words, wordc); + pi++; + + (void)proto_parse; + } + + fclose(fout); + fclose(fasm); + fclose(fhdr); + + return 0; +} -- 2.39.5