From 93b5bd181600ddf5097d133c52cdc7b1d4a2c7c8 Mon Sep 17 00:00:00 2001 From: notaz Date: Fri, 1 May 2015 00:19:53 +0300 Subject: [PATCH] translate: support more dereference types --- tests/Makefile | 2 +- tests/deref.asm | 25 +++++ tests/deref.expect.c | 16 +++ tests/deref.seed.h | 11 +++ tools/protoparse.h | 85 +++++++++++----- tools/translate.c | 230 +++++++++++++++++++++++++------------------ 6 files changed, 251 insertions(+), 118 deletions(-) create mode 100644 tests/deref.asm create mode 100644 tests/deref.expect.c create mode 100644 tests/deref.seed.h diff --git a/tests/Makefile b/tests/Makefile index 81d5d99..83def3c 100644 --- a/tests/Makefile +++ b/tests/Makefile @@ -1,6 +1,6 @@ TESTS = reg_call1 reg_call2 reg_call3 reg_call_tail reg_save \ - varargs ops x87 x87_f x87_s + varargs ops x87 x87_f x87_s deref all: $(addsuffix .ok,$(TESTS)) diff --git a/tests/deref.asm b/tests/deref.asm new file mode 100644 index 0000000..8108a71 --- /dev/null +++ b/tests/deref.asm @@ -0,0 +1,25 @@ + +_text segment para public 'CODE' use32 + +sub_test proc near + push ebp + mov ebp, esp + push esi + mov esi, ptr_struct1 + push 1 + call dword ptr [esi] + mov eax, [esi+4] + push 2 + call dword ptr [eax+4] + pop esi + pop ebp + retn +sub_test endp + +_text ends + +_rdata segment para public 'DATA' use32 +ptr_struct1 dd 0 +_rdata ends + +; vim:expandtab diff --git a/tests/deref.expect.c b/tests/deref.expect.c new file mode 100644 index 0000000..3cd7785 --- /dev/null +++ b/tests/deref.expect.c @@ -0,0 +1,16 @@ +int sub_test() +{ + void (__stdcall *i_f0)(int); + int (__stdcall *i_f1)(int); + u32 eax; + u32 esi; + + esi = (u32)ptr_struct1; + i_f0 = (void *)*(u32 *)(esi); + i_f0(1); + eax = *(u32 *)(esi+4); + i_f1 = (void *)*(u32 *)(eax+4); + eax = i_f1(2); + return eax; +} + diff --git a/tests/deref.seed.h b/tests/deref.seed.h new file mode 100644 index 0000000..b67b457 --- /dev/null +++ b/tests/deref.seed.h @@ -0,0 +1,11 @@ +struct struct1 { + void (__stdcall *f0)(int a1); + struct struct2 *s1; +}; + +struct struct2 { + int d0; + int (__stdcall *f1)(int a1); +}; + +extern struct struct1 *ptr_struct1; diff --git a/tools/protoparse.h b/tools/protoparse.h index 308cccd..469c12f 100644 --- a/tools/protoparse.h +++ b/tools/protoparse.h @@ -20,7 +20,7 @@ struct parsed_type { struct parsed_proto_arg { char *reg; struct parsed_type type; - struct parsed_proto *fptr; + struct parsed_proto *pp; // fptr or struct void *datap; }; @@ -353,6 +353,54 @@ static int check_struct_arg(struct parsed_proto_arg *arg) return 0; } +static int parse_protostr(char *protostr, struct parsed_proto *pp); + +static int parse_arg(char **p_, struct parsed_proto_arg *arg, int xarg) +{ + char buf[256]; + char *p = *p_; + char *pe; + int ret; + + arg->pp = calloc(1, sizeof(*arg->pp)); + my_assert_not(arg->pp, NULL); + arg->pp->is_arg = 1; + + pe = p; + while (1) { + pe = strpbrk(pe, ",()"); + if (pe == NULL) + return -1; + if (*pe == ',' || *pe == ')') + break; + pe = strchr(pe, ')'); + if (pe == NULL) + return -1; + pe++; + } + + if (pe - p > sizeof(buf) - 1) + return -1; + memcpy(buf, p, pe - p); + buf[pe - p] = 0; + + ret = parse_protostr(buf, arg->pp); + if (ret < 0) + return -1; + + // we don't use actual names right now... + snprintf(arg->pp->name, sizeof(arg->pp->name), "a%d", xarg); + + if (!arg->type.is_struct) + // we'll treat it as void * for non-calls + arg->type.name = strdup("void *"); + arg->type.is_ptr = 1; + + p += ret; + *p_ = p; + return 0; +} + static int parse_protostr(char *protostr, struct parsed_proto *pp) { struct parsed_proto_arg *arg; @@ -360,8 +408,8 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) char buf[256]; char cconv[32]; int is_retreg; - int xarg = 0; char *p, *p1; + int xarg = 0; int i, l; int ret; @@ -416,8 +464,8 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) if (!strchr(p, ')')) { p = next_idt(buf, sizeof(buf), p); p = sskip(p); - if (buf[0] == 0) { - printf("%s:%d:%zd: var name missing\n", + if (!pp->is_arg && buf[0] == 0) { + printf("%s:%d:%zd: var name is missing\n", hdrfn, hdrfline, (p - protostr) + 1); return -1; } @@ -578,24 +626,15 @@ static int parse_protostr(char *protostr, struct parsed_proto *pp) } p = sskip(p + ret); - if (*p == '(') { - // func ptr - arg->fptr = calloc(1, sizeof(*arg->fptr)); - ret = parse_protostr(p1, arg->fptr); + if (*p == '(' || arg->type.is_struct) { + // func ptr or struct + ret = parse_arg(&p1, arg, xarg); if (ret < 0) { printf("%s:%d:%zd: funcarg parse failed\n", hdrfn, hdrfline, p1 - protostr); return -1; } - arg->fptr->is_arg = 1; - // we don't use actual names right now.. - snprintf(arg->fptr->name, - sizeof(arg->fptr->name), "a%d", xarg); - // we'll treat it as void * for non-calls - arg->type.name = strdup("void *"); - arg->type.is_ptr = 1; - - p = p1 + ret; + p = p1; } p = next_idt(buf, sizeof(buf), p); @@ -885,10 +924,10 @@ static void pp_copy_arg(struct parsed_proto_arg *d, d->type.name = strdup(s->type.name); my_assert_not(d->type.name, NULL); } - if (s->fptr != NULL) { - d->fptr = malloc(sizeof(*d->fptr)); - my_assert_not(d->fptr, NULL); - memcpy(d->fptr, s->fptr, sizeof(*d->fptr)); + if (s->pp != NULL) { + d->pp = malloc(sizeof(*d->pp)); + my_assert_not(d->pp, NULL); + memcpy(d->pp, s->pp, sizeof(*d->pp)); } } @@ -965,8 +1004,8 @@ static inline void proto_release(struct parsed_proto *pp) free(pp->arg[i].reg); if (pp->arg[i].type.name != NULL) free(pp->arg[i].type.name); - if (pp->arg[i].fptr != NULL) - free(pp->arg[i].fptr); + if (pp->arg[i].pp != NULL) + free(pp->arg[i].pp); } if (pp->ret_type.name != NULL) free(pp->ret_type.name); diff --git a/tools/translate.c b/tools/translate.c index 168cdc9..591090d 100644 --- a/tools/translate.c +++ b/tools/translate.c @@ -1782,7 +1782,6 @@ static void parse_stack_access(struct parsed_op *po, } else { bp_arg = parse_stack_el(name, ofs_reg, NULL, 0); - snprintf(g_comment, sizeof(g_comment), "%s", bp_arg); eq = equ_find(po, bp_arg, &offset); if (eq == NULL) ferr(po, "detected but missing eq\n"); @@ -1862,6 +1861,8 @@ static int stack_frame_access(struct parsed_op *po, parse_stack_access(po, name, ofs_reg, &offset, &stack_ra, &bp_arg, is_lea); + snprintf(g_comment, sizeof(g_comment), "%s", bp_arg); + if (offset > stack_ra) { arg_i = (offset - stack_ra - 4) / 4; @@ -1960,7 +1961,7 @@ static int stack_frame_access(struct parsed_op *po, } if (unaligned) - snprintf(g_comment, sizeof(g_comment), "%s unaligned", bp_arg); + strcat(g_comment, " unaligned"); // common problem guess_lmod_from_c_type(&tmp_lmod, &g_func_pp->arg[i].type); @@ -2538,7 +2539,7 @@ static const char *op_to_c(struct parsed_op *po) // note: this skips over calls and rm'd stuff assuming they're handled // so it's intended to use at one of final passes static int scan_for_pop(int i, int opcnt, int magic, int reg, - int depth, int flags_set) + int depth, int seen_noreturn, int flags_set) { struct parsed_op *po; int relevant; @@ -2554,10 +2555,12 @@ static int scan_for_pop(int i, int opcnt, int magic, int reg, if (po->flags & OPF_TAIL) { if (po->op == OP_CALL) { if (po->pp != NULL && po->pp->is_noreturn) - // assume no stack cleanup for noreturn - return 1; + seen_noreturn = 1; + else + return -1; } - return -1; // deadend + else + return -1; // deadend } if (po->flags & (OPF_RMD|OPF_DONE|OPF_FARG)) @@ -2569,7 +2572,7 @@ static int scan_for_pop(int i, int opcnt, int magic, int reg, for (j = 0; j < po->btj->count; j++) { check_i(po, po->btj->d[j].bt_i); ret |= scan_for_pop(po->btj->d[j].bt_i, opcnt, magic, reg, - depth, flags_set); + depth, seen_noreturn, flags_set); if (ret < 0) return ret; // dead end } @@ -2579,7 +2582,7 @@ static int scan_for_pop(int i, int opcnt, int magic, int reg, check_i(po, po->bt_i); if (po->flags & OPF_CJMP) { ret |= scan_for_pop(po->bt_i, opcnt, magic, reg, - depth, flags_set); + depth, seen_noreturn, flags_set); if (ret < 0) return ret; // dead end } @@ -2608,7 +2611,8 @@ static int scan_for_pop(int i, int opcnt, int magic, int reg, } } - return -1; + // for noreturn, assume msvc skipped stack cleanup + return seen_noreturn ? 1 : -1; } // scan for 'reg' pop backwards starting from i @@ -3352,7 +3356,8 @@ static void scan_fwd_set_flags(int i, int opcnt, int magic, int flags) } static const struct parsed_proto *try_recover_pp( - struct parsed_op *po, const struct parsed_opr *opr, int *search_instead) + struct parsed_op *po, const struct parsed_opr *opr, + int is_call, int *search_instead) { const struct parsed_proto *pp = NULL; char buf[256]; @@ -3391,10 +3396,12 @@ static const struct parsed_proto *try_recover_pp( if (arg == g_func_pp->argc) ferr(po, "stack arg %d not in prototype?\n", arg_i); - pp = g_func_pp->arg[arg].fptr; - if (pp == NULL) - ferr(po, "icall sa: arg%d is not a fptr?\n", arg + 1); - check_func_pp(po, pp, "icall arg"); + pp = g_func_pp->arg[arg].pp; + if (is_call) { + if (pp == NULL) + ferr(po, "icall arg: arg%d has no pp\n", arg + 1); + check_func_pp(po, pp, "icall arg"); + } } else if (opr->type == OPT_REGMEM && strchr(opr->name + 1, '[')) { // label[index] @@ -3475,14 +3482,14 @@ static void scan_for_call_type(int i, const struct parsed_opr *opr, } if (i == g_func_pp->argc) return; - pp = g_func_pp->arg[i].fptr; + pp = g_func_pp->arg[i].pp; if (pp == NULL) ferr(po, "icall: arg%d (%s) is not a fptr?\n", i + 1, g_func_pp->arg[i].reg); check_func_pp(po, pp, "icall reg-arg"); } else - pp = try_recover_pp(po, opr, NULL); + pp = try_recover_pp(po, opr, 1, NULL); if (*pp_found != NULL && pp != NULL && *pp_found != pp) { if (!IS((*pp_found)->ret_type.name, pp->ret_type.name) @@ -3619,6 +3626,13 @@ static int get_pp_arg_regmask_dst(const struct parsed_proto *pp) return regmask | mxAX; } +static int are_ops_same(struct parsed_op *po1, struct parsed_op *po2) +{ + return po1->op == po2->op && po1->operand_cnt == po2->operand_cnt + && memcmp(po1->operand, po2->operand, + sizeof(po1->operand[0]) * po1->operand_cnt) == 0; +} + static void resolve_branches_parse_calls(int opcnt) { static const struct { @@ -4026,10 +4040,6 @@ static int resolve_origin(int i, const struct parsed_opr *opr, struct label_ref *lr; int ret = 0; - if (ops[i].cc_scratch == magic) - return 0; - ops[i].cc_scratch = magic; - while (1) { if (g_labels[i] != NULL) { lr = &g_label_refs[i]; @@ -4058,10 +4068,9 @@ static int resolve_origin(int i, const struct parsed_opr *opr, continue; if (*op_i >= 0) { - if (*op_i == i) + if (*op_i == i || are_ops_same(&ops[*op_i], &ops[i])) return ret | 1; - // XXX: could check if the other op does the same return -1; } @@ -4080,10 +4089,6 @@ static int resolve_last_ref(int i, const struct parsed_opr *opr, struct label_ref *lr; int ret = 0; - if (ops[i].cc_scratch == magic) - return 0; - ops[i].cc_scratch = magic; - while (1) { if (g_labels[i] != NULL) { lr = &g_label_refs[i]; @@ -4337,16 +4342,103 @@ static int resolve_used_bits(int i, int opcnt, int reg, return 0; } +static const struct parsed_proto *resolve_deref(int i, int magic, + struct parsed_opr *opr, int level) +{ + struct parsed_opr opr_s = OPR_INIT(OPT_REG, OPLM_DWORD, 0); + const struct parsed_proto *pp = NULL; + int from_caller = 0; + char s_reg[4]; + int offset = 0; + int len = 0; + int j = -1; + int k = -1; + int reg; + int ret; + + ret = sscanf(opr->name, "%3s+%x%n", s_reg, &offset, &len); + if (ret != 2 || len != strlen(opr->name)) { + ret = sscanf(opr->name, "%3s%n", s_reg, &len); + if (ret != 1 || len != strlen(opr->name)) + return NULL; + } + + reg = char_array_i(regs_r32, ARRAY_SIZE(regs_r32), s_reg); + if (reg < 0) + return NULL; + + opr_s.reg = reg; + ret = resolve_origin(i, &opr_s, i + magic, &j, NULL); + if (ret != 1) + return NULL; + + if (ops[j].op == OP_MOV && ops[j].operand[1].type == OPT_REGMEM + && strlen(ops[j].operand[1].name) == 3 + && ops[j].operand[0].lmod == OPLM_DWORD + && ops[j].pp == NULL // no hint + && level == 0) + { + // allow one simple dereference (com/directx) + reg = char_array_i(regs_r32, ARRAY_SIZE(regs_r32), + ops[j].operand[1].name); + if (reg < 0) + return NULL; + opr_s.reg = reg; + ret = resolve_origin(j, &opr_s, j + magic, &k, NULL); + if (ret != 1) + return NULL; + j = k; + } + if (ops[j].op != OP_MOV || ops[j].operand[0].lmod != OPLM_DWORD) + return NULL; + + if (ops[j].pp != NULL) { + // type hint in asm + pp = ops[j].pp; + } + else if (ops[j].operand[1].type == OPT_REGMEM) { + pp = try_recover_pp(&ops[j], &ops[j].operand[1], 0, NULL); + if (pp == NULL) { + // maybe structure ptr in structure + pp = resolve_deref(j, magic, &ops[j].operand[1], level + 1); + } + } + else if (ops[j].operand[1].type == OPT_LABEL) + pp = proto_parse(g_fhdr, ops[j].operand[1].name, g_quiet_pp); + else if (ops[j].operand[1].type == OPT_REG) { + // maybe arg reg? + k = -1; + ret = resolve_origin(j, &ops[j].operand[1], i + magic, + &k, &from_caller); + if (ret != 1 && from_caller && k == -1 && g_func_pp != NULL) { + for (k = 0; k < g_func_pp->argc; k++) { + if (g_func_pp->arg[k].reg == NULL) + continue; + if (IS(g_func_pp->arg[k].reg, ops[j].operand[1].name)) { + pp = g_func_pp->arg[k].pp; + break; + } + } + } + } + + if (pp == NULL) + return NULL; + if (pp->is_func || pp->is_fptr || !pp->type.is_struct) { + if (offset != 0) + ferr(&ops[j], "expected struct, got '%s %s'\n", + pp->type.name, pp->name); + return NULL; + } + + return proto_lookup_struct(g_fhdr, pp->type.name, offset); +} + static const struct parsed_proto *resolve_icall(int i, int opcnt, int *pp_i, int *multi_src) { const struct parsed_proto *pp = NULL; int search_advice = 0; - int offset = -1; - char name[256]; - char s_reg[4]; - int reg, len; - int ret; *multi_src = 0; *pp_i = -1; @@ -4354,67 +4446,14 @@ static const struct parsed_proto *resolve_icall(int i, int opcnt, switch (ops[i].operand[0].type) { case OPT_REGMEM: // try to resolve struct member calls - ret = sscanf(ops[i].operand[0].name, "%3s+%x%n", - s_reg, &offset, &len); - if (ret == 2 && len == strlen(ops[i].operand[0].name)) - { - reg = char_array_i(regs_r32, ARRAY_SIZE(regs_r32), s_reg); - if (reg >= 0) { - struct parsed_opr opr = OPR_INIT(OPT_REG, OPLM_DWORD, reg); - int j = -1; - ret = resolve_origin(i, &opr, i + opcnt * 19, &j, NULL); - if (ret != 1) - break; - if (ops[j].op == OP_MOV && ops[j].operand[1].type == OPT_REGMEM - && ops[j].operand[0].lmod == OPLM_DWORD - && ops[j].pp == NULL) // no hint - { - // allow one simple dereference (directx) - reg = char_array_i(regs_r32, ARRAY_SIZE(regs_r32), - ops[j].operand[1].name); - if (reg < 0) - break; - struct parsed_opr opr2 = OPR_INIT(OPT_REG, OPLM_DWORD, reg); - int k = -1; - ret = resolve_origin(j, &opr2, j + opcnt * 19, &k, NULL); - if (ret != 1) - break; - j = k; - } - if (ops[j].op != OP_MOV) - break; - if (ops[j].operand[0].lmod != OPLM_DWORD) - break; - if (ops[j].pp != NULL) { - // type hint in asm - pp = ops[j].pp; - } - else if (ops[j].operand[1].type == OPT_REGMEM) { - // allow 'hello[ecx]' - assume array of same type items - ret = sscanf(ops[j].operand[1].name, "%[^[][e%2s]", - name, s_reg); - if (ret != 2) - break; - pp = proto_parse(g_fhdr, name, g_quiet_pp); - } - else if (ops[j].operand[1].type == OPT_LABEL) - pp = proto_parse(g_fhdr, ops[j].operand[1].name, g_quiet_pp); - else - break; - if (pp == NULL) - break; - if (pp->is_func || pp->is_fptr || !pp->type.is_struct) { - pp = NULL; - break; - } - pp = proto_lookup_struct(g_fhdr, pp->type.name, offset); - } + pp = resolve_deref(i, i + opcnt * 19, &ops[i].operand[0], 0); + if (pp != NULL) break; - } // fallthrough case OPT_LABEL: case OPT_OFFSET: - pp = try_recover_pp(&ops[i], &ops[i].operand[0], &search_advice); + pp = try_recover_pp(&ops[i], &ops[i].operand[0], + 1, &search_advice); if (!search_advice) break; // fallthrough @@ -5083,9 +5122,10 @@ static void reg_use_pass(int i, int opcnt, unsigned char *cbits, flags_set = OPF_RSAVE | OPF_DONE; } - ret = scan_for_pop(i + 1, opcnt, i + opcnt * 3, reg, 0, 0); + ret = scan_for_pop(i + 1, opcnt, i + opcnt * 3, reg, 0, 0, 0); if (ret == 1) { - scan_for_pop(i + 1, opcnt, i + opcnt * 4, reg, 0, flags_set); + scan_for_pop(i + 1, opcnt, i + opcnt * 4, + reg, 0, 0, flags_set); } else { ret = scan_for_pop_ret(i + 1, opcnt, po->operand[0].reg, 0); @@ -5291,9 +5331,11 @@ static void output_pp(FILE *fout, const struct parsed_proto *pp, for (i = 0; i < pp->argc; i++) { if (i > 0) fprintf(fout, ", "); - if (pp->arg[i].fptr != NULL && !(flags & OPP_SIMPLE_ARGS)) { + if (pp->arg[i].pp != NULL && pp->arg[i].pp->is_func + && !(flags & OPP_SIMPLE_ARGS)) + { // func pointer - output_pp(fout, pp->arg[i].fptr, 0); + output_pp(fout, pp->arg[i].pp, 0); } else if (pp->arg[i].type.is_retreg) { fprintf(fout, "u32 *r_%s", pp->arg[i].reg); @@ -7621,11 +7663,11 @@ static void gen_hdr_dep_pass(int i, int opcnt, unsigned char *cbits, if (po->flags & OPF_DONE) continue; - ret = scan_for_pop(i + 1, opcnt, i + opcnt * 2, reg, 0, 0); + ret = scan_for_pop(i + 1, opcnt, i + opcnt * 2, reg, 0, 0, 0); if (ret == 1) { regmask_save |= 1 << reg; po->flags |= OPF_RMD; - scan_for_pop(i + 1, opcnt, i + opcnt * 3, reg, 0, OPF_RMD); + scan_for_pop(i + 1, opcnt, i + opcnt * 3, reg, 0, 0, OPF_RMD); continue; } } -- 2.39.2