X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=tools%2Ftranslate.c;h=704d4bc85c83e4ecbcd676dd44442ac44e6ba1ae;hb=4e81a3a2e191ccf8498da6d23e96c03fc8b36c83;hp=c464758483246ff44704e4202a5be489bea2c0ae;hpb=9af2d373a75efbac33d111d4a820cbcb300e0efd;p=ia32rtools.git diff --git a/tools/translate.c b/tools/translate.c index c464758..704d4bc 100644 --- a/tools/translate.c +++ b/tools/translate.c @@ -37,7 +37,7 @@ static FILE *g_fhdr; #include "masm_tools.h" enum op_flags { - OPF_RMD = (1 << 0), /* removed or optimized out */ + OPF_RMD = (1 << 0), /* removed from code generation */ OPF_DATA = (1 << 1), /* data processing - writes to dst opr */ OPF_FLAGS = (1 << 2), /* sets flags */ OPF_JMP = (1 << 3), /* branch, call */ @@ -56,6 +56,7 @@ enum op_flags { OPF_32BIT = (1 << 16), /* 32bit division */ OPF_LOCK = (1 << 17), /* op has lock prefix */ OPF_VAPUSH = (1 << 18), /* vararg ptr push (as call arg) */ + OPF_DONE = (1 << 19), /* already fully handled by analysis */ }; enum op_op { @@ -112,7 +113,7 @@ enum op_op { // x87 // mmx OP_EMMS, - // mmx + // undefined OP_UD2, }; @@ -233,6 +234,7 @@ static int g_sp_frame; static int g_stack_frame_used; static int g_stack_fsz; static int g_ida_func_attr; +static int g_skip_func; static int g_allow_regfunc; static int g_quiet_pp; static int g_header_mode; @@ -247,6 +249,11 @@ static int g_header_mode; printf("%s:%d: note: [%s] '%s': " fmt, asmfn, (op_)->asmln, g_func, \ dump_op(op_), ##__VA_ARGS__) +#define ferr_assert(op_, cond) do { \ + if (!(cond)) ferr(op_, "assertion '%s' failed on ln :%d\n", #cond, \ + __LINE__); \ +} while (0) + const char *regs_r32[] = { "eax", "ebx", "ecx", "edx", "esi", "edi", "ebp", "esp", // not r32, but list here for easy parsing and printing @@ -522,7 +529,7 @@ static int guess_lmod_from_c_type(enum opr_lenmod *lmod, const struct parsed_type *c_type) { static const char *dword_types[] = { - "int", "_DWORD", "UINT_PTR", "DWORD", + "uint32_t", "int", "_DWORD", "UINT_PTR", "DWORD", "WPARAM", "LPARAM", "UINT", "__int32", "LONG", "HIMC", "BOOL", "size_t", "float", @@ -978,7 +985,8 @@ static void parse_op(struct parsed_op *op, char words[16][256], int wordc) } if (i == ARRAY_SIZE(op_table)) { - anote("unhandled op: '%s'\n", words[0]); + if (!g_skip_func) + aerr("unhandled op: '%s'\n", words[0]); i--; // OP_UD2 } w++; @@ -1168,7 +1176,7 @@ static void parse_op(struct parsed_op *op, char words[16][256], int wordc) && op->operand[0].reg == op->operand[1].reg && IS(op->operand[0].name, op->operand[1].name)) // ! ah, al.. { - op->flags |= OPF_RMD; + op->flags |= OPF_RMD | OPF_DONE; op->regmask_src = op->regmask_dst = 0; } break; @@ -1180,7 +1188,7 @@ static void parse_op(struct parsed_op *op, char words[16][256], int wordc) char buf[16]; snprintf(buf, sizeof(buf), "%s+0", op->operand[0].name); if (IS(buf, op->operand[1].name)) - op->flags |= OPF_RMD; + op->flags |= OPF_RMD | OPF_DONE; } break; @@ -1714,7 +1722,7 @@ static void check_func_pp(struct parsed_op *po, // fptrs must use 32bit args, callsite might have no information and // lack a cast to smaller types, which results in incorrectly masked // args passed (callee may assume masked args, it does on ARM) - if (!pp->is_oslib) { + if (!pp->is_osinc) { for (i = 0; i < pp->argc; i++) { ret = guess_lmod_from_c_type(&tmp_lmod, &pp->arg[i].type); if (ret && tmp_lmod != OPLM_DWORD) @@ -2155,7 +2163,7 @@ static int scan_for_pop(int i, int opcnt, const char *reg, return -1; // deadend } - if ((po->flags & OPF_RMD) + if ((po->flags & (OPF_RMD|OPF_DONE)) || (po->op == OP_PUSH && po->p_argnum != 0)) // arg push continue; @@ -2231,12 +2239,13 @@ static int scan_for_pop_ret(int i, int opcnt, const char *reg, continue; for (j = i - 1; j >= 0; j--) { - if (ops[j].flags & OPF_RMD) + if (ops[j].flags & (OPF_RMD|OPF_DONE)) continue; if (ops[j].flags & OPF_JMP) return -1; - if (ops[j].op == OP_POP && ops[j].operand[0].type == OPT_REG + if (ops[j].op == OP_POP && ops[j].datap == NULL + && ops[j].operand[0].type == OPT_REG && IS(ops[j].operand[0].name, reg)) { found = 1; @@ -2252,7 +2261,6 @@ static int scan_for_pop_ret(int i, int opcnt, const char *reg, return found ? 0 : -1; } -// XXX: merge with scan_for_pop? static void scan_for_pop_const(int i, int opcnt) { int j; @@ -2264,9 +2272,10 @@ static void scan_for_pop_const(int i, int opcnt) break; } - if (!(ops[j].flags & OPF_RMD) && ops[j].op == OP_POP) + if (ops[j].op == OP_POP && !(ops[j].flags & (OPF_RMD|OPF_DONE))) { - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; + ops[j].flags |= OPF_DONE; ops[j].datap = &ops[i]; break; } @@ -2311,7 +2320,7 @@ static void scan_propagate_df(int i, int opcnt) break; if (po->op == OP_CLD) { - po->flags |= OPF_RMD; + po->flags |= OPF_RMD | OPF_DONE; return; } } @@ -2514,20 +2523,38 @@ static int scan_for_reg_clear(int i, int reg) return -1; } +static void patch_esp_adjust(struct parsed_op *po, int adj) +{ + ferr_assert(po, po->op == OP_ADD); + ferr_assert(po, IS(opr_name(po, 0), "esp")); + ferr_assert(po, po->operand[1].type == OPT_CONST); + + // this is a bit of a hack, but deals with use of + // single adj for multiple calls + po->operand[1].val -= adj; + po->flags |= OPF_RMD; + if (po->operand[1].val == 0) + po->flags |= OPF_DONE; + ferr_assert(po, (int)po->operand[1].val >= 0); +} + // scan for positive, constant esp adjust +// multipath case is preliminary static int scan_for_esp_adjust(int i, int opcnt, - unsigned int adj_expect, int *adj, int *multipath) + int adj_expect, int *adj, int *is_multipath, int do_update) { struct parsed_op *po; int first_pop = -1; - *adj = *multipath = 0; + *adj = *is_multipath = 0; for (; i < opcnt && *adj < adj_expect; i++) { - po = &ops[i]; - if (g_labels[i] != NULL) - *multipath = 1; + *is_multipath = 1; + + po = &ops[i]; + if (po->flags & OPF_DONE) + continue; if (po->op == OP_ADD && po->operand[0].reg == xSP) { if (po->operand[1].type != OPT_CONST) @@ -2535,23 +2562,39 @@ static int scan_for_esp_adjust(int i, int opcnt, *adj += po->operand[1].val; if (*adj & 3) ferr(&ops[i], "unaligned esp adjust: %x\n", *adj); + if (do_update) { + if (!*is_multipath) + patch_esp_adjust(po, adj_expect); + else + po->flags |= OPF_RMD; + } return i; } - else if (po->op == OP_PUSH && !(po->flags & OPF_RMD)) { + else if (po->op == OP_PUSH) { //if (first_pop == -1) // first_pop = -2; // none *adj -= lmod_bytes(po, po->operand[0].lmod); } - else if (po->op == OP_POP && !(po->flags & OPF_RMD)) { - // seems like msvc only uses 'pop ecx' for stack realignment.. - if (po->operand[0].type != OPT_REG || po->operand[0].reg != xCX) - break; - if (first_pop == -1 && *adj >= 0) - first_pop = i; + else if (po->op == OP_POP) { + if (!(po->flags & OPF_DONE)) { + // seems like msvc only uses 'pop ecx' for stack realignment.. + if (po->operand[0].type != OPT_REG || po->operand[0].reg != xCX) + break; + if (first_pop == -1 && *adj >= 0) + first_pop = i; + } + if (do_update && *adj >= 0) { + po->flags |= OPF_RMD; + if (!*is_multipath) + po->flags |= OPF_DONE; + } + *adj += lmod_bytes(po, po->operand[0].lmod); } else if (po->flags & (OPF_JMP|OPF_TAIL)) { if (po->op == OP_JMP && po->btj == NULL) { + if (po->bt_i <= i) + break; i = po->bt_i - 1; continue; } @@ -2561,6 +2604,7 @@ static int scan_for_esp_adjust(int i, int opcnt, break; if (po->pp != NULL && po->pp->is_stdcall) break; + // assume it's another cdecl call } } @@ -2780,13 +2824,13 @@ static void scan_prologue_epilogue(int opcnt) && IS(opr_name(&ops[1], 1), "esp")) { g_bp_frame = 1; - ops[0].flags |= OPF_RMD; - ops[1].flags |= OPF_RMD; + ops[0].flags |= OPF_RMD | OPF_DONE; + ops[1].flags |= OPF_RMD | OPF_DONE; i = 2; if (ops[2].op == OP_SUB && IS(opr_name(&ops[2], 0), "esp")) { g_stack_fsz = opr_const(&ops[2], 1); - ops[2].flags |= OPF_RMD; + ops[2].flags |= OPF_RMD | OPF_DONE; i++; } else { @@ -2794,7 +2838,7 @@ static void scan_prologue_epilogue(int opcnt) i = 2; while (ops[i].op == OP_PUSH && IS(opr_name(&ops[i], 0), "ecx")) { g_stack_fsz += 4; - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; ecx_push++; i++; } @@ -2805,9 +2849,9 @@ static void scan_prologue_epilogue(int opcnt) && IS(opr_name(&ops[i + 1], 0), "__alloca_probe")) { g_stack_fsz += ops[i].operand[1].val; - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; i++; - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; i++; } } @@ -2827,7 +2871,7 @@ static void scan_prologue_epilogue(int opcnt) if ((ops[j].op == OP_POP && IS(opr_name(&ops[j], 0), "ebp")) || ops[j].op == OP_LEAVE) { - ops[j].flags |= OPF_RMD; + ops[j].flags |= OPF_RMD | OPF_DONE; } else if (!(g_ida_func_attr & IDAFA_NORETURN)) ferr(&ops[j], "'pop ebp' expected\n"); @@ -2837,7 +2881,7 @@ static void scan_prologue_epilogue(int opcnt) && IS(opr_name(&ops[j - 1], 0), "esp") && IS(opr_name(&ops[j - 1], 1), "ebp")) { - ops[j - 1].flags |= OPF_RMD; + ops[j - 1].flags |= OPF_RMD | OPF_DONE; } else if (ops[j].op != OP_LEAVE && !(g_ida_func_attr & IDAFA_NORETURN)) @@ -2862,7 +2906,7 @@ static void scan_prologue_epilogue(int opcnt) // non-bp frame i = 0; while (ops[i].op == OP_PUSH && IS(opr_name(&ops[i], 0), "ecx")) { - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; g_stack_fsz += 4; ecx_push++; i++; @@ -2875,7 +2919,7 @@ static void scan_prologue_epilogue(int opcnt) && ops[i].operand[1].type == OPT_CONST) { g_stack_fsz = ops[i].operand[1].val; - ops[i].flags |= OPF_RMD; + ops[i].flags |= OPF_RMD | OPF_DONE; esp_sub = 1; break; } @@ -2894,7 +2938,7 @@ static void scan_prologue_epilogue(int opcnt) while (i > 0 && j > 0) { i--; if (ops[i].op == OP_PUSH) { - ops[i].flags &= ~OPF_RMD; + ops[i].flags &= ~(OPF_RMD | OPF_DONE); j--; } } @@ -2939,12 +2983,12 @@ static void scan_prologue_epilogue(int opcnt) && ops[j].operand[1].type == OPT_CONST) { /* add esp, N */ - ecx_push -= ops[j].operand[1].val / 4 - 1; + l += ops[j].operand[1].val / 4 - 1; } else ferr(&ops[j], "'pop ecx' expected\n"); - ops[j].flags |= OPF_RMD; + ops[j].flags |= OPF_RMD | OPF_DONE; j--; } if (l != ecx_push) @@ -2960,7 +3004,7 @@ static void scan_prologue_epilogue(int opcnt) || ops[j].operand[1].val != g_stack_fsz) ferr(&ops[j], "'add esp' expected\n"); - ops[j].flags |= OPF_RMD; + ops[j].flags |= OPF_RMD | OPF_DONE; ops[j].operand[1].val = 0; // hack for stack arg scanner found = 1; } @@ -3067,13 +3111,45 @@ static int try_resolve_const(int i, const struct parsed_opr *opr, return -1; } +static struct parsed_proto *process_call_early(int i, int opcnt, + int *adj_i) +{ + struct parsed_op *po = &ops[i]; + struct parsed_proto *pp; + int multipath = 0; + int adj = 0; + int ret; + + pp = po->pp; + if (pp == NULL || pp->is_vararg || pp->argc_reg != 0) + // leave for later + return NULL; + + // look for and make use of esp adjust + *adj_i = ret = -1; + if (!pp->is_stdcall && pp->argc_stack > 0) + ret = scan_for_esp_adjust(i + 1, opcnt, + pp->argc_stack * 4, &adj, &multipath, 0); + if (ret >= 0) { + if (pp->argc_stack > adj / 4) + return NULL; + if (multipath) + return NULL; + if (ops[ret].op == OP_POP && adj != 4) + return NULL; + } + + *adj_i = ret; + return pp; +} + static struct parsed_proto *process_call(int i, int opcnt) { struct parsed_op *po = &ops[i]; const struct parsed_proto *pp_c; struct parsed_proto *pp; const char *tmpname; - int j = 0, l = 0; + int adj = 0, multipath = 0; int ret, arg; tmpname = opr_name(po, 0); @@ -3081,13 +3157,13 @@ static struct parsed_proto *process_call(int i, int opcnt) if (pp == NULL) { // indirect call - pp_c = resolve_icall(i, opcnt, &l); + pp_c = resolve_icall(i, opcnt, &multipath); if (pp_c != NULL) { if (!pp_c->is_func && !pp_c->is_fptr) ferr(po, "call to non-func: %s\n", pp_c->name); pp = proto_clone(pp_c); my_assert_not(pp, NULL); - if (l) + if (multipath) // not resolved just to single func pp->is_fptr = 1; @@ -3108,18 +3184,19 @@ static struct parsed_proto *process_call(int i, int opcnt) my_assert_not(pp, NULL); pp->is_fptr = 1; - ret = scan_for_esp_adjust(i + 1, opcnt, ~0, &j, &l); - if (ret < 0 || j < 0) { + ret = scan_for_esp_adjust(i + 1, opcnt, + 32*4, &adj, &multipath, 0); + if (ret < 0 || adj < 0) { if (!g_allow_regfunc) ferr(po, "non-__cdecl indirect call unhandled yet\n"); pp->is_unresolved = 1; - j = 0; + adj = 0; } - j /= 4; - if (j > ARRAY_SIZE(pp->arg)) - ferr(po, "esp adjust too large: %d\n", j); + adj /= 4; + if (adj > ARRAY_SIZE(pp->arg)) + ferr(po, "esp adjust too large: %d\n", adj); pp->ret_type.name = strdup("int"); - pp->argc = pp->argc_stack = j; + pp->argc = pp->argc_stack = adj; for (arg = 0; arg < pp->argc; arg++) pp->arg[arg].type.name = strdup("int"); } @@ -3127,18 +3204,21 @@ static struct parsed_proto *process_call(int i, int opcnt) } // look for and make use of esp adjust + multipath = 0; ret = -1; if (!pp->is_stdcall && pp->argc_stack > 0) ret = scan_for_esp_adjust(i + 1, opcnt, - pp->argc_stack * 4, &j, &l); + pp->argc_stack * 4, &adj, &multipath, 0); if (ret >= 0) { if (pp->is_vararg) { - if (j / 4 < pp->argc_stack) - ferr(po, "esp adjust is too small: %x < %x\n", - j, pp->argc_stack * 4); + if (adj / 4 < pp->argc_stack) { + fnote(po, "(this call)\n"); + ferr(&ops[ret], "esp adjust is too small: %x < %x\n", + adj, pp->argc_stack * 4); + } // modify pp to make it have varargs as normal args arg = pp->argc; - pp->argc += j / 4 - pp->argc_stack; + pp->argc += adj / 4 - pp->argc_stack; for (; arg < pp->argc; arg++) { pp->arg[arg].type.name = strdup("int"); pp->argc_stack++; @@ -3146,29 +3226,14 @@ static struct parsed_proto *process_call(int i, int opcnt) if (pp->argc > ARRAY_SIZE(pp->arg)) ferr(po, "too many args for '%s'\n", tmpname); } - if (pp->argc_stack > j / 4) { + if (pp->argc_stack > adj / 4) { fnote(po, "(this call)\n"); ferr(&ops[ret], "stack tracking failed for '%s': %x %x\n", - tmpname, pp->argc_stack * 4, j); + tmpname, pp->argc_stack * 4, adj); } - ops[ret].flags |= OPF_RMD; - if (ops[ret].op == OP_POP) { - if (j > 4) { - // deal with multi-pop stack adjust - j = pp->argc_stack; - while (ops[ret].op == OP_POP && j > 0 && ret < opcnt) { - ops[ret].flags |= OPF_RMD; - j--; - ret++; - } - } - } - else if (!l) { - // a bit of a hack, but deals with use of - // single adj for multiple calls - ops[ret].operand[1].val -= j; - } + scan_for_esp_adjust(i + 1, opcnt, + pp->argc_stack * 4, &adj, &multipath, 1); } else if (pp->is_vararg) ferr(po, "missing esp_adjust for vararg func '%s'\n", @@ -3177,6 +3242,82 @@ static struct parsed_proto *process_call(int i, int opcnt) return pp; } +static int collect_call_args_early(struct parsed_op *po, int i, + struct parsed_proto *pp, int *regmask) +{ + int arg, ret; + int j; + + for (arg = 0; arg < pp->argc; arg++) + if (pp->arg[arg].reg == NULL) + break; + + // first see if it can be easily done + for (j = i; j > 0 && arg < pp->argc; ) + { + if (g_labels[j] != NULL) + return -1; + j--; + + if (ops[j].op == OP_CALL) + return -1; + else if (ops[j].op == OP_ADD && ops[j].operand[0].reg == xSP) + return -1; + else if (ops[j].op == OP_POP) + return -1; + else if (ops[j].flags & OPF_CJMP) + return -1; + else if (ops[j].op == OP_PUSH) { + if (ops[j].flags & (OPF_FARG|OPF_FARGNR)) + return -1; + ret = scan_for_mod(&ops[j], j + 1, i, 1); + if (ret >= 0) + return -1; + + if (pp->arg[arg].type.is_va_list) + return -1; + + // next arg + for (arg++; arg < pp->argc; arg++) + if (pp->arg[arg].reg == NULL) + break; + } + } + + if (arg < pp->argc) + return -1; + + // now do it + for (arg = 0; arg < pp->argc; arg++) + if (pp->arg[arg].reg == NULL) + break; + + for (j = i; j > 0 && arg < pp->argc; ) + { + j--; + + if (ops[j].op == OP_PUSH) + { + ops[j].p_argnext = -1; + ferr_assert(&ops[j], pp->arg[arg].datap == NULL); + pp->arg[arg].datap = &ops[j]; + + if (ops[j].operand[0].type == OPT_REG) + *regmask |= 1 << ops[j].operand[0].reg; + + ops[j].flags |= OPF_RMD | OPF_DONE | OPF_FARGNR | OPF_FARG; + ops[j].flags &= ~OPF_RSAVE; + + // next arg + for (arg++; arg < pp->argc; arg++) + if (pp->arg[arg].reg == NULL) + break; + } + } + + return 0; +} + static int collect_call_args_r(struct parsed_op *po, int i, struct parsed_proto *pp, int *regmask, int *save_arg_vars, int *arg_grp, int arg, int magic, int need_op_saving, int may_reuse) @@ -3270,8 +3411,7 @@ static int collect_call_args_r(struct parsed_op *po, int i, ferr(po, "arg collect %d/%d hit esp adjust of %d\n", arg, pp->argc, ops[j].operand[1].val); } - else if (ops[j].op == OP_POP && !(ops[j].flags & OPF_RMD) - && ops[j].datap == NULL) + else if (ops[j].op == OP_POP && !(ops[j].flags & OPF_DONE)) { if (pp->is_unresolved) break; @@ -3285,7 +3425,8 @@ static int collect_call_args_r(struct parsed_op *po, int i, may_reuse = 1; } - else if (ops[j].op == OP_PUSH && !(ops[j].flags & OPF_FARGNR)) + else if (ops[j].op == OP_PUSH + && !(ops[j].flags & (OPF_FARGNR|OPF_DONE))) { if (pp->is_unresolved && (ops[j].flags & OPF_RMD)) break; @@ -3353,7 +3494,7 @@ static int collect_call_args_r(struct parsed_op *po, int i, if (!g_func_pp->is_vararg || strstr(ops[k].operand[1].name, buf)) { - ops[k].flags |= OPF_RMD; + ops[k].flags |= OPF_RMD | OPF_DONE; ops[j].flags |= OPF_RMD | OPF_VAPUSH; save_args &= ~(1 << arg); reg = -1; @@ -3368,7 +3509,7 @@ static int collect_call_args_r(struct parsed_op *po, int i, ret = stack_frame_access(&ops[k], &ops[k].operand[1], buf, sizeof(buf), ops[k].operand[1].name, "", 1, 0); if (ret >= 0) { - ops[k].flags |= OPF_RMD; + ops[k].flags |= OPF_RMD | OPF_DONE; ops[j].flags |= OPF_RMD; ops[j].p_argpass = ret + 1; save_args &= ~(1 << arg); @@ -3562,17 +3703,69 @@ static void output_std_flags(FILE *fout, struct parsed_op *po, } } +enum { + OPP_FORCE_NORETURN = (1 << 0), + OPP_SIMPLE_ARGS = (1 << 1), + OPP_ALIGN = (1 << 2), +}; + static void output_pp_attrs(FILE *fout, const struct parsed_proto *pp, - int is_noreturn) + int flags) { + const char *cconv = ""; + if (pp->is_fastcall) - fprintf(fout, "__fastcall "); + cconv = "__fastcall "; else if (pp->is_stdcall && pp->argc_reg == 0) - fprintf(fout, "__stdcall "); - if (pp->is_noreturn || is_noreturn) + cconv = "__stdcall "; + + fprintf(fout, (flags & OPP_ALIGN) ? "%-16s" : "%s", cconv); + + if (pp->is_noreturn || (flags & OPP_FORCE_NORETURN)) fprintf(fout, "noreturn "); } +static void output_pp(FILE *fout, const struct parsed_proto *pp, + int flags) +{ + int i; + + fprintf(fout, (flags & OPP_ALIGN) ? "%-5s" : "%s ", + pp->ret_type.name); + if (pp->is_fptr) + fprintf(fout, "("); + output_pp_attrs(fout, pp, flags); + if (pp->is_fptr) + fprintf(fout, "*"); + fprintf(fout, "%s", pp->name); + if (pp->is_fptr) + fprintf(fout, ")"); + + fprintf(fout, "("); + for (i = 0; i < pp->argc; i++) { + if (i > 0) + fprintf(fout, ", "); + if (pp->arg[i].fptr != NULL && !(flags & OPP_SIMPLE_ARGS)) { + // func pointer + output_pp(fout, pp->arg[i].fptr, 0); + } + else if (pp->arg[i].type.is_retreg) { + fprintf(fout, "u32 *r_%s", pp->arg[i].reg); + } + else { + fprintf(fout, "%s", pp->arg[i].type.name); + if (!pp->is_fptr) + fprintf(fout, " a%d", i + 1); + } + } + if (pp->is_vararg) { + if (i > 0) + fprintf(fout, ", "); + fprintf(fout, "..."); + } + fprintf(fout, ")"); +} + static int get_pp_arg_regmask(const struct parsed_proto *pp) { int regmask = 0; @@ -3658,7 +3851,7 @@ static void gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) po->bt_i = -1; po->btj = NULL; - if (po->flags & OPF_RMD) + if (po->flags & (OPF_RMD|OPF_DONE)) continue; if (po->op == OP_CALL) { @@ -3714,7 +3907,7 @@ static void gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt) { if (l == i + 1 && po->op == OP_JMP) { // yet another alignment type.. - po->flags |= OPF_RMD; + po->flags |= OPF_RMD|OPF_DONE; break; } add_label_ref(&g_label_refs[l], i); @@ -3742,7 +3935,7 @@ tailcall: // pass3: // - remove dead labels - // - process calls + // - process trivial calls for (i = 0; i < opcnt; i++) { if (g_labels[i] != NULL && g_label_refs[i].i == -1) { @@ -3751,10 +3944,47 @@ tailcall: } po = &ops[i]; - if (po->flags & OPF_RMD) + if (po->flags & (OPF_RMD|OPF_DONE)) continue; if (po->op == OP_CALL) + { + pp = process_call_early(i, opcnt, &j); + if (pp != NULL) { + if (!(po->flags & OPF_ATAIL)) + // since we know the args, try to collect them + if (collect_call_args_early(po, i, pp, ®mask) != 0) + pp = NULL; + } + + if (pp != NULL) { + if (j >= 0) { + // commit esp adjust + ops[j].flags |= OPF_RMD; + if (ops[j].op != OP_POP) + patch_esp_adjust(&ops[j], pp->argc_stack * 4); + else + ops[j].flags |= OPF_DONE; + } + + if (strstr(pp->ret_type.name, "int64")) + need_tmp64 = 1; + + po->flags |= OPF_DONE; + } + } + } + + // pass4: + // - process calls + // - handle push /pop pairs + for (i = 0; i < opcnt; i++) + { + po = &ops[i]; + if (po->flags & (OPF_RMD|OPF_DONE)) + continue; + + if (po->op == OP_CALL && !(po->flags & OPF_DONE)) { pp = process_call(i, opcnt); @@ -3767,18 +3997,22 @@ tailcall: if (strstr(pp->ret_type.name, "int64")) need_tmp64 = 1; } + else if (po->op == OP_PUSH && !(po->flags & OPF_FARG) + && !(po->flags & OPF_RSAVE) && po->operand[0].type == OPT_CONST) + scan_for_pop_const(i, opcnt); } - // pass4: + // pass5: // - find POPs for PUSHes, rm both // - scan for STD/CLD, propagate DF // - scan for all used registers // - find flag set ops for their users // - do unreselved calls // - declare indirect functions - for (i = 0; i < opcnt; i++) { + for (i = 0; i < opcnt; i++) + { po = &ops[i]; - if (po->flags & OPF_RMD) + if (po->flags & (OPF_RMD|OPF_DONE)) continue; if (po->op == OP_PUSH && (po->flags & OPF_RSAVE)) { @@ -3824,13 +4058,10 @@ tailcall: continue; } } - else if (po->operand[0].type == OPT_CONST) { - scan_for_pop_const(i, opcnt); - } } if (po->op == OP_STD) { - po->flags |= OPF_DF | OPF_RMD; + po->flags |= OPF_DF | OPF_RMD | OPF_DONE; scan_propagate_df(i + 1, opcnt); } @@ -3915,6 +4146,7 @@ tailcall: need_tmp64 = 1; } else if (po->op == OP_CALL) { + // note: resolved non-reg calls are OPF_DONE already pp = po->pp; if (pp == NULL) ferr(po, "NULL pp\n"); @@ -4008,14 +4240,14 @@ tailcall: need_tmp64 = 1; } else if (po->op == OP_CLD) - po->flags |= OPF_RMD; + po->flags |= OPF_RMD | OPF_DONE; if (po->op == OP_RCL || po->op == OP_RCR || po->op == OP_XCHG) { need_tmp_var = 1; } } - // pass4: + // pass6: // - confirm regmask_save, it might have been reduced if (regmask_save != 0) { @@ -4040,47 +4272,10 @@ tailcall: } // the function itself - fprintf(fout, "%s ", g_func_pp->ret_type.name); - output_pp_attrs(fout, g_func_pp, g_ida_func_attr & IDAFA_NORETURN); - fprintf(fout, "%s(", g_func_pp->name); - - for (i = 0; i < g_func_pp->argc; i++) { - if (i > 0) - fprintf(fout, ", "); - if (g_func_pp->arg[i].fptr != NULL) { - // func pointer.. - pp = g_func_pp->arg[i].fptr; - fprintf(fout, "%s (", pp->ret_type.name); - output_pp_attrs(fout, pp, 0); - fprintf(fout, "*a%d)(", i + 1); - for (j = 0; j < pp->argc; j++) { - if (j > 0) - fprintf(fout, ", "); - if (pp->arg[j].fptr) - ferr(ops, "nested fptr\n"); - fprintf(fout, "%s", pp->arg[j].type.name); - } - if (pp->is_vararg) { - if (j > 0) - fprintf(fout, ", "); - fprintf(fout, "..."); - } - fprintf(fout, ")"); - } - else if (g_func_pp->arg[i].type.is_retreg) { - fprintf(fout, "u32 *r_%s", g_func_pp->arg[i].reg); - } - else { - fprintf(fout, "%s a%d", g_func_pp->arg[i].type.name, i + 1); - } - } - if (g_func_pp->is_vararg) { - if (i > 0) - fprintf(fout, ", "); - fprintf(fout, "..."); - } - - fprintf(fout, ")\n{\n"); + ferr_assert(ops, !g_func_pp->is_fptr); + output_pp(fout, g_func_pp, + (g_ida_func_attr & IDAFA_NORETURN) ? OPP_FORCE_NORETURN : 0); + fprintf(fout, "\n{\n"); // declare indirect functions for (i = 0; i < opcnt; i++) { @@ -4114,15 +4309,9 @@ tailcall: else snprintf(pp->name, sizeof(pp->name), "icall%d", i); - fprintf(fout, " %s (", pp->ret_type.name); - output_pp_attrs(fout, pp, 0); - fprintf(fout, "*%s)(", pp->name); - for (j = 0; j < pp->argc; j++) { - if (j > 0) - fprintf(fout, ", "); - fprintf(fout, "%s a%d", pp->arg[j].type.name, j + 1); - } - fprintf(fout, ");\n"); + fprintf(fout, " "); + output_pp(fout, pp, OPP_SIMPLE_ARGS); + fprintf(fout, ";\n"); } } } @@ -5311,11 +5500,12 @@ struct func_prototype { int id; int argc_stack; int regmask_dep; - int has_ret:3; // -1, 0, 1: unresolved, no, yes + int has_ret:3; // -1, 0, 1: unresolved, no, yes unsigned int dep_resolved:1; unsigned int is_stdcall:1; struct func_proto_dep *dep_func; int dep_func_cnt; + const struct parsed_proto *pp; // seed pp, if any }; struct func_proto_dep { @@ -5328,6 +5518,15 @@ struct func_proto_dep { static struct func_prototype *hg_fp; static int hg_fp_cnt; +static struct scanned_var { + char name[NAMELEN]; + enum opr_lenmod lmod; + unsigned int is_seeded:1; + unsigned int is_c_str:1; + const struct parsed_proto *pp; // seed pp, if any +} *hg_vars; +static int hg_var_cnt; + static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, int count); @@ -5374,87 +5573,229 @@ static int hg_fp_cmp_id(const void *p1_, const void *p2_) } #endif -static void gen_hdr(const char *funcn, int opcnt) +// recursive register dep pass +// - track saved regs (part 2) +// - try to figure out arg-regs +// - calculate reg deps +static void gen_hdr_dep_pass(int i, int opcnt, unsigned char *cbits, + struct func_prototype *fp, int regmask_save, int regmask_dst, + int *regmask_dep, int *has_ret) { - const struct parsed_proto *pp_c; - struct parsed_proto *pp; - struct func_prototype *fp; struct func_proto_dep *dep; - struct parsed_data *pd; struct parsed_op *po; - const char *tmpname; - int regmask_save = 0; - int regmask_dst = 0; - int regmask_dep = 0; - int max_bp_offset = 0; - int has_ret = -1; int from_caller = 0; - int i, j, l, ret; - int depth, reg; - - if ((hg_fp_cnt & 0xff) == 0) { - hg_fp = realloc(hg_fp, sizeof(hg_fp[0]) * (hg_fp_cnt + 0x100)); - my_assert_not(hg_fp, NULL); - memset(hg_fp + hg_fp_cnt, 0, sizeof(hg_fp[0]) * 0x100); - } - - fp = &hg_fp[hg_fp_cnt]; - snprintf(fp->name, sizeof(fp->name), "%s", funcn); - fp->id = hg_fp_cnt; - fp->argc_stack = -1; - hg_fp_cnt++; - - // perhaps already in seed header? - pp_c = proto_parse(g_fhdr, funcn, 1); - if (pp_c != NULL) { - fp->argc_stack = pp_c->argc_stack; - fp->regmask_dep = get_pp_arg_regmask(pp_c); - fp->has_ret = !IS(pp_c->ret_type.name, "void"); - return; - } - - g_bp_frame = g_sp_frame = g_stack_fsz = 0; - g_stack_frame_used = 0; - - // pass1: - // - handle ebp/esp frame, remove ops related to it - scan_prologue_epilogue(opcnt); + int depth; + int j, l; + int reg; + int ret; - // pass2: - // - collect calls - // - resolve all branches - for (i = 0; i < opcnt; i++) + for (; i < opcnt; i++) { - po = &ops[i]; - po->bt_i = -1; - po->btj = NULL; - - if (po->flags & OPF_RMD) - continue; + if (cbits[i >> 3] & (1 << (i & 7))) + return; + cbits[i >> 3] |= (1 << (i & 7)); - if (po->op == OP_CALL) { - tmpname = opr_name(po, 0); - pp = NULL; - if (po->operand[0].type == OPT_LABEL) { - hg_fp_add_dep(fp, tmpname); + po = &ops[i]; - // perhaps a call to already known func? - pp_c = proto_parse(g_fhdr, tmpname, 1); - if (pp_c != NULL) - pp = proto_clone(pp_c); + if ((po->flags & OPF_JMP) && po->op != OP_CALL) { + if (po->btj != NULL) { + // jumptable + for (j = 0; j < po->btj->count; j++) { + gen_hdr_dep_pass(po->btj->d[j].bt_i, opcnt, cbits, fp, + regmask_save, regmask_dst, regmask_dep, has_ret); + } + return; } - else if (po->datap != NULL) { - pp = calloc(1, sizeof(*pp)); - my_assert_not(pp, NULL); - ret = parse_protostr(po->datap, pp); - if (ret < 0) - ferr(po, "bad protostr supplied: %s\n", (char *)po->datap); - free(po->datap); - po->datap = NULL; + if (po->bt_i < 0) { + ferr(po, "dead branch\n"); + return; } - if (pp != NULL && pp->is_noreturn) - po->flags |= OPF_TAIL; + + if (po->flags & OPF_CJMP) { + gen_hdr_dep_pass(po->bt_i, opcnt, cbits, fp, + regmask_save, regmask_dst, regmask_dep, has_ret); + } + else { + i = po->bt_i - 1; + } + continue; + } + + if (po->flags & OPF_FARG) + /* (just calculate register deps) */; + else if (po->op == OP_PUSH && po->operand[0].type == OPT_REG) + { + reg = po->operand[0].reg; + if (reg < 0) + ferr(po, "reg not set for push?\n"); + + if (po->flags & OPF_RSAVE) { + regmask_save |= 1 << reg; + continue; + } + if (po->flags & OPF_DONE) + continue; + + depth = 0; + ret = scan_for_pop(i + 1, opcnt, + po->operand[0].name, i + opcnt * 2, 0, &depth, 0); + if (ret == 1) { + regmask_save |= 1 << reg; + po->flags |= OPF_RMD; + scan_for_pop(i + 1, opcnt, + po->operand[0].name, i + opcnt * 3, 0, &depth, 1); + continue; + } + } + else if (po->flags & OPF_RMD) + continue; + else if (po->op == OP_CALL) { + po->regmask_dst |= 1 << xAX; + + dep = hg_fp_find_dep(fp, po->operand[0].name); + if (dep != NULL) + dep->regmask_live = regmask_save | regmask_dst; + } + else if (po->op == OP_RET) { + if (po->operand_cnt > 0) { + fp->is_stdcall = 1; + if (fp->argc_stack >= 0 + && fp->argc_stack != po->operand[0].val / 4) + ferr(po, "ret mismatch? (%d)\n", fp->argc_stack * 4); + fp->argc_stack = po->operand[0].val / 4; + } + } + + if (*has_ret != 0 && (po->flags & OPF_TAIL)) { + if (po->op == OP_CALL) { + j = i; + ret = 1; + } + else { + struct parsed_opr opr = { 0, }; + opr.type = OPT_REG; + opr.reg = xAX; + j = -1; + from_caller = 0; + ret = resolve_origin(i, &opr, i + opcnt * 4, &j, &from_caller); + } + + if (ret == -1 && from_caller) { + // unresolved eax - probably void func + *has_ret = 0; + } + else { + if (ops[j].op == OP_CALL) { + dep = hg_fp_find_dep(fp, po->operand[0].name); + if (dep != NULL) + dep->ret_dep = 1; + else + *has_ret = 1; + } + else + *has_ret = 1; + } + } + + l = regmask_save | regmask_dst; + if (g_bp_frame && !(po->flags & OPF_EBP_S)) + l |= 1 << xBP; + + l = po->regmask_src & ~l; +#if 0 + if (l) + fnote(po, "dep |= %04x, dst %04x, save %04x (f %x)\n", + l, regmask_dst, regmask_save, po->flags); +#endif + *regmask_dep |= l; + regmask_dst |= po->regmask_dst; + + if (po->flags & OPF_TAIL) + return; + } +} + +static void gen_hdr(const char *funcn, int opcnt) +{ + int save_arg_vars[MAX_ARG_GRP] = { 0, }; + unsigned char cbits[MAX_OPS / 8]; + const struct parsed_proto *pp_c; + struct parsed_proto *pp; + struct func_prototype *fp; + struct parsed_data *pd; + struct parsed_op *po; + const char *tmpname; + int regmask_dummy = 0; + int regmask_dep; + int max_bp_offset = 0; + int has_ret; + int i, j, l, ret; + + if ((hg_fp_cnt & 0xff) == 0) { + hg_fp = realloc(hg_fp, sizeof(hg_fp[0]) * (hg_fp_cnt + 0x100)); + my_assert_not(hg_fp, NULL); + memset(hg_fp + hg_fp_cnt, 0, sizeof(hg_fp[0]) * 0x100); + } + + fp = &hg_fp[hg_fp_cnt]; + snprintf(fp->name, sizeof(fp->name), "%s", funcn); + fp->id = hg_fp_cnt; + fp->argc_stack = -1; + hg_fp_cnt++; + + // perhaps already in seed header? + fp->pp = proto_parse(g_fhdr, funcn, 1); + if (fp->pp != NULL) { + fp->argc_stack = fp->pp->argc_stack; + fp->is_stdcall = fp->pp->is_stdcall; + fp->regmask_dep = get_pp_arg_regmask(fp->pp); + fp->has_ret = !IS(fp->pp->ret_type.name, "void"); + return; + } + + g_bp_frame = g_sp_frame = g_stack_fsz = 0; + g_stack_frame_used = 0; + + // pass1: + // - handle ebp/esp frame, remove ops related to it + scan_prologue_epilogue(opcnt); + + // pass2: + // - collect calls + // - resolve all branches + for (i = 0; i < opcnt; i++) + { + po = &ops[i]; + po->bt_i = -1; + po->btj = NULL; + + if (po->flags & (OPF_RMD|OPF_DONE)) + continue; + + if (po->op == OP_CALL) { + tmpname = opr_name(po, 0); + pp = NULL; + if (po->operand[0].type == OPT_LABEL) { + hg_fp_add_dep(fp, tmpname); + + // perhaps a call to already known func? + pp_c = proto_parse(g_fhdr, tmpname, 1); + if (pp_c != NULL) + pp = proto_clone(pp_c); + } + else if (po->datap != NULL) { + pp = calloc(1, sizeof(*pp)); + my_assert_not(pp, NULL); + + ret = parse_protostr(po->datap, pp); + if (ret < 0) + ferr(po, "bad protostr supplied: %s\n", (char *)po->datap); + free(po->datap); + po->datap = NULL; + } + if (pp != NULL && pp->is_noreturn) + po->flags |= OPF_TAIL; po->pp = pp; continue; @@ -5501,7 +5842,6 @@ tailcall: // pass3: // - remove dead labels - // - process calls // - handle push /pop pairs for (i = 0; i < opcnt; i++) { @@ -5511,119 +5851,91 @@ tailcall: } po = &ops[i]; - if (po->flags & OPF_RMD) + if (po->flags & (OPF_RMD|OPF_DONE)) continue; - if (po->op == OP_CALL) { - pp = process_call(i, opcnt); - - if (!pp->is_unresolved && !(po->flags & OPF_ATAIL)) { - int regmask_dummy = 0, save_arg_vars[MAX_ARG_GRP] = { 0, }; - // since we know the args, collect them - collect_call_args(po, i, pp, ®mask_dummy, save_arg_vars, - i + opcnt * 2); - } - } - else if (po->op == OP_PUSH && po->operand[0].type == OPT_CONST) { + if (po->op == OP_PUSH && po->operand[0].type == OPT_CONST) scan_for_pop_const(i, opcnt); - } } // pass4: - // - track saved regs - // - try to figure out arg-regs + // - process trivial calls for (i = 0; i < opcnt; i++) { po = &ops[i]; - - if (po->flags & OPF_FARG) - /* (just calculate register deps) */; - else if (po->flags & OPF_RMD) + if (po->flags & (OPF_RMD|OPF_DONE)) continue; - else if (po->op == OP_PUSH && po->operand[0].type == OPT_REG) + + if (po->op == OP_CALL) { - reg = po->operand[0].reg; - if (reg < 0) - ferr(po, "reg not set for push?\n"); + pp = process_call_early(i, opcnt, &j); + if (pp != NULL) { + if (!(po->flags & OPF_ATAIL)) + // since we know the args, try to collect them + if (collect_call_args_early(po, i, pp, ®mask_dummy) != 0) + pp = NULL; + } - depth = 0; - ret = scan_for_pop(i + 1, opcnt, - po->operand[0].name, i + opcnt * 1, 0, &depth, 0); - if (ret == 1) { - regmask_save |= 1 << reg; - po->flags |= OPF_RMD; - scan_for_pop(i + 1, opcnt, - po->operand[0].name, i + opcnt * 2, 0, &depth, 1); - continue; + if (pp != NULL) { + if (j >= 0) { + // commit esp adjust + ops[j].flags |= OPF_RMD; + if (ops[j].op != OP_POP) + patch_esp_adjust(&ops[j], pp->argc_stack * 4); + else + ops[j].flags |= OPF_DONE; + } + + po->flags |= OPF_DONE; } + } + } + + // pass5: + // - track saved regs (simple) + // - process calls + for (i = 0; i < opcnt; i++) + { + po = &ops[i]; + if (po->flags & (OPF_RMD|OPF_DONE)) + continue; + + if (po->op == OP_PUSH && po->operand[0].type == OPT_REG) + { ret = scan_for_pop_ret(i + 1, opcnt, po->operand[0].name, 0); if (ret == 0) { - regmask_save |= 1 << reg; - po->flags |= OPF_RMD; + // regmask_save |= 1 << po->operand[0].reg; // do it later + po->flags |= OPF_RSAVE | OPF_RMD | OPF_DONE; scan_for_pop_ret(i + 1, opcnt, po->operand[0].name, OPF_RMD); - continue; } } - else if (po->op == OP_CALL) { - po->regmask_dst |= 1 << xAX; + else if (po->op == OP_CALL && !(po->flags & OPF_DONE)) + { + pp = process_call(i, opcnt); - dep = hg_fp_find_dep(fp, po->operand[0].name); - if (dep != NULL) - dep->regmask_live = regmask_save | regmask_dst; - } - else if (po->op == OP_RET) { - if (po->operand_cnt > 0) { - fp->is_stdcall = 1; - if (fp->argc_stack >= 0 - && fp->argc_stack != po->operand[0].val / 4) - ferr(po, "ret mismatch? (%d)\n", fp->argc_stack * 4); - fp->argc_stack = po->operand[0].val / 4; + if (!pp->is_unresolved && !(po->flags & OPF_ATAIL)) { + // since we know the args, collect them + ret = collect_call_args(po, i, pp, ®mask_dummy, save_arg_vars, + i + opcnt * 1); } } + } - if (has_ret != 0 && (po->flags & OPF_TAIL)) { - if (po->op == OP_CALL) { - j = i; - ret = 1; - } - else { - struct parsed_opr opr = { 0, }; - opr.type = OPT_REG; - opr.reg = xAX; - j = -1; - from_caller = 0; - ret = resolve_origin(i, &opr, i + opcnt * 3, &j, &from_caller); - } + // pass6 + memset(cbits, 0, sizeof(cbits)); + regmask_dep = 0; + has_ret = -1; - if (ret == -1 && from_caller) { - // unresolved eax - probably void func - has_ret = 0; - } - else { - if (ops[j].op == OP_CALL) { - dep = hg_fp_find_dep(fp, po->operand[0].name); - if (dep != NULL) - dep->ret_dep = 1; - else - has_ret = 1; - } - else - has_ret = 1; - } - } + gen_hdr_dep_pass(0, opcnt, cbits, fp, 0, 0, ®mask_dep, &has_ret); - l = regmask_save | regmask_dst; - if (g_bp_frame && !(po->flags & OPF_EBP_S)) - l |= 1 << xBP; + // find unreachable code - must be fixed in IDA + for (i = 0; i < opcnt; i++) + { + if (cbits[i >> 3] & (1 << (i & 7))) + continue; - l = po->regmask_src & ~l; -#if 0 - if (l) - fnote(po, "dep |= %04x, dst %04x, save %04x\n", l, - regmask_dst, regmask_save); -#endif - regmask_dep |= l; - regmask_dst |= po->regmask_dst; + if (ops[i].op != OP_NOP) + ferr(&ops[i], "unreachable code\n"); } if (has_ret == -1 && (regmask_dep & (1 << xAX))) @@ -5643,6 +5955,12 @@ tailcall: fp->regmask_dep = regmask_dep & ~(1 << xSP); fp->has_ret = has_ret; +#if 0 + printf("// has_ret %d, regmask_dep %x\n", + fp->has_ret, fp->regmask_dep); + output_hdr_fp(stdout, fp, 1); + if (IS(funcn, "sub_100073FD")) exit(1); +#endif gen_x_cleanup(opcnt); } @@ -5650,6 +5968,7 @@ tailcall: static void hg_fp_resolve_deps(struct func_prototype *fp) { struct func_prototype fp_s; + int dep; int i; // this thing is recursive, so mark first.. @@ -5663,8 +5982,11 @@ static void hg_fp_resolve_deps(struct func_prototype *fp) if (!fp->dep_func[i].proto->dep_resolved) hg_fp_resolve_deps(fp->dep_func[i].proto); - fp->regmask_dep |= ~fp->dep_func[i].regmask_live - & fp->dep_func[i].proto->regmask_dep; + dep = ~fp->dep_func[i].regmask_live + & fp->dep_func[i].proto->regmask_dep; + fp->regmask_dep |= dep; + // printf("dep %s %s |= %x\n", fp->name, + // fp->dep_func[i].name, dep); if (fp->has_ret == -1) fp->has_ret = fp->dep_func[i].proto->has_ret; @@ -5675,8 +5997,9 @@ static void hg_fp_resolve_deps(struct func_prototype *fp) static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, int count) { - char *p, buf[NAMELEN]; - const char *cp; + const struct parsed_proto *pp; + char *p, namebuf[NAMELEN]; + const char *name; int regmask_dep; int argc_stack; int j, arg; @@ -5695,10 +6018,31 @@ static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, fprintf(fout, "\n"); #endif + p = strchr(fp->name, '@'); + if (p != NULL) { + memcpy(namebuf, fp->name, p - fp->name); + namebuf[p - fp->name] = 0; + name = namebuf; + } + else + name = fp->name; + if (name[0] == '_') + name++; + + pp = proto_parse(g_fhdr, name, 1); + if (pp != NULL && pp->is_include) + continue; + + if (fp->pp != NULL) { + // part of seed, output later + continue; + } + regmask_dep = fp->regmask_dep; argc_stack = fp->argc_stack; - fprintf(fout, fp->has_ret ? "int " : "void "); + fprintf(fout, "%-5s", fp->pp ? fp->pp->ret_type.name : + (fp->has_ret ? "int" : "void")); if (regmask_dep && (fp->is_stdcall || argc_stack == 0) && (regmask_dep & ~((1 << xCX) | (1 << xDX))) == 0) { @@ -5720,17 +6064,7 @@ static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, else fprintf(fout, " __cdecl "); - p = strchr(fp->name, '@'); - if (p != NULL) { - memcpy(buf, fp->name, p - fp->name); - buf[p - fp->name] = 0; - cp = buf; - } - else - cp = fp->name; - if (cp[0] == '_') - cp++; - fprintf(fout, "%s(", cp); + fprintf(fout, "%s(", name); arg = 0; for (j = 0; j < xSP; j++) { @@ -5738,7 +6072,11 @@ static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, arg++; if (arg != 1) fprintf(fout, ", "); - fprintf(fout, "int a%d/*<%s>*/", arg, regs_r32[j]); + if (fp->pp != NULL) + fprintf(fout, "%s", fp->pp->arg[arg - 1].type.name); + else + fprintf(fout, "int"); + fprintf(fout, " a%d/*<%s>*/", arg, regs_r32[j]); } } @@ -5746,7 +6084,14 @@ static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, arg++; if (arg != 1) fprintf(fout, ", "); - fprintf(fout, "int a%d", arg); + if (fp->pp != NULL) { + fprintf(fout, "%s", fp->pp->arg[arg - 1].type.name); + if (!fp->pp->arg[arg - 1].type.is_ptr) + fprintf(fout, " "); + } + else + fprintf(fout, "int "); + fprintf(fout, "a%d", arg); } fprintf(fout, ");\n"); @@ -5755,6 +6100,15 @@ static void output_hdr_fp(FILE *fout, const struct func_prototype *fp, static void output_hdr(FILE *fout) { + static const char *lmod_c_names[] = { + [OPLM_UNSPEC] = "???", + [OPLM_BYTE] = "uint8_t", + [OPLM_WORD] = "uint16_t", + [OPLM_DWORD] = "uint32_t", + [OPLM_QWORD] = "uint64_t", + }; + const struct scanned_var *var; + char line[256] = { 0, }; int i; // resolve deps @@ -5765,7 +6119,208 @@ static void output_hdr(FILE *fout) // note: messes up .proto ptr, don't use //qsort(hg_fp, hg_fp_cnt, sizeof(hg_fp[0]), hg_fp_cmp_id); + // output variables + for (i = 0; i < hg_var_cnt; i++) { + var = &hg_vars[i]; + + if (var->pp != NULL) + // part of seed + continue; + else if (var->is_c_str) + fprintf(fout, "extern %-8s %s[];", "char", var->name); + else + fprintf(fout, "extern %-8s %s;", + lmod_c_names[var->lmod], var->name); + + if (var->is_seeded) + fprintf(fout, " // seeded"); + fprintf(fout, "\n"); + } + + fprintf(fout, "\n"); + + // output function prototypes output_hdr_fp(fout, hg_fp, hg_fp_cnt); + + // seed passthrough + fprintf(fout, "\n// - seed -\n"); + + rewind(g_fhdr); + while (fgets(line, sizeof(line), g_fhdr)) + fwrite(line, 1, strlen(line), fout); +} + +// read a line, truncating it if it doesn't fit +static char *my_fgets(char *s, size_t size, FILE *stream) +{ + char *ret, *ret2; + char buf[64]; + int p; + + p = size - 2; + if (p >= 0) + s[p] = 0; + + ret = fgets(s, size, stream); + if (ret != NULL && p >= 0 && s[p] != 0 && s[p] != '\n') { + p = sizeof(buf) - 2; + do { + buf[p] = 0; + ret2 = fgets(buf, sizeof(buf), stream); + } + while (ret2 != NULL && buf[p] != 0 && buf[p] != '\n'); + } + + return ret; +} + +// '=' needs special treatment +// also ' quote +static char *next_word_s(char *w, size_t wsize, char *s) +{ + size_t i; + + s = sskip(s); + + i = 0; + if (*s == '\'') { + w[0] = s[0]; + for (i = 1; i < wsize - 1; i++) { + if (s[i] == 0) { + printf("warning: missing closing quote: \"%s\"\n", s); + break; + } + if (s[i] == '\'') + break; + w[i] = s[i]; + } + } + + for (; i < wsize - 1; i++) { + if (s[i] == 0 || my_isblank(s[i]) || (s[i] == '=' && i > 0)) + break; + w[i] = s[i]; + } + w[i] = 0; + + if (s[i] != 0 && !my_isblank(s[i]) && s[i] != '=') + printf("warning: '%s' truncated\n", w); + + return s + i; +} + +static void scan_variables(FILE *fasm) +{ + struct scanned_var *var; + char line[256] = { 0, }; + char words[3][256]; + char *p = NULL; + int wordc; + int l; + + while (!feof(fasm)) + { + // skip to next data section + while (my_fgets(line, sizeof(line), fasm)) + { + asmln++; + + p = sskip(line); + if (*p == 0 || *p == ';') + continue; + + p = sskip(next_word_s(words[0], sizeof(words[0]), p)); + if (*p == 0 || *p == ';') + continue; + + if (*p != 's' || !IS_START(p, "segment para public")) + continue; + + break; + } + + if (p == NULL || !IS_START(p, "segment para public")) + break; + p = sskip(p + 19); + + if (!IS_START(p, "'DATA'")) + continue; + + // now process it + while (my_fgets(line, sizeof(line), fasm)) + { + asmln++; + + p = line; + if (my_isblank(*p)) + continue; + + p = sskip(p); + if (*p == 0 || *p == ';') + continue; + + for (wordc = 0; wordc < ARRAY_SIZE(words); wordc++) { + words[wordc][0] = 0; + p = sskip(next_word_s(words[wordc], sizeof(words[0]), p)); + if (*p == 0 || *p == ';') { + wordc++; + break; + } + } + + if (wordc == 2 && IS(words[1], "ends")) + break; + if (wordc < 2) + continue; + + if ((hg_var_cnt & 0xff) == 0) { + hg_vars = realloc(hg_vars, sizeof(hg_vars[0]) + * (hg_var_cnt + 0x100)); + my_assert_not(hg_vars, NULL); + memset(hg_vars + hg_var_cnt, 0, sizeof(hg_vars[0]) * 0x100); + } + + var = &hg_vars[hg_var_cnt++]; + snprintf(var->name, sizeof(var->name), "%s", words[0]); + + // maybe already in seed header? + var->pp = proto_parse(g_fhdr, var->name, 1); + if (var->pp != NULL) { + if (var->pp->is_fptr) { + var->lmod = OPLM_DWORD; + //var->is_ptr = 1; + } + else if (var->pp->is_func) + aerr("func?\n"); + else if (!guess_lmod_from_c_type(&var->lmod, &var->pp->type)) + aerr("unhandled C type '%s' for '%s'\n", + var->pp->type.name, var->name); + + var->is_seeded = 1; + continue; + } + + if (IS(words[1], "dd")) + var->lmod = OPLM_DWORD; + else if (IS(words[1], "dw")) + var->lmod = OPLM_WORD; + else if (IS(words[1], "db")) { + var->lmod = OPLM_BYTE; + if (wordc >= 3 && (l = strlen(words[2])) > 4) { + if (words[2][0] == '\'' && IS(words[2] + l - 2, ",0")) + var->is_c_str = 1; + } + } + else if (IS(words[1], "dq")) + var->lmod = OPLM_QWORD; + //else if (IS(words[1], "dt")) + else + aerr("type '%s' not known\n", words[1]); + } + } + + rewind(fasm); + asmln = 0; } static void set_label(int i, const char *name) @@ -5786,26 +6341,6 @@ static void set_label(int i, const char *name) g_labels[i][len] = 0; } -// '=' needs special treatment.. -static char *next_word_s(char *w, size_t wsize, char *s) -{ - size_t i; - - s = sskip(s); - - for (i = 0; i < wsize - 1; i++) { - if (s[i] == 0 || my_isblank(s[i]) || (s[i] == '=' && i > 0)) - break; - w[i] = s[i]; - } - w[i] = 0; - - if (s[i] != 0 && !my_isblank(s[i]) && s[i] != '=') - printf("warning: '%s' truncated\n", w); - - return s + i; -} - struct chunk_item { char *name; long fptr; @@ -5854,7 +6389,7 @@ static void scan_ahead(FILE *fasm) oldpos = ftell(fasm); oldasmln = asmln; - while (fgets(line, sizeof(line), fasm)) + while (my_fgets(line, sizeof(line), fasm)) { wordc = 0; asmln++; @@ -5950,8 +6485,14 @@ int main(int argc, char *argv[]) } if (argc < arg + 3) { - printf("usage:\n%s [-v] [-rf] [-m] <.c> <.asm> [rlist]*\n" - "%s -hdr <.asm> [rlist]*\n", + printf("usage:\n%s [-v] [-rf] [-m] <.c> <.asm> [rlist]*\n" + "%s -hdr <.asm> [rlist]*\n" + "options:\n" + " -hdr - header generation mode\n" + " -rf - allow unannotated indirect calls\n" + " -m - allow multiple .text sections\n" + "[rlist] is a file with function names to skip," + " one per line\n", argv[0], argv[0]); return 1; } @@ -5982,7 +6523,7 @@ int main(int argc, char *argv[]) frlist = fopen(argv[arg], "r"); my_assert_not(frlist, NULL); - while (fgets(line, sizeof(line), frlist)) { + while (my_fgets(line, sizeof(line), frlist)) { p = sskip(line); if (*p == 0 || *p == ';') continue; @@ -6031,7 +6572,10 @@ int main(int argc, char *argv[]) g_label_refs[i].next = NULL; } - while (fgets(line, sizeof(line), fasm)) + if (g_header_mode) + scan_variables(fasm); + + while (my_fgets(line, sizeof(line), fasm)) { wordc = 0; asmln++; @@ -6186,7 +6730,7 @@ parse_words: do_pending_endp: // do delayed endp processing to collect switch jumptables if (pending_endp) { - if (in_func && !skip_func && !end && wordc >= 2 + if (in_func && !g_skip_func && !end && wordc >= 2 && ((words[0][0] == 'd' && words[0][2] == 0) || (words[1][0] == 'd' && words[1][2] == 0))) { @@ -6241,7 +6785,7 @@ do_pending_endp: continue; } - if (in_func && !skip_func) { + if (in_func && !g_skip_func) { if (g_header_mode) gen_hdr(g_func, pi); else @@ -6252,7 +6796,7 @@ do_pending_endp: in_func = 0; g_ida_func_attr = 0; skip_warned = 0; - skip_func = 0; + g_skip_func = 0; g_func[0] = 0; func_chunks_used = 0; func_chunk_i = -1; @@ -6286,7 +6830,7 @@ do_pending_endp: words[0], g_func); p = words[0]; if (bsearch(&p, rlist, rlist_len, sizeof(rlist[0]), cmpstringp)) - skip_func = 1; + g_skip_func = 1; strcpy(g_func, words[0]); set_label(0, words[0]); in_func = 1; @@ -6305,10 +6849,10 @@ do_pending_endp: && ops[0].op == OP_JMP && ops[0].operand[0].had_ds) { // import jump - skip_func = 1; + g_skip_func = 1; } - if (!skip_func && func_chunks_used) { + if (!g_skip_func && func_chunks_used) { // start processing chunks struct chunk_item *ci, key = { g_func, 0 }; @@ -6348,7 +6892,7 @@ do_pending_endp: } // scan for next text segment - while (fgets(line, sizeof(line), fasm)) { + while (my_fgets(line, sizeof(line), fasm)) { asmln++; p = sskip(line); if (*p == 0 || *p == ';') @@ -6367,8 +6911,8 @@ do_pending_endp: continue; } - if (!in_func || skip_func) { - if (!skip_warned && !skip_func && g_labels[pi] != NULL) { + if (!in_func || g_skip_func) { + if (!skip_warned && !g_skip_func && g_labels[pi] != NULL) { if (verbose) anote("skipping from '%s'\n", g_labels[pi]); skip_warned = 1;