+ default:
+ ferr(po, "%s: unhandled parsed_flag_op: %d\n", __func__, pfo);
+ }
+}
+
+ // Write into 'buf' the C condition for flag op 'pfo' as it results from
+ // the TEST or CMP instruction 'po'.  'is_inv' is handed through to the
+ // condition emitters unchanged.  Any other opcode is reported via ferr().
+ static void out_cmp_test(char *buf, size_t buf_size,
+   struct parsed_op *po, enum parsed_flag_op pfo, int is_inv)
+ {
+   char lhs[256], rhs[256], expr[256];
+
+   switch (po->op) {
+   case OP_TEST:
+     if (IS(opr_name(po, 0), opr_name(po, 1))) {
+       // both operands have the same name: x & x is just x
+       out_src_opr(expr, sizeof(expr), po, &po->operand[0], 0);
+     }
+     else {
+       out_src_opr(lhs, sizeof(lhs), po, &po->operand[0], 0);
+       out_src_opr(rhs, sizeof(rhs), po, &po->operand[1], 0);
+       snprintf(expr, sizeof(expr), "(%s & %s)", lhs, rhs);
+     }
+     out_test_for_cc(buf, buf_size, po, pfo, is_inv,
+       po->operand[0].lmod, expr);
+     break;
+
+   case OP_CMP:
+     out_src_opr(lhs, sizeof(lhs), po, &po->operand[0], 0);
+     out_src_opr(rhs, sizeof(rhs), po, &po->operand[1], 0);
+     out_cmp_for_cc(buf, buf_size, po, pfo, is_inv,
+       po->operand[0].lmod, lhs, rhs);
+     break;
+
+   default:
+     ferr(po, "%s: unhandled op: %d\n", __func__, po->op);
+   }
+ }
+
+ // Make the two operands of 'po' agree on their length modifier.
+ // An operand whose lmod is OPLM_UNSPEC inherits the lmod of the other one.
+ // It is an error (ferr) when neither operand has an lmod, or when both
+ // have one and they differ.
+ static void propagate_lmod(struct parsed_op *po, struct parsed_opr *popr1,
+   struct parsed_opr *popr2)
+ {
+   const int first_unset = (popr1->lmod == OPLM_UNSPEC);
+   const int second_unset = (popr2->lmod == OPLM_UNSPEC);
+
+   if (first_unset && second_unset)
+     ferr(po, "missing lmod for both operands\n");
+
+   if (first_unset) {
+     popr1->lmod = popr2->lmod;
+     return;
+   }
+   if (second_unset) {
+     popr2->lmod = popr1->lmod;
+     return;
+   }
+
+   // both are set - they have to match
+   if (popr1->lmod != popr2->lmod)
+     ferr(po, "conflicting lmods: %d vs %d\n", popr1->lmod, popr2->lmod);
+ }
+
+ // Map the opcode of 'po' to the text of the C binary operator used for it
+ // in the generated code.  ADC/SBB share the operator of ADD/SUB, and
+ // MUL/IMUL both map to "*".  Any other opcode is reported via ferr().
+ static const char *op_to_c(struct parsed_op *po)
+ {
+   switch (po->op)
+   {
+   case OP_ADD:
+   case OP_ADC:
+     return "+";
+   case OP_SUB:
+   case OP_SBB:
+     return "-";
+   case OP_AND:
+     return "&";
+   case OP_OR:
+     return "|";
+   case OP_XOR:
+     return "^";
+   case OP_SHL:
+     return "<<";
+   case OP_SHR:
+     return ">>";
+   case OP_MUL:
+   case OP_IMUL:
+     return "*";
+   default:
+     ferr(po, "op_to_c was supplied with %d\n", po->op);
+   }
+
+   // ferr() is presumably fatal (TODO confirm), so this should be
+   // unreachable.  The explicit return keeps every path of this non-void
+   // function well-defined; callers print the result with "%s", so a
+   // non-NULL placeholder is returned rather than NULL.
+   return "?";
+ }
+
+ // Walk forward from op index 'i' looking for 'pop <reg>', following
+ // branches on the way.  'magic' is stored in cc_scratch of every visited
+ // op so that an op already seen with the same magic ends the walk.
+ // With 'do_patch' set, the matching pop is flagged OPF_RMD.
+ // Returns 1 when a matching pop is found on the walked path, -1 on a dead
+ // end (tail op, unresolved branch, or a dead end behind a conditional
+ // branch), otherwise the OR of the results gathered from conditional
+ // branch targets (0 if there were none).
+ static int scan_for_pop(int i, int opcnt, const char *reg,
+   int magic, int do_patch)
+ {
+   struct parsed_op *cur;
+   int found = 0;
+
+   while (i < opcnt) {
+     cur = &ops[i];
+     if (cur->cc_scratch == magic)
+       return found; // already checked
+     cur->cc_scratch = magic;
+
+     if (cur->flags & OPF_TAIL)
+       return -1; // deadend
+
+     if (!(cur->flags & OPF_RMD)) {
+       if ((cur->flags & OPF_JMP) && cur->op != OP_CALL) {
+         if (cur->bt_i < 0) {
+           ferr(cur, "dead branch\n");
+           return -1;
+         }
+
+         if (!(cur->flags & OPF_CC)) {
+           // unconditional jump: carry on at the branch target
+           i = cur->bt_i;
+           continue;
+         }
+
+         // conditional jump: check the taken path, then fall through
+         found |= scan_for_pop(cur->bt_i, opcnt, reg, magic, do_patch);
+         if (found < 0)
+           return found; // dead end
+       }
+       else if (cur->op == OP_POP && cur->operand[0].type == OPT_REG
+         && IS(cur->operand[0].name, reg))
+       {
+         if (do_patch)
+           cur->flags |= OPF_RMD;
+         return 1;
+       }
+     }
+
+     i++;
+   }
+
+   return found;
+ }
+
+ // For every tail op (OPF_TAIL) at index 'i' or later, walk backwards from
+ // it looking for 'pop <reg>', skipping ops already flagged OPF_RMD.
+ // Hitting a jump op, or a labeled op that is not the wanted pop, aborts
+ // the whole scan with -1.  With 'do_patch' set, each pop found is flagged
+ // OPF_RMD.  Returns 0 if at least one pop was found, -1 otherwise.
+ static int scan_for_pop_ret(int i, int opcnt, const char *reg, int do_patch)
+ {
+   struct parsed_op *prev;
+   int have_pop = 0;
+   int k;
+
+   for (; i < opcnt; i++) {
+     if (!(ops[i].flags & OPF_TAIL))
+       continue;
+
+     k = i - 1;
+     while (k >= 0) {
+       prev = &ops[k];
+
+       if (!(prev->flags & OPF_RMD)) {
+         if (prev->flags & OPF_JMP)
+           return -1;
+
+         if (prev->op == OP_POP && prev->operand[0].type == OPT_REG
+           && IS(prev->operand[0].name, reg))
+         {
+           have_pop = 1;
+           if (do_patch)
+             prev->flags |= OPF_RMD;
+           break;
+         }
+
+         if (g_labels[k][0] != 0)
+           return -1;
+       }
+
+       k--;
+     }
+   }
+
+   return have_pop ? 0 : -1;
+ }
+
+ // is operand 'opr' modified by parsed_op 'po'?
+ // Ops flagged OPF_RMD and ops without OPF_DATA never count as modifying.
+ // When both 'opr' and the first operand of 'po' are registers, the answer
+ // comes from po's destination register mask; otherwise the name of po's
+ // first operand is compared against the name of 'opr'.
+ static int is_opr_modified(const struct parsed_opr *opr,
+   const struct parsed_op *po)
+ {
+   if (po->flags & OPF_RMD)
+     return 0;
+   if (!(po->flags & OPF_DATA))
+     return 0;
+
+   if (opr->type == OPT_REG && po->operand[0].type == OPT_REG)
+     return (po->regmask_dst & (1 << opr->reg)) ? 1 : 0;
+
+   return IS(po->operand[0].name, opr->name);
+ }
+
+ // is any operand of parsed_op 'po_test' modified by parsed_op 'po'?
+ // Ops flagged OPF_RMD and ops without OPF_DATA never count as modifying.
+ // A hit is either an overlap between po_test's source register mask and
+ // po's destination register mask, or any po_test operand having the same
+ // name as po's first operand.
+ static int is_any_opr_modified(const struct parsed_op *po_test,
+   const struct parsed_op *po)
+ {
+   const char *dst_name;
+   int n;
+
+   if (po->flags & OPF_RMD)
+     return 0;
+   if (!(po->flags & OPF_DATA))
+     return 0;
+
+   if (po_test->regmask_src & po->regmask_dst)
+     return 1;
+
+   dst_name = po->operand[0].name;
+   for (n = 0; n < po_test->operand_cnt; n++) {
+     if (IS(po_test->operand[n].name, dst_name))
+       return 1;
+   }
+
+   return 0;
+ }
+
+ // Look through ops[i] .. ops[opcnt - 1] for the first op that modifies
+ // any operand of 'po_test' (as decided by is_any_opr_modified()).
+ // Returns the index of that op, or -1 if there is none in the range.
+ static int scan_for_mod(struct parsed_op *po_test, int i, int opcnt)
+ {
+   while (i < opcnt) {
+     if (is_any_opr_modified(po_test, &ops[i]))
+       return i;
+     i++;
+   }
+
+   return -1;
+ }
+
+ // Look through ops[i] .. ops[opcnt - 1] for the first op that modifies
+ // operand[0] of 'po_test' (as decided by is_opr_modified()).
+ // Returns the index of that op, or -1 if there is none in the range.
+ static int scan_for_mod_opr0(struct parsed_op *po_test,
+   int i, int opcnt)
+ {
+   const struct parsed_opr *target = &po_test->operand[0];
+
+   while (i < opcnt) {
+     if (is_opr_modified(target, &ops[i]))
+       return i;
+     i++;
+   }
+
+   return -1;
+ }
+
+ // Scan backwards from op index 'i' for the nearest op flagged OPF_FLAGS.
+ // The scan gives up at an op that is a jump without OPF_CC, or at an op
+ // that carries a label, when that op itself is not flagged OPF_FLAGS.
+ // Returns the index of the op found, or -1.
+ static int scan_for_flag_set(int i)
+ {
+   const struct parsed_op *cur;
+
+   while (i >= 0) {
+     cur = &ops[i];
+     if (cur->flags & OPF_FLAGS)
+       return i;
+
+     if (((cur->flags & OPF_JMP) && !(cur->flags & OPF_CC))
+       || g_labels[i][0] != 0)
+     {
+       return -1;
+     }
+
+     i--;
+   }
+
+   return -1;
+ }
+
+ // Scan backwards from op index 'i' for a cdq (OP_CDQ) op.
+ // Fails if an op that has xDX in its destination register mask, or an op
+ // carrying a label, is reached before the cdq.
+ // Returns the index of the cdq, or -1 on failure.
+ static int scan_for_cdq_edx(int i)
+ {
+   const struct parsed_op *cur;
+
+   while (i >= 0) {
+     cur = &ops[i];
+     if (cur->op == OP_CDQ)
+       return i;
+
+     if ((cur->regmask_dst & (1 << xDX)) || g_labels[i][0] != 0)
+       return -1;
+
+     i--;
+   }
+
+   return -1;
+ }
+
+static void gen_func(FILE *fout, FILE *fhdr, const char *funcn, int opcnt)
+{
+ struct parsed_op *po, *delayed_flag_op = NULL, *tmp_op;
+ struct parsed_opr *last_arith_dst = NULL;
+ char buf1[256], buf2[256], buf3[256];
+ struct parsed_proto *pp, *pp_tmp;
+ const char *tmpname;
+ enum parsed_flag_op pfo;
+ int save_arg_vars = 0;
+ int cmp_result_vars = 0;
+ int need_mul_var = 0;
+ int had_decl = 0;
+ int regmask_arg = 0;
+ int regmask = 0;
+ int pfomask = 0;
+ int no_output;
+ int dummy;
+ int arg;
+ int i, j;
+ int reg;
+ int ret;
+
+ g_bp_frame = g_bp_stack = 0;
+
+ ret = proto_parse(fhdr, funcn, &g_func_pp);
+ if (ret)
+ ferr(ops, "proto_parse failed for '%s'\n", funcn);
+
+ fprintf(fout, "%s %s(", g_func_pp.ret_type, funcn);
+ for (i = 0; i < g_func_pp.argc; i++) {
+ if (i > 0)
+ fprintf(fout, ", ");
+ fprintf(fout, "%s a%d", g_func_pp.arg[i].type, i + 1);
+ }
+ fprintf(fout, ")\n{\n");
+
+ // pass1:
+ // - handle ebp frame, remove ops related to it
+ if (ops[0].op == OP_PUSH && IS(opr_name(&ops[0], 0), "ebp")
+ && ops[1].op == OP_MOV
+ && IS(opr_name(&ops[1], 0), "ebp")
+ && IS(opr_name(&ops[1], 1), "esp"))
+ {
+ int ecx_push = 0;
+
+ g_bp_frame = 1;
+ ops[0].flags |= OPF_RMD;
+ ops[1].flags |= OPF_RMD;
+
+ if (ops[2].op == OP_SUB && IS(opr_name(&ops[2], 0), "esp")) {
+ g_bp_stack = opr_const(&ops[2], 1);
+ ops[2].flags |= OPF_RMD;
+ }
+ else {
+ // another way msvc builds stack frame..
+ i = 2;
+ while (ops[i].op == OP_PUSH && IS(opr_name(&ops[i], 0), "ecx")) {
+ g_bp_stack += 4;
+ ops[i].flags |= OPF_RMD;
+ ecx_push++;
+ i++;
+ }
+ }
+
+ i = 2;
+ do {
+ for (; i < opcnt; i++)
+ if (ops[i].op == OP_RET)
+ break;
+ if (ops[i - 1].op != OP_POP || !IS(opr_name(&ops[i - 1], 0), "ebp"))
+ ferr(&ops[i - 1], "'pop ebp' expected\n");
+ ops[i - 1].flags |= OPF_RMD;
+
+ if (g_bp_stack != 0) {
+ if (ops[i - 2].op != OP_MOV
+ || !IS(opr_name(&ops[i - 2], 0), "esp")
+ || !IS(opr_name(&ops[i - 2], 1), "ebp"))
+ {
+ ferr(&ops[i - 2], "esp restore expected\n");
+ }
+ ops[i - 2].flags |= OPF_RMD;
+
+ if (ecx_push && ops[i - 3].op == OP_POP
+ && IS(opr_name(&ops[i - 3], 0), "ecx"))
+ {
+ ferr(&ops[i - 3], "unexpected ecx pop\n");
+ }
+ }
+
+ i++;
+ } while (i < opcnt);
+ }
+
+ // pass2:
+ // - resolve all branches
+ for (i = 0; i < opcnt; i++) {
+ po = &ops[i];
+ po->bt_i = -1;
+
+ if ((po->flags & OPF_RMD) || !(po->flags & OPF_JMP)
+ || po->op == OP_CALL || po->op == OP_RET)
+ continue;
+
+ for (j = 0; j < opcnt; j++) {
+ if (g_labels[j][0] && IS(po->operand[0].name, g_labels[j])) {
+ po->bt_i = j;
+ po->lrl = g_label_refs[j];
+ g_label_refs[j] = po;
+ break;
+ }
+ }
+
+ if (po->bt_i == -1) {
+ // assume tail call
+ po->op = OP_CALL;
+ po->flags |= OPF_TAIL;
+ }
+ }
+
+ // pass3:
+ // - find POPs for PUSHes, rm both
+ // - scan for all used registers
+ // - find flag set ops for their users
+ // - process calls
+ for (i = 0; i < opcnt; i++) {
+ po = &ops[i];
+ if (po->flags & OPF_RMD)
+ continue;
+
+ if (po->op == OP_PUSH && po->operand[0].type == OPT_REG) {
+ if (po->operand[0].reg < 0)
+ ferr(po, "reg not set for push?\n");
+ if (!(regmask & (1 << po->operand[0].reg))) { // reg save
+ ret = scan_for_pop(i + 1, opcnt,
+ po->operand[0].name, i + opcnt, 0);
+ if (ret == 1) {
+ po->flags |= OPF_RMD;
+ scan_for_pop(i + 1, opcnt, po->operand[0].name,
+ i + opcnt * 2, 1);
+ continue;
+ }
+ ret = scan_for_pop_ret(i + 1, opcnt, po->operand[0].name, 0);
+ if (ret == 0) {
+ po->flags |= OPF_RMD;
+ scan_for_pop_ret(i + 1, opcnt, po->operand[0].name, 1);
+ continue;
+ }
+ }
+ }
+
+ regmask |= po->regmask_src | po->regmask_dst;
+
+ if (po->flags & OPF_CC)
+ {
+ ret = scan_for_flag_set(i - 1);
+ if (ret < 0)
+ ferr(po, "unable to trace flag setter\n");
+
+ tmp_op = &ops[ret]; // flag setter
+ pfo = split_cond(po, po->op, &dummy);
+ pfomask = 0;
+
+ // to get nicer code, we try to delay test and cmp;
+ // if we can't because of operand modification, or if we
+ // have math op, make it calculate flags explicitly
+ if (tmp_op->op == OP_TEST || tmp_op->op == OP_CMP) {
+ if (scan_for_mod(tmp_op, ret + 1, i) >= 0)
+ pfomask = 1 << pfo;
+ }
+ else {
+ if ((pfo != PFO_Z && pfo != PFO_S && pfo != PFO_P)
+ || scan_for_mod_opr0(tmp_op, ret + 1, i) >= 0)
+ pfomask = 1 << pfo;
+ }
+ if (pfomask) {
+ tmp_op->pfomask |= pfomask;
+ cmp_result_vars |= pfomask;
+ po->datap = tmp_op;
+ }
+
+ if (po->op == OP_ADC || po->op == OP_SBB)
+ cmp_result_vars |= 1 << PFO_C;
+ }
+ else if (po->op == OP_CALL)
+ {
+ pp = malloc(sizeof(*pp));
+ my_assert_not(pp, NULL);
+ tmpname = opr_name(&ops[i], 0);
+ ret = proto_parse(fhdr, tmpname, pp);
+ if (ret)
+ ferr(po, "proto_parse failed for '%s'\n", tmpname);
+
+ for (arg = 0; arg < pp->argc; arg++)
+ if (pp->arg[arg].reg == NULL)
+ break;
+
+ for (j = i - 1; j >= 0 && arg < pp->argc; j--) {
+ if (ops[j].op == OP_CALL) {
+ pp_tmp = ops[j].datap;
+ if (pp_tmp == NULL)
+ ferr(po, "arg collect hit unparsed call\n");
+ if (pp_tmp->argc_stack > 0)
+ ferr(po, "arg collect hit '%s' with %d stack args\n",
+ opr_name(&ops[j], 0), pp_tmp->argc_stack);
+ }
+ else if ((ops[j].flags & OPF_TAIL)
+ || (ops[j].flags & (OPF_JMP|OPF_CC)) == OPF_JMP)
+ {
+ break;
+ }
+
+ if (ops[j].op == OP_PUSH) {
+ pp->arg[arg].datap = &ops[j];
+ ret = scan_for_mod(&ops[j], j + 1, i);
+ if (ret >= 0) {
+ // mark this push as one that needs operand saving
+ ops[j].argmask |= 1 << arg;
+ save_arg_vars |= 1 << arg;
+ }
+ else
+ ops[j].flags |= OPF_RMD;
+
+ // next arg
+ for (arg++; arg < pp->argc; arg++)
+ if (pp->arg[arg].reg == NULL)
+ break;
+ }
+
+ if (g_labels[j][0] != 0) {
+ if (j > 0 && ((ops[j - 1].flags & OPF_TAIL)
+ || (ops[j - 1].flags & (OPF_JMP|OPF_CC)) == OPF_JMP))
+ {
+ // follow the branch in reverse
+ if (g_label_refs[j] == NULL)
+ ferr(po, "no refs for '%s'?\n", g_labels[j]);
+ if (g_label_refs[j]->lrl != NULL)
+ ferr(po, "unhandled multiple fefs to '%s'\n", g_labels[j]);
+ j = (g_label_refs[j] - ops) + 1;
+ continue;
+ }
+ break;
+ }
+ }
+ if (arg < pp->argc)
+ ferr(po, "arg collect failed for '%s'\n", tmpname);
+ po->datap = pp;
+ }
+ else if (po->op == OP_MUL
+ || (po->op == OP_IMUL && po->operand_cnt == 1))
+ {
+ need_mul_var = 1;
+ }
+ }
+
+ // declare stack frame
+ if (g_bp_stack)
+ fprintf(fout, " union { u32 d[%d]; u16 w[%d]; u8 b[%d]; } sf;\n",
+ (g_bp_stack + 3) / 4, (g_bp_stack + 1) / 2, g_bp_stack);
+
+ // declare arg-registers
+ for (i = 0; i < g_func_pp.argc; i++) {
+ if (g_func_pp.arg[i].reg != NULL) {
+ reg = char_array_i(regs_r32,
+ ARRAY_SIZE(regs_r32), g_func_pp.arg[i].reg);
+ if (reg < 0)
+ ferr(ops, "arg '%s' is not a reg?\n", g_func_pp.arg[i].reg);
+
+ regmask_arg |= 1 << reg;
+ fprintf(fout, " u32 %s = (u32)a%d;\n",
+ g_func_pp.arg[i].reg, i + 1);
+ had_decl = 1;
+ }
+ }
+
+ // declare other regs - special case for eax
+ if (!((regmask | regmask_arg) & 1) && !IS(g_func_pp.ret_type, "void")) {
+ fprintf(fout, " u32 eax = 0;\n");
+ had_decl = 1;
+ }
+
+ regmask &= ~regmask_arg;
+ if (g_bp_frame)
+ regmask &= ~(1 << xBP);
+ if (regmask) {
+ for (reg = 0; reg < 8; reg++) {
+ if (regmask & (1 << reg)) {
+ fprintf(fout, " u32 %s;\n", regs_r32[reg]);
+ had_decl = 1;
+ }
+ }
+ }
+
+ if (save_arg_vars) {
+ for (reg = 0; reg < 32; reg++) {
+ if (save_arg_vars & (1 << reg)) {
+ fprintf(fout, " u32 s_a%d;\n", reg + 1);
+ had_decl = 1;
+ }
+ }
+ }
+
+ if (cmp_result_vars) {
+ for (i = 0; i < 8; i++) {
+ if (cmp_result_vars & (1 << i)) {
+ fprintf(fout, " u32 cond_%s;\n", parsed_flag_op_names[i]);
+ had_decl = 1;
+ }
+ }
+ }
+
+ if (need_mul_var) {
+ fprintf(fout, " u64 mul_tmp;\n");
+ had_decl = 1;
+ }
+
+ if (had_decl)
+ fprintf(fout, "\n");
+
+ // output ops
+ for (i = 0; i < opcnt; i++)
+ {
+ if (g_labels[i][0] != 0)
+ fprintf(fout, "\n%s:\n", g_labels[i]);
+
+ po = &ops[i];
+ if (po->flags & OPF_RMD)
+ continue;
+
+ no_output = 0;
+
+ #define assert_operand_cnt(n_) \
+ if (po->operand_cnt != n_) \
+ ferr(po, "operand_cnt is %d/%d\n", po->operand_cnt, n_)
+
+ // conditional/flag using op?
+ if (po->flags & OPF_CC)
+ {
+ int is_delayed = 0;
+ int is_inv = 0;
+
+ pfo = split_cond(po, po->op, &is_inv);
+
+ // we go through all this trouble to avoid using parsed_flag_op,
+ // which makes generated code much nicer
+ if (delayed_flag_op != NULL)
+ {
+ out_cmp_test(buf1, sizeof(buf1), delayed_flag_op, pfo, is_inv);
+ is_delayed = 1;
+ }
+ else if (last_arith_dst != NULL
+ && (pfo == PFO_Z || pfo == PFO_S || pfo == PFO_P))
+ {
+ out_src_opr(buf3, sizeof(buf3), po, last_arith_dst, 0);
+ out_test_for_cc(buf1, sizeof(buf1), po, pfo, is_inv,
+ last_arith_dst->lmod, buf3);
+ is_delayed = 1;
+ }
+ else if (po->datap != NULL) {
+ // use preprocessed results
+ tmp_op = po->datap;
+ if (!tmp_op || !(tmp_op->pfomask & (1 << pfo)))
+ ferr(po, "not prepared for pfo %d\n", pfo);
+
+ // note: is_inv was not yet applied
+ snprintf(buf1, sizeof(buf1), "(%scond_%s)",
+ is_inv ? "!" : "", parsed_flag_op_names[pfo]);
+ }
+ else {
+ ferr(po, "all methods of finding comparison failed\n");
+ }
+
+ if (po->flags & OPF_JMP) {
+ fprintf(fout, " if %s\n", buf1);
+ }
+ else if (po->op == OP_ADC || po->op == OP_SBB) {
+ if (is_delayed)
+ fprintf(fout, " cond_%s = %s;\n",
+ parsed_flag_op_names[pfo], buf1);
+ }
+ else if (po->flags & OPF_DATA) { // SETcc
+ out_dst_opr(buf2, sizeof(buf2), po, &po->operand[0]);
+ fprintf(fout, " %s = %s;", buf2, buf1);
+ }
+ else {
+ ferr(po, "unhandled conditional op\n");
+ }
+ }
+
+ pfomask = po->pfomask;
+
+ switch (po->op)
+ {
+ case OP_MOV:
+ assert_operand_cnt(2);
+ propagate_lmod(po, &po->operand[0], &po->operand[1]);
+ fprintf(fout, " %s = %s;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ break;
+
+ case OP_LEA:
+ assert_operand_cnt(2);
+ po->operand[1].lmod = OPLM_DWORD; // always
+ fprintf(fout, " %s = %s;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 1));
+ break;
+
+ case OP_MOVZX:
+ assert_operand_cnt(2);
+ fprintf(fout, " %s = %s;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ break;
+
+ case OP_MOVSX:
+ assert_operand_cnt(2);
+ switch (po->operand[1].lmod) {
+ case OPLM_BYTE:
+ strcpy(buf3, "(s8)");
+ break;
+ case OPLM_WORD:
+ strcpy(buf3, "(s16)");
+ break;
+ default:
+ ferr(po, "invalid src lmod: %d\n", po->operand[1].lmod);
+ }
+ fprintf(fout, " %s = %s%s;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ buf3,
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ break;
+
+ case OP_NOT:
+ assert_operand_cnt(1);
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]);
+ fprintf(fout, " %s = ~%s;", buf1, buf1);
+ break;
+
+ case OP_CDQ:
+ assert_operand_cnt(2);
+ fprintf(fout, " %s = (s32)%s >> 31;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ strcpy(g_comment, "cdq");
+ break;
+
+ case OP_STOS:
+ // assumes DF=0
+ assert_operand_cnt(3);
+ if (po->flags & OPF_REP) {
+ fprintf(fout, " for (; ecx != 0; ecx--, edi += %d)\n",
+ lmod_bytes(po, po->operand[0].lmod));
+ fprintf(fout, " *(u32 *)edi = eax;");
+ strcpy(g_comment, "rep stos");
+ }
+ else {
+ fprintf(fout, " *(u32 *)edi = eax; edi += %d;",
+ lmod_bytes(po, po->operand[0].lmod));
+ strcpy(g_comment, "stos");
+ }
+ break;
+
+ // arithmetic w/flags
+ case OP_ADD:
+ case OP_SUB:
+ case OP_AND:
+ case OP_OR:
+ propagate_lmod(po, &po->operand[0], &po->operand[1]);
+ // fallthrough
+ case OP_SHL:
+ case OP_SHR:
+ dualop_arith:
+ assert_operand_cnt(2);
+ fprintf(fout, " %s %s= %s;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ op_to_c(po),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_XOR:
+ assert_operand_cnt(2);
+ propagate_lmod(po, &po->operand[0], &po->operand[1]);
+ if (IS(opr_name(po, 0), opr_name(po, 1))) {
+ // special case for XOR
+ fprintf(fout, " %s = 0;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]));
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ break;
+ }
+ goto dualop_arith;
+
+ case OP_SAR:
+ assert_operand_cnt(2);
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]);
+ fprintf(fout, " %s = %s%s >> %s;", buf1,
+ lmod_cast_s(po, po->operand[0].lmod), buf1,
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_ADC:
+ case OP_SBB:
+ assert_operand_cnt(2);
+ propagate_lmod(po, &po->operand[0], &po->operand[1]);
+ fprintf(fout, " %s %s= %s + cond_c;",
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]),
+ op_to_c(po),
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[1], 0));
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_INC:
+ case OP_DEC:
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]);
+ strcpy(buf2, po->op == OP_INC ? "++" : "--");
+ fprintf(fout, " %s%s;", buf1, buf2);
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_NEG:
+ out_dst_opr(buf1, sizeof(buf1), po, &po->operand[0]);
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[0], 0);
+ fprintf(fout, " %s = -%s%s;", buf1,
+ lmod_cast_s(po, po->operand[0].lmod), buf2);
+ last_arith_dst = &po->operand[0];
+ delayed_flag_op = NULL;
+ if (pfomask & (1 << PFO_C)) {
+ fprintf(fout, "\n cond_c = (%s != 0);", buf1);
+ pfomask &= ~(1 << PFO_C);
+ }
+ break;
+
+ case OP_IMUL:
+ if (po->operand_cnt == 2)
+ goto dualop_arith;
+ if (po->operand_cnt == 3)
+ ferr(po, "TODO imul3\n");
+ // fallthrough
+ case OP_MUL:
+ assert_operand_cnt(1);
+ strcpy(buf1, po->op == OP_IMUL ? "(s64)(s32)" : "(u64)");
+ fprintf(fout, " mul_tmp = %seax * %s%s;\n", buf1, buf1,
+ out_src_opr(buf2, sizeof(buf2), po, &po->operand[0], 0));
+ fprintf(fout, " edx = mul_tmp >> 32;\n");
+ fprintf(fout, " eax = mul_tmp;");
+ last_arith_dst = NULL;
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_DIV:
+ case OP_IDIV:
+ assert_operand_cnt(1);
+ if (po->operand[0].lmod != OPLM_DWORD)
+ ferr(po, "unhandled lmod %d\n", po->operand[0].lmod);
+
+ // 32bit division is common, look for it
+ if (scan_for_cdq_edx(i - 1) >= 0) {
+ out_src_opr(buf1, sizeof(buf1), po, &po->operand[0], 0);
+ strcpy(buf2, lmod_cast(po, po->operand[0].lmod,
+ po->op == OP_IDIV));
+ fprintf(fout, " edx = %seax %% %s%s;\n", buf2, buf2, buf1);
+ fprintf(fout, " eax = %seax / %s%s;", buf2, buf2, buf1);
+ }
+ else
+ ferr(po, "TODO 64bit divident\n");
+ last_arith_dst = NULL;
+ delayed_flag_op = NULL;
+ break;
+
+ case OP_TEST:
+ case OP_CMP:
+ propagate_lmod(po, &po->operand[0], &po->operand[1]);
+ if (pfomask != 0) {
+ for (j = 0; j < 8; j++) {
+ if (pfomask & (1 << j)) {
+ out_cmp_test(buf1, sizeof(buf1), po, j, 0);
+ fprintf(fout, " cond_%s = %s;",
+ parsed_flag_op_names[j], buf1);
+ }
+ }
+ pfomask = 0;
+ }
+ else
+ no_output = 1;
+ delayed_flag_op = po;
+ break;
+
+ // note: we reuse OP_Jcc for SETcc, only flags differ
+ case OP_JO ... OP_JG:
+ if (po->flags & OPF_JMP)
+ fprintf(fout, " goto %s;", po->operand[0].name);
+ // else SETcc - should already be handled
+ break;
+
+ case OP_JMP:
+ assert_operand_cnt(1);
+ if (po->operand[0].type != OPT_LABEL)
+ ferr(po, "unhandled call type\n");
+
+ fprintf(fout, " goto %s;", po->operand[0].name);
+ break;
+
+ case OP_CALL:
+ assert_operand_cnt(1);
+ if (po->operand[0].type != OPT_LABEL)
+ ferr(po, "unhandled call type\n");
+
+ pp = po->datap;
+ if (pp == NULL)
+ ferr(po, "NULL pp\n");
+
+ fprintf(fout, " ");
+ if (!IS(pp->ret_type, "void")) {
+ if (po->flags & OPF_TAIL)
+ fprintf(fout, "return ");
+ else
+ fprintf(fout, "eax = ");
+ if (strchr(pp->ret_type, '*'))
+ fprintf(fout, "(u32)");
+ }
+
+ fprintf(fout, "%s(", opr_name(po, 0));
+ for (arg = 0; arg < pp->argc; arg++) {
+ if (arg > 0)
+ fprintf(fout, ", ");
+
+ if (strchr(pp->arg[arg].type, '*'))
+ fprintf(fout, "(%s)", pp->arg[arg].type);
+
+ if (pp->arg[arg].reg != NULL) {
+ fprintf(fout, "%s", pp->arg[arg].reg);
+ continue;
+ }
+
+ // stack arg
+ tmp_op = pp->arg[arg].datap;
+ if (tmp_op == NULL)
+ ferr(po, "parsed_op missing for arg%d\n", arg);
+ if (tmp_op->argmask) {
+ fprintf(fout, "s_a%d", arg + 1);
+ }
+ else {
+ fprintf(fout, "%s",
+ out_src_opr(buf1, sizeof(buf1),
+ tmp_op, &tmp_op->operand[0], 0));
+ }
+ }
+ fprintf(fout, ");");
+
+ if (po->flags & OPF_TAIL) {
+ strcpy(g_comment, "tailcall");
+ if (IS(pp->ret_type, "void"))
+ fprintf(fout, "\n return;");
+ }
+ delayed_flag_op = NULL;
+ last_arith_dst = NULL;
+ break;
+
+ case OP_RET:
+ if (IS(g_func_pp.ret_type, "void"))
+ fprintf(fout, " return;");
+ else
+ fprintf(fout, " return eax;");
+ break;
+
+ case OP_PUSH:
+ if (po->argmask) {
+ // special case - saved func arg
+ for (j = 0; j < 32; j++) {
+ if (po->argmask & (1 << j)) {
+ fprintf(fout, " s_a%d = %s;", j + 1,
+ out_src_opr(buf1, sizeof(buf1), po, &po->operand[0], 0));
+ }
+ }
+ break;
+ }
+ ferr(po, "push encountered\n");
+ break;
+
+ case OP_POP:
+ ferr(po, "pop encountered\n");
+ break;
+
+ case OP_NOP:
+ break;
+
+ default:
+ no_output = 1;
+ ferr(po, "unhandled op type %d, flags %x\n",
+ po->op, po->flags);
+ break;
+ }
+
+ if (g_comment[0] != 0) {
+ fprintf(fout, " // %s", g_comment);
+ g_comment[0] = 0;
+ no_output = 0;
+ }
+ if (!no_output)
+ fprintf(fout, "\n");
+
+ if (pfomask != 0)
+ ferr(po, "missed flag calc, pfomask=%x\n", pfomask);
+
+ // see if delayed flag stuff is still valid
+ if (delayed_flag_op != NULL && delayed_flag_op != po) {
+ if (is_any_opr_modified(delayed_flag_op, po))
+ delayed_flag_op = NULL;
+ }
+
+ if (last_arith_dst != NULL && last_arith_dst != &po->operand[0]) {
+ if (is_opr_modified(last_arith_dst, po))
+ last_arith_dst = NULL;
+ }
+ }
+
+ fprintf(fout, "}\n\n");
+
+ // cleanup
+ for (i = 0; i < opcnt; i++) {
+ if (ops[i].op == OP_CALL) {
+ pp = ops[i].datap;
+ if (pp) {
+ proto_release(pp);
+ free(pp);
+ }
+ }
+ }
+ proto_release(&g_func_pp);