From: kub Date: Wed, 27 Nov 2019 20:02:53 +0000 (+0100) Subject: sh2 drc: bug fixing X-Git-Tag: v2.00~810 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=57d863cb876af1b19a9aaa83b72288fae3f40dcf;p=picodrive.git sh2 drc: bug fixing --- diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 8f4718ee..7a832747 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -25,7 +25,7 @@ #define PR 18 // platform register // All operations but ptr ops are using the lower 32 bits of the A64 registers. -// The upper 32 bits are only used in ptr ops. +// The upper 32 bits are only used in ptr ops and are zeroed by A64 32 bit ops. #define A64_COND_EQ 0x0 diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index c9c006c8..062737f6 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -33,6 +33,8 @@ #define FC 24 // emulated processor flags: C (bit 0), others 0 #define FV 25 // emulated processor flags: Nt^Ns (bit 31). others x +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. // unified conditions; virtual, not corresponding to anything real on MIPS #define DCOND_EQ 0x0 @@ -1095,10 +1097,10 @@ static void emith_lohi_nops(void) emith_lohi_nops(); \ EMIT(MIPS_MULT(s1, s2)); \ EMIT(MIPS_MFLO(AT)); \ - emith_add_r_r(dlo, AT); \ - EMIT(MIPS_SLTU_REG(t_, dlo, AT)); \ - EMIT(MIPS_MFHI(AT)); \ + EMIT(MIPS_MFHI(t_)); \ last_lohi = (u8 *)tcache_ptr; \ + emith_add_r_r(dlo, AT); \ + EMIT(MIPS_SLTU_REG(AT, dlo, AT)); \ emith_add_r_r(dhi, AT); \ emith_add_r_r(dhi, t_); \ rcache_free_tmp(t_); \ @@ -1479,7 +1481,7 @@ static int emith_cond_check(int cond, int *r) // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ - emith_add_r_r_ptr_imm(SP, SP, -8-16); /* ABI: 16 byte arg save area */ \ + emith_add_r_r_ptr_imm(SP, SP, -8-16); /* O32: 16 byte arg save area */ \ emith_write_r_r_offs(LR, SP, 4+16); \ if ((r) > 0) emith_write_r_r_offs(r, SP, 0+16); \ } while (0) diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index fe4da035..0f614f18 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -30,6 +30,8 @@ #define FC 29 // emulated processor flags: C (bit 0), others 0 #define FV 28 // emulated processor flags: Nt^Ns (bit 31). others x +// All operations but ptr ops are using the lower 32 bits of the registers. +// The upper 32 bits always contain the sign extension from the lower 32 bits. // unified conditions; virtual, not corresponding to anything real on RISC-V #define DCOND_EQ 0x0 @@ -217,12 +219,9 @@ enum { F2_ALT=0x20, F2_MULDIV=0x01 }; // NB: must split 64 bit result into 2 32 bit registers // NB: expects 32 bit values in s1+s2, correctly sign extended to 64 bits #define EMIT_R5_MULLU_REG(dlo, dhi, s1, s2) do { \ - /*EMIT(R5_ADDW_IMM(s1, s1, 0));*/ \ - /*EMIT(R5_ADDW_IMM(s2, s2, 0));*/ \ EMIT(R5_MUL(dlo, s1, s2)); \ EMIT(R5_ASR_IMM(dhi, dlo, 32)); \ - EMIT(R5_LSL_IMM(dlo, dlo, 32)); \ - EMIT(R5_ASR_IMM(dlo, dlo, 32)); \ + EMIT(R5_ADDW_IMM(dlo, dlo, 0)); \ } while (0) #define EMIT_R5_MULLS_REG(dlo, dhi, s1, s2) \ @@ -633,7 +632,7 @@ static int literal_pindex, literal_iindex; static inline int emith_pool_literal(uintptr_t imm) { int idx = literal_pindex - 8; // max look behind in pool - // see if one of the last literals was the same (or close enough) + // see if one of the last literals was the same for (idx = (idx < 0 ? 0 : idx); idx < literal_pindex; idx++) if (imm == literal_pool[idx]) break; diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index a12dfe96..57bfc212 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -7,21 +7,24 @@ * See COPYING file in the top-level directory. * * notes: - * - tcache, block descriptor, link buffer overflows result in sh2_translate() - * failure, followed by full tcache invalidation for that region + * - tcache, block descriptor, block entry buffer overflows result in oldest + * blocks being deleted until enough space is available + * - link and list element buffer overflows result in failure and exit * - jumps between blocks are tracked for SMC handling (in block_entry->links), - * except jumps between different tcaches + * except jumps from global to CPU-local tcaches * * implemented: * - static register allocation * - remaining register caching and tracking in temporaries * - block-local branch linking - * - block linking (except between tcaches) + * - block linking * - some constant propagation + * - call stack caching for host block entry address + * - delay, poll, and idle loop detection and handling + * - some T/M flag optimizations where the value is known or isn't used * * TODO: * - better constant propagation - * - stack caching? * - bug fixing */ #include @@ -1068,7 +1071,7 @@ static struct block_desc *dr_add_block(int entries, u32 addr, int size, if (be != NULL) dbg(1, "block override for %08x", addr); - if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || + if (block_ring[tcache_id].used + 1 > block_ring[tcache_id].size || entry_ring[tcache_id].used + entries > entry_ring[tcache_id].size) { dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; @@ -3014,13 +3017,13 @@ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // branch targets in current block - struct linkage branch_targets[MAX_LOCAL_TARGETS]; + static struct linkage branch_targets[MAX_LOCAL_TARGETS]; int branch_target_count = 0; // unresolved local or external targets with block link/exit area if needed - struct linkage blx_targets[MAX_LOCAL_BRANCHES]; + static struct linkage blx_targets[MAX_LOCAL_BRANCHES]; int blx_target_count = 0; - u8 op_flags[BLOCK_INSN_LIMIT]; + static u8 op_flags[BLOCK_INSN_LIMIT]; enum flg_states { FLG_UNKNOWN, FLG_UNUSED, FLG_0, FLG_1 }; struct drcf { @@ -3037,7 +3040,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if LOOP_OPTIMIZER // loops with pinned registers for optimzation // pinned regs are like statics and don't need saving/restoring inside a loop - struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; + static struct linkage pinned_loops[MAX_LOCAL_TARGETS/16]; int pinned_loop_count = 0; #endif @@ -3479,6 +3482,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // no sense in looking any further than the next rcache flush tmp = ((op_flags[i+v] & OF_BTARGET) || (op_flags[i+v-1] & OF_DELAY_OP) || (OP_ISBRACND(opd[v-1].op) && !(op_flags[i+v] & OF_DELAY_OP))); + // XXX looking behind cond branch to avoid evicting regs used later? if (pc + 2*v <= end_pc && !tmp) { // (pc already incremented above) late |= opd[v].source & ~write; // ignore source regs after they have been written to @@ -4636,6 +4640,7 @@ end_op: rcache_invalidate(); } } else + // no space for resolving forward branch, handle it as external dbg(1, "warning: too many unresolved branches"); } @@ -4657,6 +4662,7 @@ end_op: EMITH_JMP_START(emith_invert_cond(cond)); if (bl) { bl->jump = tcache_ptr; + emith_flush(); // flush to inhibit insn swapping bl->type = BL_LDJMP; } tmp = rcache_get_tmp_arg(0); @@ -5534,7 +5540,7 @@ int sh2_drc_init(SH2 *sh2) i = tcache_ptr - tcache; RING_INIT(&tcache_ring[0], tcache_ptr, tcache_sizes[0] - i); for (i = 1; i < ARRAY_SIZE(tcache_ring); i++) { - RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_sizes[i-1], + RING_INIT(&tcache_ring[i], tcache_ring[i-1].base + tcache_ring[i-1].size, tcache_sizes[i]); }