From 31efd4546e9246c7999dbb748cd1c7727df5896a Mon Sep 17 00:00:00 2001 From: kub Date: Sun, 9 Jun 2024 22:30:51 +0000 Subject: [PATCH] sh2 drc, several bug fixes --- cpu/drc/emit_arm.c | 4 ++-- cpu/drc/emit_arm64.c | 2 +- cpu/drc/emit_mips.c | 10 +++++++- cpu/drc/emit_ppc.c | 2 +- cpu/drc/emit_riscv.c | 6 ++--- cpu/drc/emit_x86.c | 5 ++-- cpu/sh2/compiler.c | 45 ++++++++++++++++++------------------ cpu/sh2/mame/sh2pico.c | 2 +- pico/32x/sh2soc.c | 11 +++++---- platform/libretro/libretro.c | 2 +- platform/linux/emu.c | 2 +- 11 files changed, 51 insertions(+), 40 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 7d51c357..ecaf06e2 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1,7 +1,7 @@ /* * Basic macros to emit ARM instructions and some utils * Copyright (C) 2008,2009,2010 notaz - * Copyright (C) 2019 kub + * Copyright (C) 2019-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -1196,7 +1196,7 @@ static inline void emith_pool_adjust(int tcache_offs, int move_offs) #define emith_jump_at(ptr, target) do { \ u32 *ptr_ = (u32 *)ptr; \ - u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ + u32 val_ = (u32 *)(target) - ptr_ - 2; \ EOP_C_B_PTR(ptr_, A_COND_AL, 0, val_ & 0xffffff); \ } while (0) #define emith_jump_at_size() 4 diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index b8ae926a..10eb8a2a 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -1,6 +1,6 @@ /* * Basic macros to emit ARM A64 instructions and some utils - * Copyright (C) 2019 kub + * Copyright (C) 2019-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index d775fdd8..a92709d3 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -1,6 +1,6 @@ /* * Basic macros to emit MIPS32/MIPS64 Release 1 or 2 instructions and some utils - * Copyright (C) 2019 kub + * Copyright (C) 2019-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -1671,12 +1671,20 @@ static NOINLINE void host_instructions_updated(void *base, void *end, int force) asm volatile( " rdhwr %2, $1;" " bal 0f;" // needed to allow for jr.hb: +#if _MIPS_SZPTR == 64 + "0: daddiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#else "0: addiu $ra, $ra, 3f-0b;" // set ra to insn after jr.hb +#endif " beqz %2, 3f;" "1: synci 0(%0);" " sltu %3, %0, %1;" +#if _MIPS_SZPTR == 64 + " daddu %0, %0, %2;" +#else " addu %0, %0, %2;" +#endif " bnez %3, 1b;" " sync;" diff --git a/cpu/drc/emit_ppc.c b/cpu/drc/emit_ppc.c index f8e05b14..a765eb1e 100644 --- a/cpu/drc/emit_ppc.c +++ b/cpu/drc/emit_ppc.c @@ -1,6 +1,6 @@ /* * Basic macros to emit PowerISA 2.03 64 bit instructions and some utils - * Copyright (C) 2020 kub + * Copyright (C) 2020-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. diff --git a/cpu/drc/emit_riscv.c b/cpu/drc/emit_riscv.c index d4fd65b8..0670a45c 100644 --- a/cpu/drc/emit_riscv.c +++ b/cpu/drc/emit_riscv.c @@ -1,6 +1,6 @@ /* * Basic macros to emit RISC-V RV64IM instructions and some utils - * Copyright (C) 2019 kub + * Copyright (C) 2019-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -710,9 +710,9 @@ static void emith_move_imm(int r, uintptr_t imm) if (lui >> 12) { EMIT(R5_MOVT_IMM(r, lui)); if (imm & 0xfff) - EMIT(R5_ADD_IMM(r, r, imm)); + EMIT(R5_ADDW_IMM(r, r, imm)); } else - EMIT(R5_ADD_IMM(r, Z0, imm)); + EMIT(R5_ADDW_IMM(r, Z0, imm)); } static void emith_move_ptr_imm(int r, uintptr_t imm) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 53d52385..628adbdd 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,7 +1,7 @@ /* * Basic macros to emit x86 instructions and some utils * Copyright (C) 2008,2009,2010 notaz - * Copyright (C) 2019 kub + * Copyright (C) 2019-2024 kub * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -1365,7 +1365,8 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common /* overflow if top 17 bits of MACH aren't all 1 or 0 */ \ /* to check: add MACH >> 31 to MACH >> 15. this is 0 if no overflow */ \ emith_asr(rn, mh, 15); \ - emith_addf_r_r_r_lsr(rn, rn, mh, 31); \ + emith_lsr(rm, mh, 31); \ + emith_addf_r_r(rn, rm); \ EMITH_SJMP_START(DCOND_EQ); /* sum != 0 -> -ovl */ \ emith_move_r_imm_c(DCOND_NE, ml, 0x00000000); \ emith_move_r_imm_c(DCOND_NE, mh, 0x00008000); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b87f2b9b..0714eb94 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -1,7 +1,7 @@ /* * SH2 recompiler * (C) notaz, 2009,2010,2013 - * (C) kub, 2018,2019,2020 + * (C) kub, 2018-2024 * * This work is licensed under the terms of MAME license. * See COPYING file in the top-level directory. @@ -2610,7 +2610,8 @@ static uptr split_address(uptr la, uptr mask, s32 *offs) #ifdef __arm__ // arm32 offset has an add/sub flag and an unsigned 8 bit value, which only // allows values of [-255...255]. the value -256 thus can't be used. - if (*offs + sign == 0) { + if (*offs < 0) { // TODO not working at all with negative offsets on ARM? + //if (*offs == -sign) { la -= sign; *offs += sign; } @@ -2631,7 +2632,7 @@ static int emit_get_rbase_and_offs(SH2 *sh2, sh2_reg_e r, int rmode, s32 *offs) // is r constant and points to a memory region? if (! gconst_get(r, &a)) return -1; - poffs = dr_ctx_get_mem_ptr(sh2, a, &mask); + poffs = dr_ctx_get_mem_ptr(sh2, a + *offs, &mask); if (poffs == -1) return -1; @@ -3244,10 +3245,11 @@ static void emit_branch_linkage_code(SH2 *sh2, struct block_desc *block, int tca } #define FLUSH_CYCLES(sr) \ - if (cycles > 0) { \ + if (cycles > 0) \ emith_sub_r_imm(sr, cycles << 12); \ - cycles = 0; \ - } + else if (cycles < 0) /* may happen after a branch not taken */ \ + emith_add_r_imm(sr, -cycles << 12); \ + cycles = 0; \ static void *dr_get_pc_base(u32 pc, SH2 *sh2); static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free); @@ -3960,10 +3962,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if DIV_OPTIMIZER if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { // divide 32/16 - tmp = rcache_get_tmp_arg(1); - emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); rcache_get_reg_arg(0, div(opd).rn, NULL); rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp = rcache_get_tmp_arg(1); + emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); rcache_invalidate_tmp(); emith_abicall(sh2_drc_divu32); tmp = rcache_get_tmp_ret(); @@ -3979,16 +3981,17 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_r(sr, sr, tmp3); // T rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { // divide 64/32 tmp4 = rcache_get_reg(div(opd).ro, RC_GR_READ, NULL); emith_ctx_write(tmp4, offsetof(SH2, drc_tmp)); rcache_free(tmp4); - tmp = rcache_get_tmp_arg(1); - emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); rcache_get_reg_arg(0, div(opd).rn, NULL); rcache_get_reg_arg(2, div(opd).rm, NULL); + tmp = rcache_get_tmp_arg(1); + emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); rcache_invalidate_tmp(); emith_abicall(sh2_drc_divu64); tmp = rcache_get_tmp_ret(); @@ -4004,6 +4007,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } #endif break; @@ -4085,13 +4089,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) #if DIV_OPTIMIZER if (div(opd).div1 == 16 && div(opd).ro == div(opd).rn) { // divide 32/16 - tmp = rcache_get_tmp_arg(1); - emith_add_r_r_ptr_imm(tmp, CONTEXT_REG, offsetof(SH2, drc_tmp)); - rcache_get_reg_arg(0, div(opd).rn, NULL); + tmp = rcache_get_reg_arg(0, div(opd).rn, NULL); tmp2 = rcache_get_reg_arg(2, div(opd).rm, NULL); - tmp3 = rcache_get_tmp(); + tmp3 = rcache_get_tmp_arg(1); emith_lsr(tmp3, tmp2, 31); emith_or_r_r_lsl(sr, tmp3, M_SHIFT); // M = Rm[31] + emith_add_r_r_ptr_imm(tmp3, CONTEXT_REG, offsetof(SH2, drc_tmp)); rcache_invalidate_tmp(); emith_abicall(sh2_drc_divs32); tmp = rcache_get_tmp_ret(); @@ -4108,6 +4111,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_r(sr, sr, tmp3); // T rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else if (div(opd).div1 == 32 && div(opd).ro != div(opd).rn) { // divide 64/32 @@ -4138,6 +4142,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) emith_or_r_r_lsl(sr, tmp3, Q_SHIFT); // Q = !Ro[0]^M rcache_free_tmp(tmp3); skip_op = div(opd).div1 + div(opd).rotcl; + cycles += skip_op; } else #endif { @@ -5113,7 +5118,7 @@ end_op: emith_move_r_imm_s8_patch(rtsadd, tcache_ptr - (u8 *)rtsret); #endif - // branch not taken, correct cycle count + // branch not taken, correct cycle count (now, cycles < 0) if (ctaken) cycles -= ctaken; // set T bit to reflect branch not taken for OP_BRANCH_CT/CF @@ -5243,10 +5248,6 @@ end_op: printf("~~~\n"); */ -#if (DRC_DEBUG) - fflush(stdout); -#endif - return block_entry_ptr; } @@ -5675,8 +5676,9 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, int free) a += rest, len -= rest; } while (len > 0); - if (!removed && len <= 4) { - dbg(2, "rm_blocks called @%08x, no work?", _a); + if (!removed) { + if (len <= 4) + dbg(2, "rm_blocks called @%08x, no work?", _a); return; } @@ -5984,7 +5986,6 @@ int sh2_drc_init(SH2 *sh2) // disasm the utils tcache_dsm_ptrs[0] = tcache; do_host_disasm(0); - fflush(stdout); #endif #if (DRC_DEBUG & 1) hash_collisions = 0; diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index 65f4757e..2c2ea406 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -1,7 +1,7 @@ #include "../sh2.h" #ifdef DRC_CMP -#include "../compiler.c" +#include "../compiler.h" #define BUSY_LOOP_HACKS 0 #else #define BUSY_LOOP_HACKS 1 diff --git a/pico/32x/sh2soc.c b/pico/32x/sh2soc.c index fb1cf1e9..6b122eac 100644 --- a/pico/32x/sh2soc.c +++ b/pico/32x/sh2soc.c @@ -435,24 +435,25 @@ void REGPARM(3) sh2_peripheral_write32(u32 a, u32 d, SH2 *sh2) old = r[a / 4]; r[a / 4] = d; + // TODO: DRC doesn't correctly extend 'd' parameter register to 64bit :-/ switch (a) { // division unit (TODO: verify): case 0x104: // DVDNT: divident L, starts divide elprintf_sh2(sh2, EL_32XP, "divide %08x / %08x", - d, r[0x100 / 4]); + r[0x104 / 4], r[0x100 / 4]); if (r[0x100 / 4]) { signed int divisor = r[0x100 / 4]; - r[0x118 / 4] = r[0x110 / 4] = (signed int)d % divisor; - r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)d / divisor; + r[0x118 / 4] = r[0x110 / 4] = (signed int)r[0x104 / 4] % divisor; + r[0x104 / 4] = r[0x11c / 4] = r[0x114 / 4] = (signed int)r[0x104 / 4] / divisor; } else r[0x110 / 4] = r[0x114 / 4] = r[0x118 / 4] = r[0x11c / 4] = 0; // ? break; case 0x114: elprintf_sh2(sh2, EL_32XP, "divide %08x%08x / %08x @%08x", - r[0x110 / 4], d, r[0x100 / 4], sh2_pc(sh2)); + r[0x110 / 4], r[0x114 / 4], r[0x100 / 4], sh2_pc(sh2)); if (r[0x100 / 4]) { - signed long long divident = (signed long long)r[0x110 / 4] << 32 | d; + signed long long divident = (signed long long)r[0x110 / 4] << 32 | r[0x114 / 4]; signed int divisor = r[0x100 / 4]; // XXX: undocumented mirroring to 0x118,0x11c? r[0x118 / 4] = r[0x110 / 4] = divident % divisor; diff --git a/platform/libretro/libretro.c b/platform/libretro/libretro.c index 62c09103..fa3f7e5a 100644 --- a/platform/libretro/libretro.c +++ b/platform/libretro/libretro.c @@ -2542,7 +2542,7 @@ void retro_init(void) | POPT_EN_MCD_PCM|POPT_EN_MCD_CDDA|POPT_EN_MCD_GFX | POPT_EN_32X|POPT_EN_PWM | POPT_ACC_SPRITES|POPT_DIS_32C_BORDER; -#ifdef __arm__ +#ifdef DRC_SH2 #ifdef _3DS if (ctr_svchack_successful) #endif diff --git a/platform/linux/emu.c b/platform/linux/emu.c index 5d65ad5e..7417ca09 100644 --- a/platform/linux/emu.c +++ b/platform/linux/emu.c @@ -36,7 +36,7 @@ void pemu_prep_defconfig(void) void pemu_validate_config(void) { -#if !defined(__arm__) && !defined(__aarch64__) && !defined(__mips__) && !defined(__riscv__) && !defined(__riscv) && !defined(__powerpc__) && !defined(__ppc__) && !defined(__PPC__) && !defined(__i386__) && !defined(__x86_64__) +#if !defined(DRC_SH2) PicoIn.opt &= ~POPT_EN_DRC; #endif } -- 2.39.5