From aa4c4cb951d3ec16975d2d546c3cb3bbb56e94d2 Mon Sep 17 00:00:00 2001 From: kub Date: Fri, 26 Apr 2019 18:53:21 +0200 Subject: [PATCH] sh2 drc, make B/W read functions signed (reduces generated code size) --- cpu/drc/emit_arm.c | 44 ++++++++++++++++++++++++++++-------------- cpu/drc/emit_x86.c | 22 ++++++++++++++++++--- cpu/sh2/compiler.c | 34 ++++++++++++++------------------ cpu/sh2/compiler.h | 4 ++-- cpu/sh2/mame/sh2.c | 10 +++++----- cpu/sh2/mame/sh2pico.c | 8 ++++---- pico/32x/memory.c | 19 +++++++++--------- 7 files changed, 83 insertions(+), 58 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index d8674a03..586f0a54 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -795,6 +795,8 @@ static inline void emith_pool_check(void) emith_read_r_r_offs_c(cond, r, rs, offs) #define emith_read_r_r_r_c(cond, r, rs, rm) \ EOP_LDR_REG_LSL(cond, r, rs, rm, 0) +#define emith_read_r_r_offs(r, rs, offs) \ + emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read_r_r_r(r, rs, rm) \ EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) @@ -802,28 +804,37 @@ static inline void emith_pool_check(void) EOP_LDRB_IMM2(cond, r, rs, offs) #define emith_read8_r_r_r_c(cond, r, rs, rm) \ EOP_LDRB_REG_LSL(cond, r, rs, rm, 0) +#define emith_read8_r_r_offs(r, rs, offs) \ + emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read8_r_r_r(r, rs, rm) \ - EOP_LDRB_REG_LSL(A_COND_AL, r, rs, rm, 0) + emith_read8_r_r_r_c(A_COND_AL, r, rs, rm) #define emith_read16_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRH_IMM2(cond, r, rs, offs) #define emith_read16_r_r_r_c(cond, r, rs, rm) \ EOP_LDRH_REG2(cond, r, rs, rm) +#define emith_read16_r_r_offs(r, rs, offs) \ + emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read16_r_r_r(r, rs, rm) \ - EOP_LDRH_REG2(A_COND_AL, r, rs, rm) - -#define emith_read_r_r_offs(r, rs, offs) \ - emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) + emith_read16_r_r_r_c(A_COND_AL, r, rs, rm) +#define emith_read8s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSB_IMM2(cond, r, rs, offs) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSB_REG2(cond, r, rs, rm) #define emith_read8s_r_r_offs(r, rs, offs) \ - EOP_LDRSB_IMM2(A_COND_AL, r, rs, offs) -#define emith_read8_r_r_offs(r, rs, offs) \ - emith_read8_r_r_offs_c(A_COND_AL, r, rs, offs) - + emith_read8s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read8s_r_r_r(r, rs, rm) \ + emith_read8s_r_r_r_c(A_COND_AL, r, rs, rm) + +#define emith_read16s_r_r_offs_c(cond, r, rs, offs) \ + EOP_LDRSH_IMM2(cond, r, rs, offs) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + EOP_LDRSH_REG2(cond, r, rs, rm) #define emith_read16s_r_r_offs(r, rs, offs) \ - EOP_LDRSH_IMM2(A_COND_AL, r, rs, offs) -#define emith_read16_r_r_offs(r, rs, offs) \ - emith_read16_r_r_offs_c(A_COND_AL, r, rs, offs) + emith_read16s_r_r_offs_c(A_COND_AL, r, rs, offs) +#define emith_read16s_r_r_r(r, rs, rm) \ + emith_read16s_r_r_r_c(A_COND_AL, r, rs, rm) #define emith_write_r_r_offs_c(cond, r, rs, offs) \ EOP_STR_IMM2(cond, r, rs, offs) @@ -945,6 +956,11 @@ static inline void emith_pool_check(void) #define emith_call(target) \ emith_call_cond(A_COND_AL, target) +#define emith_call_reg(r) { \ + emith_move_r_r(14, 15); \ + EOP_C_BX(A_COND_AL, r); \ +} + #define emith_call_ctx(offs) { \ emith_move_r_r(14, 15); \ emith_jump_ctx(offs); \ @@ -1091,9 +1107,7 @@ static inline void emith_pool_check(void) } while (0) /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ -#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ - emith_sext(rn, rn, 16); \ - emith_sext(rm, rm, 16); \ +#define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ emith_tst_r_imm(sr, S); \ EMITH_SJMP2_START(DCOND_NE); \ emith_mula_s64_c(DCOND_EQ, ml, mh, rn, rm); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 1ac4ee01..5805aadd 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -397,8 +397,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_read8_r_r_r_c(cond, r, rs, rm) \ emith_read8_r_r_r(r, rs, rm) +#define emith_read8s_r_r_r_c(cond, r, rs, rm) \ + emith_read8s_r_r_r(r, rs, rm) #define emith_read16_r_r_r_c(cond, r, rs, rm) \ emith_read16_r_r_r(r, rs, rm) +#define emith_read16s_r_r_r_c(cond, r, rs, rm) \ + emith_read16s_r_r_r(r, rs, rm) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) @@ -684,12 +688,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read8s_r_r_r(r, rs, rm) do { \ + EMIT(0x0f, u8); \ + EMIT_OP_MODRM(0xbe, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + #define emith_read16_r_r_r(r, rs, rm) do { \ EMIT(0x0f, u8); \ EMIT_OP_MODRM(0xb7, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) +#define emith_read16s_r_r_r(r, rs, rm) do { \ + EMIT(0x0f, u8); \ + EMIT_OP_MODRM(0xbf, 0, r, 4); \ + EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ +} while (0) + #define emith_read_r_r_r(r, rs, rm) do { \ EMIT_OP_MODRM(0x8b, 0, r, 4); \ EMIT_SIB(0, rs, rm); /* mov r, [rm + rs * 1] */ \ @@ -785,9 +801,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } while (0) -#define emith_push_ret() +#define emith_push_ret() \ + emith_push(xSI); /* to align */ #define emith_pop_and_ret() \ + emith_pop(xSI); \ emith_ret() #define EMITH_JMP_START(cond) { \ @@ -1080,8 +1098,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; /* mh:ml += rn*rm, does saturation if required by S bit. rn, rm must be TEMP */ #define emith_sh2_macw(ml, mh, rn, rm, sr) do { \ - emith_sext(rn, rn, 16); \ - emith_sext(rm, rm, 16); \ emith_tst_r_imm(sr, S); \ EMITH_SJMP_START(DCOND_EQ); \ /* XXX: MACH should be untouched when S is set? */ \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index cd85b373..517be81c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -2354,17 +2354,15 @@ static int emit_memhandler_read_rr(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rs, u32 off hr2 = hr; else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, size != 2 ? RC_GR_RMW : RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif - if (rd != SHR_TMP && size != 2) { // 16, 8 - emith_sext(hr2, hr, size ? 16 : 8); - } else if (hr != hr2) // 32 + if (hr != hr2) { emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); + } return hr2; } @@ -2422,21 +2420,19 @@ static int emit_indirect_indexed_read(SH2 *sh2, sh2_reg_e rd, sh2_reg_e rx, sh2_ hr = emit_memhandler_read(size); size &= MF_SIZEMASK; - if (rd != SHR_TMP) + if (rd == SHR_TMP) + hr2 = hr; + else #if REMAP_REGISTER - hr2 = rcache_map_reg(rd, hr, size != 2 ? RC_GR_RMW : RC_GR_WRITE); + hr2 = rcache_map_reg(rd, hr, RC_GR_WRITE); #else hr2 = rcache_get_reg(rd, RC_GR_WRITE, NULL); #endif - else - hr2 = hr; - if (rd != SHR_TMP && size != 2) { // 16, 8 - emith_sext(hr2, hr, size ? 16 : 8); - } else if (hr != hr2) // 32 + if (hr != hr2) { emith_move_r_r(hr2, hr); - if (hr != hr2) rcache_free_tmp(hr); + } return hr2; } @@ -2991,16 +2987,14 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tmp2 = emit_memhandler_read(opd->size); #if REMAP_REGISTER - tmp3 = rcache_map_reg(GET_Rn(), tmp2, opd->size != 2 ? RC_GR_RMW : RC_GR_WRITE); + tmp3 = rcache_map_reg(GET_Rn(), tmp2, RC_GR_WRITE); #else tmp3 = rcache_get_reg(GET_Rn(), RC_GR_WRITE, NULL); #endif - if (opd->size != 2) { - emith_sext(tmp3, tmp2, 16); - } else if (tmp3 != tmp2) + if (tmp3 != tmp2) { emith_move_r_r(tmp3, tmp2); - if (tmp3 != tmp2) rcache_free_tmp(tmp2); + } } goto end_op; @@ -4025,7 +4019,7 @@ static void sh2_generate_utils(void) EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); emith_eor_r_imm_c(DCOND_CC, arg0, 1); - emith_read8_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read8s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); @@ -4037,7 +4031,7 @@ static void sh2_generate_utils(void) emith_sh2_rcall(arg0, arg1, arg2, arg3); EMITH_SJMP_START(DCOND_CS); emith_and_r_r_c(DCOND_CC, arg0, arg3); - emith_read16_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); + emith_read16s_r_r_r_c(DCOND_CC, RET_REG, arg0, arg2); emith_ret_c(DCOND_CC); EMITH_SJMP_END(DCOND_CS); emith_move_r_r_ptr(arg1, CONTEXT_REG); diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 07e76cca..d5cde520 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -44,10 +44,10 @@ unsigned short scan_block(unsigned int base_pc, int is_slave, #define _DRC_DECLARE_SR(SR) __DRC_DECLARE_SR(SR) #define DRC_DECLARE_SR _DRC_DECLARE_SR(DRC_SR_REG) #define DRC_SAVE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ sh2->sr = sh2_sr; #define DRC_RESTORE_SR(sh2) \ - if ((sh2->state & (SH2_STATE_RUN)) == SH2_STATE_RUN) \ + if ((sh2->state & (SH2_STATE_RUN|SH2_STATE_SLEEP)) == SH2_STATE_RUN) \ sh2_sr = sh2->sr; #else #define DRC_DECLARE_SR diff --git a/cpu/sh2/mame/sh2.c b/cpu/sh2/mame/sh2.c index 2fb964b6..fa49153a 100644 --- a/cpu/sh2/mame/sh2.c +++ b/cpu/sh2/mame/sh2.c @@ -372,7 +372,7 @@ INLINE void BRA(sh2_state *sh2, UINT32 d) #if BUSY_LOOP_HACKS if (disp == -2) { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* BRA $ * NOP */ @@ -802,7 +802,7 @@ INLINE void DT(sh2_state *sh2, UINT32 n) sh2->sr &= ~T; #if BUSY_LOOP_HACKS { - UINT32 next_opcode = RW( sh2, sh2->ppc & AM ); + UINT32 next_opcode = (UINT32)(UINT16)RW( sh2, sh2->ppc & AM ); /* DT Rn * BF $-2 */ @@ -1049,12 +1049,12 @@ INLINE void MAC_W(sh2_state *sh2, UINT32 m, UINT32 n) INT32 tempm, tempn, dest, src, ans; UINT32 templ; - tempn = (INT32) RW( sh2, sh2->r[n] ); + tempn = (INT32)(INT16) RW( sh2, sh2->r[n] ); sh2->r[n] += 2; - tempm = (INT32) RW( sh2, sh2->r[m] ); + tempm = (INT32)(INT16) RW( sh2, sh2->r[m] ); sh2->r[m] += 2; templ = sh2->macl; - tempm = ((INT32) (short) tempn * (INT32) (short) tempm); + tempm = (tempn * tempm); if ((INT32) sh2->macl >= 0) dest = 0; else diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index f9d30d77..467b2adc 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -121,7 +121,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); // TODO: more branch types if ((opcode >> 13) == 5) { // BRA/BSR @@ -139,7 +139,7 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; @@ -232,13 +232,13 @@ int sh2_execute_interpreter(SH2 *sh2, int cycles) if (sh2->delay) { sh2->ppc = sh2->delay; - opcode = RW(sh2, sh2->delay); + opcode = (UINT32)(UINT16)RW(sh2, sh2->delay); sh2->pc -= 2; } else { sh2->ppc = sh2->pc; - opcode = RW(sh2, sh2->pc); + opcode = (UINT32)(UINT16)RW(sh2, sh2->pc); } sh2->delay = 0; diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 6a3b2222..8a4b5365 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1279,19 +1279,19 @@ out: elprintf_sh2(sh2, EL_32X, "r8 [%08x] %02x @%06x", a, d, sh2_pc(sh2)); DRC_RESTORE_SR(sh2); - return d; + return (s8)d; } static u32 REGPARM(2) sh2_read8_da(u32 a, SH2 *sh2) { - return sh2->data_array[(a & 0xfff) ^ 1]; + return (s8)sh2->data_array[(a & 0xfff) ^ 1]; } // for ssf2 static u32 REGPARM(2) sh2_read8_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - u8 *p = sh2->p_rom; + s8 *p = sh2->p_rom; return p[(bank + (a & 0x7ffff)) ^ 1]; } @@ -1340,18 +1340,18 @@ out: a, d, sh2_pc(sh2)); out_noprint: DRC_RESTORE_SR(sh2); - return d; + return (s16)d; } static u32 REGPARM(2) sh2_read16_da(u32 a, SH2 *sh2) { - return ((u16 *)sh2->data_array)[(a & 0xffe) / 2]; + return ((s16 *)sh2->data_array)[(a & 0xffe) / 2]; } static u32 REGPARM(2) sh2_read16_rom(u32 a, SH2 *sh2) { u32 bank = carthw_ssf2_banks[(a >> 19) & 7] << 19; - u16 *p = sh2->p_rom; + s16 *p = sh2->p_rom; return p[(bank + (a & 0x7fffe)) / 2]; } @@ -1364,7 +1364,8 @@ static u32 REGPARM(2) sh2_read32_unmapped(u32 a, SH2 *sh2) static u32 REGPARM(2) sh2_read32_cs0(u32 a, SH2 *sh2) { - return (sh2_read16_cs0(a, sh2) << 16) | sh2_read16_cs0(a + 2, sh2); + u32 d1 = sh2_read16_cs0(a, sh2) << 16, d2 = sh2_read16_cs0(a + 2, sh2) << 16; + return d1 | (d2 >> 16); } static u32 REGPARM(2) sh2_read32_da(u32 a, SH2 *sh2) @@ -1631,7 +1632,7 @@ u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2); else - return *(u8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); + return *(s8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); } u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) @@ -1644,7 +1645,7 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2); else - return *(u16 *)((p << 1) + (a & sh2_map->mask)); + return *(s16 *)((p << 1) + (a & sh2_map->mask)); } u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) -- 2.39.2