From 36614252d942e2dedb856895396441665647b831 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 19 Sep 2019 22:14:28 +0200 Subject: [PATCH] sh2 drc: improved RTS call stack cache --- cpu/drc/emit_arm.c | 26 +++------- cpu/drc/emit_arm64.c | 37 ++++---------- cpu/drc/emit_mips.c | 36 ++++---------- cpu/drc/emit_x86.c | 116 +++++++++++++++++++------------------------ cpu/sh2/compiler.c | 102 +++++++++++++++++++------------------ pico/32x/pwm.c | 2 +- 6 files changed, 130 insertions(+), 189 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 66a5b065..71a10922 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -1000,10 +1000,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_read_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_read_r_r_r(r, rs, rm) \ EOP_LDR_REG_LSL(A_COND_AL, r, rs, rm, 0) -#define emith_read_r_r_r_wb(r, rs, rm) \ - EOP_LDR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) \ - emith_read_r_r_r_wb(r, rs, rm) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ EOP_LDRB_IMM2(cond, r, rs, offs) @@ -1049,10 +1045,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) #define emith_write_r_r_offs_ptr(r, rs, offs) \ emith_write_r_r_offs_c(A_COND_AL, r, rs, offs) -#define emith_write_r_r_r_wb(r, rs, rm) \ - EOP_STR_REG_LSL_WB(A_COND_AL, r, rs, rm, 0) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) \ - emith_write_r_r_r_wb(r, rs, rm) #define emith_ctx_read_c(cond, r, offs) \ emith_read_r_r_offs_c(cond, r, CONTEXT_REG, offs) @@ -1133,21 +1125,21 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 4 #define emith_jump_cond(cond, target) \ emith_xbranch(cond, target, 0) +#define emith_jump_cond_inrange(target) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = ptr; \ u32 val_ = (u32 *)(target) - ptr_ - 2; \ *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ - (u8 *)ptr; \ -}) - -#define emith_jump_cond_inrange(target) !0 + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) #define emith_jump_patch_size() 4 #define emith_jump_at(ptr, target) do { \ @@ -1184,11 +1176,6 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) emith_jump_ctx(offs); \ } while (0) -#define emith_call_link(r, target) do { \ - emith_move_r_r(r, PC); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret_c(cond) \ @@ -1200,6 +1187,9 @@ static inline void emith_pool_adjust(int pool_index, int move_offs) #define emith_ret_to_ctx(offs) \ emith_ctx_write(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + /* pushes r12 for eabi alignment */ #define emith_push_ret(r) do { \ int r_ = (r >= 0 ? r : 12); \ diff --git a/cpu/drc/emit_arm64.c b/cpu/drc/emit_arm64.c index 8ce2ef38..72f53dd5 100644 --- a/cpu/drc/emit_arm64.c +++ b/cpu/drc/emit_arm64.c @@ -865,15 +865,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_read_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_read_r_r_r_wb(r, rs, rm) do { \ - emith_read_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_read8_r_r_offs(r, rs, offs) \ emith_ldst_offs(AM_B, r, rs, offs, LT_LD, AM_IDX) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ @@ -935,15 +926,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_write_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_write_r_r_r_wb(r, rs, rm) do { \ - emith_write_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_ctx_read_ptr(r, offs) \ emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) @@ -1031,6 +1013,7 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 4 #define emith_jump_cond(cond, target) \ emith_bcond(tcache_ptr, 0, cond, target) @@ -1039,9 +1022,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) emith_bcond(tcache_ptr, 1, cond, target) #define emith_jump_cond_inrange(target) \ - !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 22) + !(((u8 *)target - (u8 *)tcache_ptr + 0x100000) >> 21) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr; \ u32 disp_ = (u8 *)(target) - (u8 *)(ptr_); \ int cond_ = ptr_[0] & 0xf; \ @@ -1051,8 +1034,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) } else if (ptr_[0] & 0x80000000) \ EMIT_PTR(ptr_, A64_BL((disp_) & 0x0fffffff)); \ else EMIT_PTR(ptr_, A64_B((disp_) & 0x0fffffff)); \ - (u8 *)ptr; \ -}) + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) +#define emith_jump_patch_size() 8 #define emith_jump_reg(r) \ EMIT(A64_BR(r)) @@ -1085,11 +1069,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) rcache_free_tmp(_t); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT(A64_ADRXLIT_IMM(r, 8)); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret() \ @@ -1100,6 +1079,9 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + // NB: pushes r or r18 for SP hardware alignment #define emith_push_ret(r) do { \ int r_ = (r >= 0 ? r : 18); \ @@ -1120,7 +1102,6 @@ static void emith_ldst_offs(int sz, int rd, int rn, int o9, int ld, int mode) #define emith_flush() /**/ #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ -#define emith_jump_patch_size() 8 #define emith_rw_offs_max() 0xff diff --git a/cpu/drc/emit_mips.c b/cpu/drc/emit_mips.c index 0e85f92a..6ff134d9 100644 --- a/cpu/drc/emit_mips.c +++ b/cpu/drc/emit_mips.c @@ -950,13 +950,6 @@ static void emith_lohi_nops(void) #define emith_read_r_r_r_c(cond, r, rs, rm) \ emith_read_r_r_r(r, rs, rm) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_add_r_r_r(rs, rs, rm); \ - EMIT(MIPS_LW(r, rs, 0)); \ -} while (0) -#define emith_read_r_r_r_wb(r, rs, rm) \ - emith_read_r_r_r_ptr_wb(r, rs, rm) - #define emith_read8_r_r_offs(r, rs, offs) \ EMIT(MIPS_LBU(r, rs, offs)) #define emith_read8_r_r_offs_c(cond, r, rs, offs) \ @@ -1028,13 +1021,6 @@ static void emith_lohi_nops(void) #define emith_write_r_r_r_c(cond, r, rs, rm) \ emith_write_r_r_r(r, rs, rm) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_add_r_r_r(rs, rs, rm); \ - EMIT(MIPS_SW(r, rs, 0)); \ -} while (0) -#define emith_write_r_r_r_wb(r, rs, rm) \ - emith_write_r_r_r_ptr_wb(r, rs, rm) - #define emith_ctx_read_ptr(r, offs) \ emith_read_r_r_offs_ptr(r, CONTEXT_REG, offs) @@ -1176,6 +1162,7 @@ static int emith_cond_check(int cond, int *r) emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)) #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 8 /* J+delayslot */ // NB: MIPS conditional branches have only +/- 128KB range #define emith_jump_cond(cond, target) do { \ @@ -1190,6 +1177,8 @@ static int emith_cond_check(int cond, int *r) EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } \ } while (0) +#define emith_jump_cond_inrange(target) \ + !(((u8 *)target - (u8 *)tcache_ptr + 0x20000) >> 18) #define emith_jump_cond_patchable(cond, target) do { \ int r_, mcond_ = emith_cond_check(cond, &r_); \ @@ -1199,16 +1188,14 @@ static int emith_cond_check(int cond, int *r) EMIT_PTR(bp, MIPS_BCONDZ(mcond_, r_, (u8 *)tcache_ptr-bp-4)); \ } while (0) -#define emith_jump_cond_inrange(target) \ - !(((u8 *)target - (u8 *)tcache_ptr + 0x10000) >> 18) - // NB: returns position of patch for cache maintenance -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 *ptr_ = (u32 *)ptr-1; /* must skip condition check code */ \ while ((ptr_[0] & 0xf8000000) != OP_J << 26) ptr_ ++; \ EMIT_PTR(ptr_, MIPS_J((uintptr_t)target & 0x0fffffff)); \ - (u8 *)(ptr_-1); \ -}) + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)(ptr_-1); \ +} while (0) +#define emith_jump_patch_size() 4 #define emith_jump_reg(r) \ emith_branch(MIPS_JR(r)) @@ -1235,11 +1222,6 @@ static int emith_cond_check(int cond, int *r) emith_call_reg(AT); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT(MIPS_BL(4)); EMIT(MIPS_ADD_IMM(r, LR, 8)); emith_flush(); \ - emith_branch(MIPS_J((uintptr_t)target & 0x0fffffff)); \ -} while (0) - #define emith_call_cleanup() /**/ #define emith_ret() \ @@ -1250,6 +1232,9 @@ static int emith_cond_check(int cond, int *r) #define emith_ret_to_ctx(offs) \ emith_ctx_write_ptr(LR, offs) +#define emith_add_r_ret_imm(r, imm) \ + emith_add_r_r_ptr_imm(r, LR, imm) + // NB: ABI SP alignment is 8 for compatibility with MIPS IV #define emith_push_ret(r) do { \ emith_sub_r_imm(SP, 8+16); /* reserve new arg save area (16) */ \ @@ -1271,7 +1256,6 @@ static int emith_cond_check(int cond, int *r) // NB: mips32r2 has SYNCI #define host_instructions_updated(base, end) __builtin___clear_cache(base, end) #define emith_update_cache() /**/ -#define emith_jump_patch_size() 4 #define emith_rw_offs_max() 0x7fff // SH2 drc specific diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index caade3a6..d8b3a2dd 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -297,54 +297,61 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common // _r_r_r_shift #define emith_add_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_add_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ } while (0) #define emith_add_r_r_r_lsl_ptr(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_add_r_r_r_ptr(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_add_r_r_r_ptr(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r_ptr(d, s1, s2); \ } while (0) #define emith_add_r_r_r_lsr(d, s1, s2, lsrimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsr(tmp_, s2, lsrimm); \ - emith_add_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s2, lsrimm); \ + emith_add_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_add_r_r_r(d, s1, s2); \ } while (0) #define emith_sub_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_sub_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_sub_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_sub_r_r_r(d, s1, s2); \ } while (0) #define emith_or_r_r_r_lsl(d, s1, s2, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s2, lslimm); \ - emith_or_r_r_r(d, s1, tmp_); \ - rcache_free_tmp(tmp_); \ + if (lslimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsl(tmp_, s2, lslimm); \ + emith_or_r_r_r(d, s1, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_or_r_r_r(d, s1, s2); \ } while (0) // _r_r_shift -#define emith_or_r_r_lsl(d, s, lslimm) do { \ - int tmp_ = rcache_get_tmp(); \ - emith_lsl(tmp_, s, lslimm); \ - emith_or_r_r(d, tmp_); \ - rcache_free_tmp(tmp_); \ -} while (0) +#define emith_or_r_r_lsl(d, s, lslimm) \ + emith_or_r_r_r_lsl(d, d, s, lslimm) -// d != s #define emith_eor_r_r_lsr(d, s, lsrimm) do { \ - emith_push(s); \ - emith_lsr(s, s, lsrimm); \ - emith_eor_r_r(d, s); \ - emith_pop(s); \ + if (lsrimm) { \ + int tmp_ = rcache_get_tmp(); \ + emith_lsr(tmp_, s, lsrimm); \ + emith_eor_r_r(d, tmp_); \ + rcache_free_tmp(tmp_); \ + } else emith_eor_r_r(d, s); \ } while (0) // _r_imm @@ -792,14 +799,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT_OP_MODRM64(0x8b, 0, r, 4); \ EMIT_SIB64(0, rs, rm); /* mov r, [rm + rs * 1] */ \ } while (0) -#define emith_read_r_r_r_wb(r, rs, rm) do { \ - emith_read_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_read_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_read_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) #define emith_write_r_r_r(r, rs, rm) do { \ EMIT_XREX_IF(0, r, rm, rs); \ @@ -811,15 +810,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT_OP_MODRM64(0x89, 0, r, 4); \ EMIT_SIB64(0, rs, rm); /* mov [rm + rs * 1], r */ \ } while (0) -#define emith_write_r_r_r_wb(r, rs, rm) do { \ - emith_write_r_r_r(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) -#define emith_write_r_r_r_ptr_wb(r, rs, rm) do { \ - emith_write_r_r_r_ptr(r, rs, rm); \ - emith_add_r_r_ptr(rs, rm); \ -} while (0) - #define emith_ctx_read(r, offs) \ emith_read_r_r_offs(r, CONTEXT_REG, offs) @@ -846,10 +836,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common emith_ctx_write(r_, offs_); \ } while (0) -// assumes EBX is free #define emith_ret_to_ctx(offs) do { \ - emith_pop(xBX); \ - emith_ctx_write(xBX, offs); \ + int tmp_ = rcache_get_tmp(); \ + emith_pop(tmp_); \ + emith_ctx_write(tmp_, offs); \ + rcache_free_tmp(tmp_); \ } while (0) #define emith_jump(ptr) do { \ @@ -860,24 +851,24 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common #define emith_jump_patchable(target) \ emith_jump(target) +#define emith_jump_patchable_size() 5 /* JMP rel32 */ #define emith_jump_cond(cond, ptr) do { \ u32 disp = (u8 *)(ptr) - ((u8 *)tcache_ptr + 6); \ EMIT_OP(0x0f80 | (cond)); \ EMIT(disp, u32); \ } while (0) +#define emith_jump_cond_inrange(ptr) !0 #define emith_jump_cond_patchable(cond, target) \ emith_jump_cond(cond, target) -#define emith_jump_patch(ptr, target) ({ \ +#define emith_jump_patch(ptr, target, pos) do { \ u32 disp_ = (u8 *)(target) - ((u8 *)(ptr) + 4); \ u32 offs_ = (*(u8 *)(ptr) == 0x0f) ? 2 : 1; \ EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ - ptr; \ -}) - -#define emith_jump_cond_inrange(ptr) !0 + if ((void *)(pos) != NULL) *(u8 **)(pos) = (u8 *)ptr; \ +} while (0) #define emith_jump_patch_size() 6 #define emith_jump_at(ptr, target) do { \ @@ -903,20 +894,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI, // x86-64,i386 common EMIT(offs, u32); \ } while (0) -#define emith_call_link(r, target) do { \ - EMIT_OP(0xe8); \ - EMIT(0, u32); /* call pc+0 */ \ - emith_pop(r); \ - emith_add_r_r_ptr_imm(r, r, 13); \ - emith_jump(target); \ -} while (0) - #define emith_call_cleanup() \ emith_add_r_r_ptr_imm(xSP, xSP, sizeof(void *)); // remove return addr #define emith_ret() \ EMIT_OP(0xc3) +#define emith_add_r_ret_imm(r, imm) do { \ + emith_read_r_r_offs_ptr(r, xSP, 0); \ + emith_add_r_r_ptr_imm(r, r, imm); \ +} while (0) + #define emith_jump_reg(r) \ EMIT_OP_MODRM(0xff, 3, 4, r) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index f6fbadaf..ec8554cc 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -40,7 +40,7 @@ #define PROPAGATE_CONSTANTS 1 #define LINK_BRANCHES 1 #define BRANCH_CACHE 1 -#define CALL_STACK 0 +#define CALL_STACK 1 #define ALIAS_REGISTERS 1 #define REMAP_REGISTER 1 #define LOOP_DETECTION 1 @@ -635,7 +635,7 @@ static signed char reg_map_host[HOST_REGS]; static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); static void REGPARM(1) (*sh2_drc_dispatcher)(u32 pc); #if CALL_STACK -static void REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc, uptr host_pr); +static u32 REGPARM(2) (*sh2_drc_dispatcher_call)(u32 pc); static void REGPARM(1) (*sh2_drc_dispatcher_return)(u32 pc); #endif static void REGPARM(1) (*sh2_drc_exit)(u32 pc); @@ -1150,7 +1150,8 @@ static void dr_block_link(struct block_entry *be, struct block_link *bl, int emi bl->jump, bl->target_pc, be->tcache_ptr); if (emit_jump) { - u8 *jump = emith_jump_patch(bl->jump, be->tcache_ptr); + u8 *jump; + emith_jump_patch(bl->jump, be->tcache_ptr, &jump); // only needs sync if patch is possibly crossing cacheline (assume 16 byte) if ((uintptr_t)jump >>4 != ((uintptr_t)jump+emith_jump_patch_size()-1) >>4) host_instructions_updated(jump, jump+emith_jump_patch_size()); @@ -1171,7 +1172,8 @@ static void dr_block_unlink(struct block_link *bl, int emit_jump) if (bl->target) { if (emit_jump) { - u8 *jump = emith_jump_patch(bl->jump, sh2_drc_dispatcher); + u8 *jump; + emith_jump_patch(bl->jump, sh2_drc_dispatcher, &jump); // update cpu caches since the previous jump target doesn't exist anymore host_instructions_updated(jump, jump+emith_jump_patch_size()); } @@ -1381,7 +1383,7 @@ static void rcache_remap_vreg(int x); { d = 1; printf("cache check r=%d c=%d not connected?\n", i, gp->cnst); }\ if ((gp->flags & GRF_CDIRTY) && (gp->vreg != -1 || !(gp->flags & GRF_CONST)))\ { d = 1; printf("cache check r=%d CDIRTY?\n", i); } \ - if (gp->flags & GRF_PINNED) { \ + if (gp->flags & (GRF_STATIC|GRF_PINNED)) { \ if (gp->sreg == -1 || !(cache_regs[gp->sreg].flags & HRF_PINNED))\ { d = 1; printf("cache check r=%d v=%d not pinned?\n", i, gp->vreg); } \ else m &= ~(1 << gp->sreg); \ @@ -4407,7 +4409,7 @@ end_op: } #endif - rcache_unlock_all(); // may lock delay_reg + rcache_unlock_all(); #if LOOP_OPTIMIZER if (target && pinned_loop_pc[pinned_loop_count] == target_pc) { rcache_unpin_all(); @@ -4427,30 +4429,26 @@ end_op: #if CALL_STACK if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { // BSR - tmp = rcache_get_tmp_arg(1); - emith_call_link(tmp, sh2_drc_dispatcher_call); - rcache_free_tmp(tmp); - } else + emith_call(sh2_drc_dispatcher_call); + } #endif - target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); + + target = dr_prepare_ext_branch(block->entryp, target_pc, sh2->is_slave, tcache_id); patchable = 1; } // create branch - if (patchable) { - if (cond != -1) + if (cond != -1) { + if (patchable) emith_jump_cond_patchable(cond, target); - else if (target != NULL) { - rcache_invalidate(); - emith_jump_patchable(target); - } - } else { - if (cond != -1) + else emith_jump_cond(cond, target); - else if (target != NULL) { - rcache_invalidate(); + } else { + rcache_invalidate(); + if (patchable) + emith_jump_patchable(target); + else emith_jump(target); - } } // branch not taken, correct cycle count @@ -4476,14 +4474,14 @@ end_op: rcache_invalidate(); #if CALL_STACK struct op_data *opd_b = (op_flags[i] & OF_DELAY_OP) ? opd-1 : opd; + if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { + // JSR/BSRF + emith_call(sh2_drc_dispatcher_call); + } + if (opd_b->rm == SHR_PR) { // RTS emith_jump(sh2_drc_dispatcher_return); - } else if ((opd_b->dest & BITMASK1(SHR_PR)) && pc+2 < end_pc) { - // JSR/BSRF - tmp = rcache_get_tmp_arg(1); - emith_call_link(tmp, sh2_drc_dispatcher_call); - rcache_free(tmp); } else #endif if (gconst_get(SHR_PC, &target_pc)) { @@ -4544,7 +4542,7 @@ end_op: rcache_flush(); emith_jump(sh2_drc_dispatcher); } - emith_jump_patch(branch_patch_ptr[i], target); + emith_jump_patch(branch_patch_ptr[i], target, NULL); } emith_pool_commit(0); @@ -4713,20 +4711,6 @@ static void sh2_generate_utils(void) emith_sh2_drc_exit(); emith_flush(); -#if CALL_STACK - // sh2_drc_dispatcher_call(u32 pc, uptr host_pr) - sh2_drc_dispatcher_call = (void *)tcache_ptr; - emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_imm(arg2, 2*sizeof(void *)); - emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); - emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_ptr_imm(arg3, CONTEXT_REG, offsetof(SH2, rts_cache) + sizeof(void *)); - emith_write_r_r_r_ptr_wb(arg1, arg2, arg3); - emith_ctx_read(arg3, SHR_PR * 4); - emith_write_r_r_offs(arg3, arg2, (s8)-sizeof(void *)); - emith_flush(); - // FALLTHROUGH -#endif // sh2_drc_dispatcher(u32 pc) sh2_drc_dispatcher = (void *)tcache_ptr; emith_ctx_write(arg0, SHR_PC * 4); @@ -4782,35 +4766,49 @@ static void sh2_generate_utils(void) emith_flush(); #if CALL_STACK + // pc = sh2_drc_dispatcher_call(u32 pc) + sh2_drc_dispatcher_call = (void *)tcache_ptr; + emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); + emith_ctx_read(arg1, SHR_PR * 4); + emith_add_r_imm(arg2, 2*sizeof(void *)); + emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); + emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); + emith_add_r_r_r_lsl_ptr(arg2, CONTEXT_REG, arg2, 0); + emith_write_r_r_offs(arg1, arg2, offsetof(SH2, rts_cache)); + emith_add_r_ret_imm(arg1, emith_jump_patchable_size()); // skip jump_patchable for rts host address + emith_write_r_r_offs_ptr(arg1, arg2, offsetof(SH2, rts_cache) + sizeof(void *)); + emith_ret(); + emith_flush(); + // sh2_drc_dispatcher_return(u32 pc) sh2_drc_dispatcher_return = (void *)tcache_ptr; emith_ctx_read(arg2, offsetof(SH2, rts_cache_idx)); - emith_add_r_r_ptr_imm(arg1, CONTEXT_REG, offsetof(SH2, rts_cache)); - emith_read_r_r_r_wb(arg3, arg1, arg2); + emith_add_r_r_r_lsl_ptr(arg1, CONTEXT_REG, arg2, 0); + emith_read_r_r_offs(arg3, arg1, offsetof(SH2, rts_cache)); emith_cmp_r_r(arg0, arg3); #if (DRC_DEBUG & 128) EMITH_SJMP_START(DCOND_EQ); - emith_move_r_ptr_imm(arg2, (uptr)&rcmiss); - emith_read_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + emith_move_r_ptr_imm(arg3, (uptr)&rcmiss); + emith_read_r_r_offs_c(DCOND_NE, arg1, arg3, 0); emith_add_r_imm_c(DCOND_NE, arg1, 1); - emith_write_r_r_offs_c(DCOND_NE, arg1, arg2, 0); + emith_write_r_r_offs_c(DCOND_NE, arg1, arg3, 0); EMITH_SJMP_END(DCOND_EQ); #endif emith_jump_cond(DCOND_NE, sh2_drc_dispatcher); - emith_read_r_r_offs_ptr(arg0, arg1, sizeof(void *)); + emith_read_r_r_offs_ptr(arg0, arg1, offsetof(SH2, rts_cache) + sizeof(void *)); emith_sub_r_imm(arg2, 2*sizeof(void *)); emith_and_r_imm(arg2, (ARRAY_SIZE(sh2s->rts_cache)-1) * 2*sizeof(void *)); emith_ctx_write(arg2, offsetof(SH2, rts_cache_idx)); #if (DRC_DEBUG & 128) - emith_move_r_ptr_imm(arg2, (uptr)&rchit); - emith_read_r_r_offs(arg1, arg2, 0); + emith_move_r_ptr_imm(arg3, (uptr)&rchit); + emith_read_r_r_offs(arg1, arg3, 0); emith_add_r_imm(arg1, 1); - emith_write_r_r_offs(arg1, arg2, 0); + emith_write_r_r_offs(arg1, arg3, 0); #endif emith_jump_reg(arg0); emith_flush(); #endif - + // sh2_drc_test_irq(void) // assumes it's called from main function (may jump to dispatcher) sh2_drc_test_irq = (void *)tcache_ptr; diff --git a/pico/32x/pwm.c b/pico/32x/pwm.c index 1c1ec428..0aa2f586 100644 --- a/pico/32x/pwm.c +++ b/pico/32x/pwm.c @@ -88,7 +88,7 @@ static void consume_fifo_do(SH2 *sh2, unsigned int m68k_cycles, mem->pwm_index[0] = (mem->pwm_index[0]+1) % 4; Pico32x.pwm_p[0]--; pwm.current[0] = convert_sample(fifo_l[mem->pwm_index[0]]); - sum |=pwm.current[0]; + sum |= pwm.current[0]; } if (Pico32x.pwm_p[1] > 0) { mem->pwm_index[1] = (mem->pwm_index[1]+1) % 4; -- 2.39.5