From: notaz Date: Tue, 29 Dec 2009 22:43:10 +0000 (+0000) Subject: 32x: drc: inline dispatcher and irq handling; do write-caused irqs X-Git-Tag: v1.85~203 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=e05b81fc5b3f640496795ced5d893ece4cc51c2d;p=picodrive.git 32x: drc: inline dispatcher and irq handling; do write-caused irqs git-svn-id: file:///home/notaz/opt/svn/PicoDrive@849 be3aeb3a-fb24-0410-a615-afba39da0efa --- diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 8b6af690..6a115e37 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -155,6 +155,10 @@ #define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \ EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | (offset_12)) +#define EOP_C_AM2_REG(cond,u,b,l,rn,rd,shift_imm,shift_op,rm) \ + EMIT(((cond)<<28) | 0x07000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ + ((shift_imm)<<7) | ((shift_op)<<5) | (rm)) + /* addressing mode 3 */ #define EOP_C_AM3(cond,u,r,l,rn,rd,s,h,immed_reg) \ EMIT(((cond)<<28) | 0x01000090 | ((u)<<23) | ((r)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | \ @@ -165,12 +169,16 @@ #define EOP_C_AM3_REG(cond,u,l,rn,rd,s,h,rm) EOP_C_AM3(cond,u,0,l,rn,rd,s,h,rm) /* ldr and str */ +#define EOP_LDR_IMM2(cond,rd,rn,offset_12) EOP_C_AM2_IMM(cond,1,0,1,rn,rd,offset_12) + #define EOP_LDR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,offset_12) #define EOP_LDR_NEGIMM(rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,0,0,1,rn,rd,offset_12) #define EOP_LDR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,1,rn,rd,0) #define EOP_STR_IMM( rd,rn,offset_12) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,offset_12) #define EOP_STR_SIMPLE(rd,rn) EOP_C_AM2_IMM(A_COND_AL,1,0,0,rn,rd,0) +#define EOP_LDR_REG_LSL(cond,rd,rn,rm,shift_imm) EOP_C_AM2_REG(cond,1,0,1,rn,rd,shift_imm,A_AM1_LSL,rm) + #define EOP_LDRH_IMM( rd,rn,offset_8) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,offset_8) #define EOP_LDRH_SIMPLE(rd,rn) EOP_C_AM3_IMM(A_COND_AL,1,1,rn,rd,0,1,0) #define EOP_LDRH_REG( rd,rn,rm) EOP_C_AM3_REG(A_COND_AL,1,1,rn,rd,0,1,rm) @@ -192,8 +200,6 @@ #define EOP_C_BX(cond,rm) \ EMIT(((cond)<<28) | 0x012fff10 | (rm)) -#define EOP_BX(rm) EOP_C_BX(A_COND_AL,rm) - #define EOP_C_B_PTR(ptr,cond,l,signed_immed_24) \ EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | ((l)<<24) | (signed_immed_24)) @@ -232,6 +238,7 @@ #define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm) +// XXX: AND, RSB, *C, MVN will break if 1 insn is not enough static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int imm) { int ror2; @@ -253,10 +260,9 @@ static void emith_op_imm2(int cond, int s, int op, int rd, int rn, unsigned int EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff); - if (op == A_OP_MOV) { + if (op == A_OP_MOV) op = A_OP_ORR; - rn = rd; - } + rn = rd; } } @@ -461,6 +467,12 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_and_r_r_imm(d, s, imm) \ emith_op_imm2(A_COND_AL, 0, A_OP_AND, d, s, imm) +#define emith_add_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_ADD, d, s, imm) + +#define emith_sub_r_r_imm(d, s, imm) \ + emith_op_imm2(A_COND_AL, 0, A_OP_SUB, d, s, imm) + #define emith_neg_r_r(d, s) \ EOP_RSB_IMM(d, s, 0, 0) @@ -583,18 +595,12 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_pass_arg_imm(arg, imm) \ emith_move_r_imm(arg, imm) -#define emith_call_cond(cond, target) \ - emith_xbranch(cond, target, 1) +#define emith_jump(target) \ + emith_jump_cond(A_COND_AL, target) #define emith_jump_cond(cond, target) \ emith_xbranch(cond, target, 0) -#define emith_call(target) \ - emith_call_cond(A_COND_AL, target) - -#define emith_jump(target) \ - emith_jump_cond(A_COND_AL, target) - #define emith_jump_patchable(cond) \ emith_jump_cond(cond, 0) @@ -604,8 +610,37 @@ static int emith_xbranch(int cond, void *target, int is_call) *ptr_ = (*ptr_ & 0xff000000) | (val & 0x00ffffff); \ } while (0) +#define emith_jump_reg_c(cond, r) \ + EOP_C_BX(cond, r) + #define emith_jump_reg(r) \ - EOP_BX(r) + emith_jump_reg_c(A_COND_AL, r) + +#define emith_jump_ctx_c(cond, offs) \ + EOP_LDR_IMM2(cond,15,CONTEXT_REG,offs) + +#define emith_jump_ctx(offs) \ + emith_jump_ctx_c(A_COND_AL, offs) + +#define emith_call_cond(cond, target) \ + emith_xbranch(cond, target, 1) + +#define emith_call(target) \ + emith_call_cond(A_COND_AL, target) + +#define emith_call_ctx(offs) { \ + emith_move_r_r(14, 15); \ + emith_jump_ctx(offs); \ +} + +#define emith_ret_c(cond) \ + emith_jump_reg_c(cond, 14) + +#define emith_ret() \ + emith_ret_c(A_COND_AL) + +#define emith_ret_to_ctx(offs) \ + emith_ctx_write(14, offs) /* SH2 drc specific */ #define emith_sh2_drc_entry() \ @@ -614,6 +649,18 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_sh2_drc_exit() \ EOP_LDMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R15M) +#define emith_sh2_wcall(a, tab, ret_ptr) { \ + int val_ = (char *)(ret_ptr) - (char *)tcache_ptr - 2*4; \ + if (val_ >= 0) \ + emith_add_r_r_imm(14, 15, val_); \ + else if (val_ < 0) \ + emith_sub_r_r_imm(14, 15, -val_); \ + emith_lsr(12, a, SH2_WRITE_SHIFT); \ + EOP_LDR_REG_LSL(A_COND_AL,12,tab,12,2); \ + emith_ctx_read(2, offsetof(SH2, is_slave)); \ + emith_jump_reg(12); \ +} + #define emith_sh2_dtbf_loop() { \ int cr, rn; \ int tmp_ = rcache_get_tmp(); \ diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 247d1d6e..02ad79cc 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -239,7 +239,17 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_bic_r_imm(r, imm); \ } +#define emith_jump_reg_c(cond, r) emith_jump_reg(r) +#define emith_jump_ctx_c(cond, offs) emith_jump_ctx(offs) +#define emith_ret_c(cond) emith_ret() + // _r_r_imm +#define emith_add_r_r_imm(d, s, imm) { \ + if (d != s) \ + emith_move_r_r(d, s); \ + emith_add_r_imm(d, imm); \ +} + #define emith_and_r_r_imm(d, s, imm) { \ if (d != s) \ emith_move_r_r(d, s); \ @@ -279,6 +289,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_push(r) \ EMIT_OP(0x50 + (r)) +#define emith_push_imm(imm) { \ + EMIT_OP(0x68); \ + EMIT(imm, u32); \ +} + #define emith_pop(r) \ EMIT_OP(0x58 + (r)) @@ -376,29 +391,41 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_rolcf emith_rolc #define emith_rorcf emith_rorc -// XXX: offs is 8bit only -#define emith_ctx_read(r, offs) do { \ - EMIT_OP_MODRM(0x8b, 1, r, xBP); \ - EMIT(offs, u8); /* mov tmp, [ebp+#offs] */ \ +#define emith_ctx_op(op, r, offs) do { \ + /* mov r <-> [ebp+#offs] */ \ + if ((offs) >= 0x80) { \ + EMIT_OP_MODRM(op, 2, r, xBP); \ + EMIT(offs, u32); \ + } else { \ + EMIT_OP_MODRM(op, 1, r, xBP); \ + EMIT(offs, u8); \ + } \ } while (0) +#define emith_ctx_read(r, offs) \ + emith_ctx_op(0x8b, r, offs) + +#define emith_ctx_write(r, offs) \ + emith_ctx_op(0x89, r, offs) + #define emith_ctx_read_multiple(r, offs, cnt, tmpr) do { \ int r_ = r, offs_ = offs, cnt_ = cnt; \ for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ emith_ctx_read(r_, offs_); \ } while (0) -#define emith_ctx_write(r, offs) do { \ - EMIT_OP_MODRM(0x89, 1, r, xBP); \ - EMIT(offs, u8); /* mov [ebp+#offs], tmp */ \ -} while (0) - #define emith_ctx_write_multiple(r, offs, cnt, tmpr) do { \ int r_ = r, offs_ = offs, cnt_ = cnt; \ for (; cnt_ > 0; r_++, offs_ += 4, cnt_--) \ emith_ctx_write(r_, offs_); \ } while (0) +// assumes EBX is free +#define emith_ret_to_ctx(offs) { \ + emith_pop(xBX); \ + emith_ctx_write(xBX, offs); \ +} + #define emith_jump(ptr) { \ u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ EMIT_OP(0xe9); \ @@ -429,9 +456,25 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_call_cond(cond, ptr) \ emith_call(ptr) +#define emith_call_reg(r) \ + EMIT_OP_MODRM(0xff, 3, 2, r) + +#define emith_call_ctx(offs) { \ + EMIT_OP_MODRM(0xff, 2, 2, xBP); \ + EMIT(offs, u32); \ +} + +#define emith_ret() \ + EMIT_OP(0xc3) + #define emith_jump_reg(r) \ EMIT_OP_MODRM(0xff, 3, 4, r) +#define emith_jump_ctx(offs) { \ + EMIT_OP_MODRM(0xff, 2, 4, xBP); \ + EMIT(offs, u32); \ +} + #define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ JMP8_POS(cond_ptr) @@ -476,7 +519,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_pop(xSI); \ emith_pop(xBP); \ emith_pop(xBX); \ - EMIT_OP(0xc3); /* ret */\ + emith_ret(); \ +} + +// assumes EBX is free temporary +#define emith_sh2_wcall(a, tab, ret_ptr) { \ + int arg2_; \ + host_arg2reg(arg2_, 2); \ + emith_lsr(xBX, a, SH2_WRITE_SHIFT); \ + EMIT_OP_MODRM(0x8b, 0, xBX, 4); \ + EMIT_SIB(2, xBX, tab); /* mov ebx, [tab + ebx * 4] */ \ + emith_ctx_read(arg2_, offsetof(SH2, is_slave)); \ + emith_push_imm((long)(ret_ptr)); \ + emith_jump_reg(xBX); \ } #define emith_sh2_dtbf_loop() { \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 618b497a..ee843641 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -41,6 +41,16 @@ static char sh2dasm_buff[64]; #else #define do_host_disasm(x) #endif + +#if (DRC_DEBUG & 4) +static void REGPARM(3) *sh2_drc_announce_entry(void *block, SH2 *sh2, u32 sr) +{ + if (block != NULL) + dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm', + sh2->pc, block, (signed int)sr >> 12); + return block; +} +#endif // } debug #define BLOCK_CYCLE_LIMIT 100 @@ -101,7 +111,7 @@ static temp_reg_t reg_temp[] = { { 3, }, }; -#else +#elif defined(__i386__) #include "../drc/emit_x86.c" static const int reg_map_g2h[] = { @@ -121,6 +131,8 @@ static temp_reg_t reg_temp[] = { { xDX, }, }; +#else +#error unsupported arch #endif #define T 0x00000001 @@ -130,6 +142,7 @@ static temp_reg_t reg_temp[] = { #define M 0x00000200 #define T_save 0x00000800 +#define I_SHIFT 4 #define Q_SHIFT 8 #define M_SHIFT 9 @@ -159,12 +172,16 @@ static void **hash_table; #define HASH_FUNC(hash_tab, addr) \ ((block_desc **)(hash_tab))[(addr) & HASH_MASK] -static void REGPARM(2) (*sh2_drc_entry)(const void *block, SH2 *sh2); -static void (*sh2_drc_exit)(void); +static void REGPARM(1) (*sh2_drc_entry)(SH2 *sh2); +static void (*sh2_drc_dispatcher)(void); +static void (*sh2_drc_exit)(void); +static void (*sh2_drc_test_irq)(void); +static void REGPARM(2) (*sh2_drc_write8)(u32 a, u32 d); +static void REGPARM(2) (*sh2_drc_write8_slot)(u32 a, u32 d); +static void REGPARM(2) (*sh2_drc_write16)(u32 a, u32 d); +static void REGPARM(2) (*sh2_drc_write16_slot)(u32 a, u32 d); -// tmp extern void REGPARM(2) sh2_do_op(SH2 *sh2, int opcode); -static void REGPARM(1) sh2_test_irq(SH2 *sh2); static void flush_tcache(int tcid) { @@ -484,9 +501,14 @@ static int emit_memhandler_read(int size) emith_move_r_r(ctxr, CONTEXT_REG); switch (size) { case 0: // 8 + // must writeback cycles for poll detection stuff + if (reg_map_g2h[SHR_SR] != -1) + emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); emith_call(p32x_sh2_read8); break; case 1: // 16 + if (reg_map_g2h[SHR_SR] != -1) + emith_ctx_write(reg_map_g2h[SHR_SR], SHR_SR * 4); emith_call(p32x_sh2_read16); break; case 2: // 32 @@ -498,19 +520,32 @@ static int emit_memhandler_read(int size) return rcache_get_tmp_arg(0); } -static void emit_memhandler_write(int size) +static void emit_memhandler_write(int size, u32 pc, int delay) { int ctxr; host_arg2reg(ctxr, 2); - emith_move_r_r(ctxr, CONTEXT_REG); switch (size) { case 0: // 8 - emith_call(p32x_sh2_write8); + // XXX: consider inlining sh2_drc_write8 + if (delay) { + emith_call(sh2_drc_write8_slot); + } else { + emit_move_r_imm32(SHR_PC, pc); + rcache_clean(); + emith_call(sh2_drc_write8); + } break; case 1: // 16 - emith_call(p32x_sh2_write16); + if (delay) { + emith_call(sh2_drc_write16_slot); + } else { + emit_move_r_imm32(SHR_PC, pc); + rcache_clean(); + emith_call(sh2_drc_write16); + } break; case 2: // 32 + emith_move_r_r(ctxr, CONTEXT_REG); emith_call(p32x_sh2_write32); break; } @@ -528,19 +563,6 @@ static int emit_indirect_indexed_read(int rx, int ry, int size) return emit_memhandler_read(size); } -// tmp_wr -> @(Rx,Ry) -static void emit_indirect_indexed_write(int tmp_wr, int rx, int ry, int size) -{ - int a0, t; - rcache_clean(); - t = rcache_get_tmp_arg(1); - emith_move_r_r(t, tmp_wr); - a0 = rcache_get_reg_arg(0, rx); - t = rcache_get_reg(ry, RC_GR_READ); - emith_add_r_r(a0, t); - emit_memhandler_write(size); -} - // read @Rn, @rm static void emit_indirect_read_double(u32 *rnr, u32 *rmr, int rn, int rm, int size) { @@ -593,27 +615,67 @@ static void emit_do_static_regs(int is_write, int tmpr) } } -static void sh2_generate_utils(void) +static void emit_block_entry(void) { - int ctx, blk, tmp; + int arg0, arg1, arg2; - host_arg2reg(blk, 0); - host_arg2reg(ctx, 1); - host_arg2reg(tmp, 2); + host_arg2reg(arg0, 0); + host_arg2reg(arg1, 1); + host_arg2reg(arg2, 2); - // sh2_drc_entry(void *block, SH2 *sh2) - sh2_drc_entry = (void *)tcache_ptr; - emith_sh2_drc_entry(); - emith_move_r_r(CONTEXT_REG, ctx); // move ctx, arg1 - emit_do_static_regs(0, tmp); - emith_jump_reg(blk); // jump arg0 +#if (DRC_DEBUG & 4) + emith_move_r_r(arg1, CONTEXT_REG); + emith_move_r_r(arg2, rcache_get_reg(SHR_SR, RC_GR_READ)); + emith_call(sh2_drc_announce_entry); + rcache_invalidate(); +#endif + emith_tst_r_r(arg0, arg0); + EMITH_SJMP_START(DCOND_EQ); + emith_jump_reg_c(DCOND_NE, arg0); + EMITH_SJMP_END(DCOND_EQ); +} - // sh2_drc_exit(void) - sh2_drc_exit = (void *)tcache_ptr; - emit_do_static_regs(1, tmp); - emith_sh2_drc_exit(); +static void REGPARM(3) *lookup_block(u32 pc, int is_slave, int *tcache_id) +{ + block_desc *bd = NULL; + void *block = NULL; + *tcache_id = 0; + + // we have full block id tables for data_array and RAM + // BIOS goes to data_array table too + if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) { + int blkid = Pico32xMem->drcblk_da[is_slave][(pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; + *tcache_id = 1 + is_slave; + if (blkid & 1) { + bd = &block_tables[*tcache_id][blkid >> 1]; + block = bd->tcache_ptr; + } + } + // RAM + else if ((pc & 0xc6000000) == 0x06000000) { + int blkid = Pico32xMem->drcblk_ram[(pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; + if (blkid & 1) { + bd = &block_tables[0][blkid >> 1]; + block = bd->tcache_ptr; + } + } + // ROM + else if ((pc & 0xc6000000) == 0x02000000) { + bd = HASH_FUNC(hash_table, pc); - rcache_invalidate(); + if (bd != NULL) { + if (bd->addr == pc) + block = bd->tcache_ptr; + else + block = dr_find_block(bd, pc); + } + } + +#if (DRC_DEBUG & 1) + if (bd != NULL) + bd->refcount++; +#endif + return block; } #define DELAYED_OP \ @@ -628,6 +690,12 @@ static void sh2_generate_utils(void) drcf.use_saved_t = 1; \ } +#define FLUSH_CYCLES(sr) \ + if (cycles > 0) { \ + emith_sub_r_imm(sr, cycles << 12); \ + cycles = 0; \ + } + #define CHECK_UNHANDLED_BITS(mask) { \ if ((op & (mask)) != 0) \ goto default_; \ @@ -651,7 +719,7 @@ static void sh2_generate_utils(void) #define OP_FLAGS(pc) op_flags[((pc) - base_pc) / 2] #define OF_DELAY_OP (1 << 0) -static void *sh2_translate(SH2 *sh2, int tcache_id) +static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // XXX: maybe use structs instead? void *branch_target_ptr[MAX_LOCAL_BRANCHES]; @@ -768,8 +836,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) int blkid; sr = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_sub_r_imm(sr, cycles << 12); - cycles = 0; + FLUSH_CYCLES(sr); rcache_flush(); do_host_disasm(tcache_id); @@ -863,8 +930,12 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 - tmp = rcache_get_reg(GET_Rm(), RC_GR_READ); - emit_indirect_indexed_write(tmp, SHR_R0, GET_Rn(), op & 3); + rcache_clean(); + tmp = rcache_get_reg_arg(1, GET_Rm()); + tmp2 = rcache_get_reg_arg(0, SHR_R0); + tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ); + emith_add_r_r(tmp2, tmp3); + emit_memhandler_write(op & 3, pc, drcf.delayed_op); goto end_op; case 0x07: // MUL.L Rm,Rn 0000nnnnmmmm0111 @@ -954,9 +1025,8 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) emit_move_r_imm32(SHR_PC, pc - 2); tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); emith_clear_msb(tmp, tmp, 20); // clear cycles - drcf.test_irq = 1; cycles = 1; - break; + goto end_op; case 2: // RTE 0000000000101011 DELAYED_OP; rcache_clean(); @@ -1036,7 +1106,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) tmp = rcache_get_reg_arg(0, GET_Rn()); tmp2 = rcache_get_reg_arg(1, GET_Rm()); emith_add_r_imm(tmp, (op & 0x0f) * 4); - emit_memhandler_write(2); + emit_memhandler_write(2, pc, drcf.delayed_op); goto end_op; case 0x02: @@ -1048,7 +1118,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) rcache_clean(); rcache_get_reg_arg(0, GET_Rn()); rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3); + emit_memhandler_write(op & 3, pc, drcf.delayed_op); goto end_op; case 0x04: // MOV.B Rm,@–Rn 0010nnnnmmmm0100 case 0x05: // MOV.W Rm,@–Rn 0010nnnnmmmm0101 @@ -1058,7 +1128,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) rcache_clean(); rcache_get_reg_arg(0, GET_Rn()); rcache_get_reg_arg(1, GET_Rm()); - emit_memhandler_write(op & 3); + emit_memhandler_write(op & 3, pc, drcf.delayed_op); goto end_op; case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 sr = rcache_get_reg(SHR_SR, RC_GR_RMW); @@ -1380,8 +1450,8 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) rcache_get_reg_arg(0, GET_Rn()); tmp3 = rcache_get_reg_arg(1, tmp); if (tmp == SHR_SR) - emith_clear_msb(tmp3, tmp3, 20); // reserved bits defined by ISA as 0 - emit_memhandler_write(2); + emith_clear_msb(tmp3, tmp3, 22); // reserved bits defined by ISA as 0 + emit_memhandler_write(2, pc, drcf.delayed_op); goto end_op; case 0x04: case 0x05: @@ -1541,7 +1611,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) emith_move_r_r(tmp2, tmp); rcache_free_tmp(tmp); rcache_get_reg_arg(0, GET_Rn()); - emit_memhandler_write(0); + emit_memhandler_write(0, pc, drcf.delayed_op); cycles += 3; break; default: @@ -1720,7 +1790,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg_arg(1, SHR_R0); tmp3 = (op & 0x100) >> 8; emith_add_r_imm(tmp, (op & 0x0f) << tmp3); - emit_memhandler_write(tmp3); + emit_memhandler_write(tmp3, pc, drcf.delayed_op); goto end_op; case 0x0400: // MOV.B @(disp,Rm),R0 10000100mmmmdddd case 0x0500: // MOV.W @(disp,Rm),R0 10000101mmmmdddd @@ -1824,7 +1894,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) tmp2 = rcache_get_reg_arg(1, SHR_R0); tmp3 = (op & 0x300) >> 8; emith_add_r_imm(tmp, (op & 0xff) << tmp3); - emit_memhandler_write(tmp3); + emit_memhandler_write(tmp3, pc, drcf.delayed_op); goto end_op; case 0x0400: // MOV.B @(disp,GBR),R0 11000100dddddddd case 0x0500: // MOV.W @(disp,GBR),R0 11000101dddddddd @@ -1850,12 +1920,12 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) emith_add_r_imm(tmp, 4); tmp = rcache_get_reg_arg(1, SHR_SR); emith_clear_msb(tmp, tmp, 22); - emit_memhandler_write(2); + emit_memhandler_write(2, pc, drcf.delayed_op); // push PC rcache_get_reg_arg(0, SHR_SP); tmp = rcache_get_tmp_arg(1); emith_move_r_imm(tmp, pc); - emit_memhandler_write(2); + emit_memhandler_write(2, pc, drcf.delayed_op); // obtain new PC tmp = rcache_get_reg_arg(0, SHR_VBR); emith_add_r_imm(tmp, (op & 0xff) * 4); @@ -1918,7 +1988,7 @@ static void *sh2_translate(SH2 *sh2, int tcache_id) tmp3 = rcache_get_reg_arg(0, SHR_GBR); tmp4 = rcache_get_reg(SHR_R0, RC_GR_READ); emith_add_r_r(tmp3, tmp4); - emit_memhandler_write(0); + emit_memhandler_write(0, pc, drcf.delayed_op); cycles += 2; goto end_op; } @@ -1962,8 +2032,7 @@ end_op: if (branch_patch_cond != -1 && drcf.delayed_op != 2) { sr = rcache_get_reg(SHR_SR, RC_GR_RMW); // handle cycles - emith_sub_r_imm(sr, cycles << 12); - cycles = 0; + FLUSH_CYCLES(sr); rcache_clean(); if (drcf.use_saved_t) @@ -1983,8 +2052,16 @@ end_op: } } // test irq? - if (drcf.test_irq && drcf.delayed_op != 2) - break; + // XXX: delay slots.. + if (drcf.test_irq && drcf.delayed_op != 2) { + if (!drcf.delayed_op) + emit_move_r_imm32(SHR_PC, pc); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + FLUSH_CYCLES(sr); + rcache_flush(); + emith_call(sh2_drc_test_irq); + drcf.test_irq = 0; + } if (drcf.delayed_op == 1) break; @@ -1995,19 +2072,13 @@ end_op: if (!drcf.delayed_op) emit_move_r_imm32(SHR_PC, pc); - if (drcf.test_irq) { - rcache_flush(); - emith_pass_arg_r(0, CONTEXT_REG); - emith_call(sh2_test_irq); - } - end_block_btf: this_block->end_addr = pc; tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); - emith_sub_r_imm(tmp, cycles << 12); + FLUSH_CYCLES(tmp); rcache_flush(); - emith_jump(sh2_drc_exit); + emith_jump(sh2_drc_dispatcher); // link local branches for (i = 0; i < branch_patch_count; i++) { @@ -2023,7 +2094,7 @@ end_block_btf: target = tcache_ptr; emit_move_r_imm32(SHR_PC, branch_patch_pc[i]); rcache_flush(); - emith_jump(sh2_drc_exit); + emith_jump(sh2_drc_dispatcher); } emith_jump_patch(branch_patch_ptr[i], target); } @@ -2085,67 +2156,165 @@ unimplemented: */ } -void __attribute__((noinline)) sh2_drc_dispatcher(SH2 *sh2) +static void sh2_generate_utils(void) { - // TODO: need to handle self-caused interrupts - sh2_test_irq(sh2); + int arg0, arg1, arg2, sr, tmp; + void *sh2_drc_write_end, *sh2_drc_write_slot_end; - while (((signed int)sh2->sr >> 12) > 0) - { - void *block = NULL; - block_desc *bd = NULL; - int tcache_id = 0; - - // FIXME: must avoid doing it so often.. - //sh2_test_irq(sh2); - - // we have full block id tables for data_array and RAM - // BIOS goes to data_array table too - if ((sh2->pc & 0xe0000000) == 0xc0000000 || (sh2->pc & ~0xfff) == 0) { - int blkid = Pico32xMem->drcblk_da[sh2->is_slave][(sh2->pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; - tcache_id = 1 + sh2->is_slave; - if (blkid & 1) { - bd = &block_tables[tcache_id][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // RAM - else if ((sh2->pc & 0xc6000000) == 0x06000000) { - int blkid = Pico32xMem->drcblk_ram[(sh2->pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; - if (blkid & 1) { - bd = &block_tables[tcache_id][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // ROM - else if ((sh2->pc & 0xc6000000) == 0x02000000) { - bd = HASH_FUNC(hash_table, sh2->pc); - - if (bd != NULL) { - if (bd->addr == sh2->pc) - block = bd->tcache_ptr; - else - block = dr_find_block(bd, sh2->pc); - } - } + host_arg2reg(arg0, 0); + host_arg2reg(arg1, 1); + host_arg2reg(arg2, 2); + emith_move_r_r(arg0, arg0); // nop - if (block == NULL) - block = sh2_translate(sh2, tcache_id); - if (block == NULL) { - // sh2_translate failed, possibly tcache overflow, clean up and try again - flush_tcache(tcache_id); - block = sh2_translate(sh2, tcache_id); - } + // sh2_drc_exit(void) + sh2_drc_exit = (void *)tcache_ptr; + emit_do_static_regs(1, arg2); + emith_sh2_drc_exit(); - dbg(4, "= %csh2 enter %08x %p, c=%d", sh2->is_slave ? 's' : 'm', - sh2->pc, block, (signed int)sh2->sr >> 12); -#if (DRC_DEBUG & 1) - if (bd != NULL) - bd->refcount++; + // sh2_drc_dispatcher(void) + sh2_drc_dispatcher = (void *)tcache_ptr; + sr = rcache_get_reg(SHR_SR, RC_GR_READ); + emith_cmp_r_imm(sr, 0); + emith_jump_cond(DCOND_LT, sh2_drc_exit); + rcache_invalidate(); + emith_ctx_read(arg0, SHR_PC * 4); + emith_ctx_read(arg1, offsetof(SH2, is_slave)); + emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); + emith_call(lookup_block); + emit_block_entry(); + // lookup failed, call sh2_translate() + emith_move_r_r(arg0, CONTEXT_REG); + emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); // tcache_id + emith_call(sh2_translate); + emit_block_entry(); + // sh2_translate() failed, flush cache and retry + emith_ctx_read(arg0, offsetof(SH2, drc_tmp)); + emith_call(flush_tcache); + emith_move_r_r(arg0, CONTEXT_REG); + emith_ctx_read(arg1, offsetof(SH2, drc_tmp)); + emith_call(sh2_translate); + emit_block_entry(); + // XXX: can't translate, fail + emith_call(exit); + + // sh2_drc_test_irq(void) + // assumes it's called from main function (may jump to dispatcher) + sh2_drc_test_irq = (void *)tcache_ptr; + emith_ctx_read(arg1, offsetof(SH2, pending_level)); + sr = rcache_get_reg(SHR_SR, RC_GR_READ); + emith_lsr(arg0, sr, I_SHIFT); + emith_and_r_imm(arg0, 0x0f); + emith_cmp_r_r(arg1, arg0); // pending_level > ((sr >> 4) & 0x0f)? + EMITH_SJMP_START(DCOND_GT); + emith_ret_c(DCOND_LE); // nope, return + EMITH_SJMP_END(DCOND_GT); + // adjust SP + tmp = rcache_get_reg(SHR_SP, RC_GR_RMW); + emith_sub_r_imm(tmp, 4*2); + rcache_clean(); + // push SR + tmp = rcache_get_reg_arg(0, SHR_SP); + emith_add_r_imm(tmp, 4); + tmp = rcache_get_reg_arg(1, SHR_SR); + emith_clear_msb(tmp, tmp, 22); + emith_move_r_r(arg2, CONTEXT_REG); + emith_call(p32x_sh2_write32); + rcache_invalidate(); + // push PC + rcache_get_reg_arg(0, SHR_SP); + emith_ctx_read(arg1, SHR_PC * 4); + emith_move_r_r(arg2, CONTEXT_REG); + emith_call(p32x_sh2_write32); + rcache_invalidate(); + // update I, cycles, do callback + emith_ctx_read(arg1, offsetof(SH2, pending_level)); + sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_bic_r_imm(sr, I); + emith_or_r_r_lsl(sr, arg1, I_SHIFT); + emith_sub_r_imm(sr, 13 << 12); // at least 13 cycles + rcache_flush(); + emith_move_r_r(arg0, CONTEXT_REG); + emith_call_ctx(offsetof(SH2, irq_callback)); // vector = sh2->irq_callback(sh2, level); + // obtain new PC + emith_lsl(arg0, arg0, 2); + emith_ctx_read(arg1, SHR_VBR * 4); + emith_add_r_r(arg0, arg1); + emit_memhandler_read(2); + emith_ctx_write(arg0, SHR_PC * 4); +#ifdef __i386__ + emith_add_r_imm(xSP, 4); // fix stack +#endif + emith_jump(sh2_drc_dispatcher); + rcache_invalidate(); + + // sh2_drc_entry(SH2 *sh2) + sh2_drc_entry = (void *)tcache_ptr; + emith_sh2_drc_entry(); + emith_move_r_r(CONTEXT_REG, arg0); // move ctx, arg0 + emit_do_static_regs(0, arg2); + emith_call(sh2_drc_test_irq); + emith_jump(sh2_drc_dispatcher); + + // write-caused irq detection + sh2_drc_write_end = tcache_ptr; + emith_tst_r_r(arg0, arg0); + EMITH_SJMP_START(DCOND_NE); + emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp)); // return + EMITH_SJMP_END(DCOND_NE); + // since PC is up to date, jump to it's block instead of returning + emith_call(sh2_drc_test_irq); + emith_jump_ctx(offsetof(SH2, drc_tmp)); + + // write-caused irq detection for writes in delay slot + sh2_drc_write_slot_end = tcache_ptr; + emith_tst_r_r(arg0, arg0); + EMITH_SJMP_START(DCOND_NE); + emith_jump_ctx_c(DCOND_EQ, offsetof(SH2, drc_tmp)); + EMITH_SJMP_END(DCOND_NE); + // just burn cycles to get back to dispatcher after branch is handled + sr = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_ctx_write(sr, offsetof(SH2, irq_cycles)); + emith_clear_msb(sr, sr, 20); // clear cycles + rcache_flush(); + emith_jump_ctx(offsetof(SH2, drc_tmp)); + + // sh2_drc_write8(u32 a, u32 d) + sh2_drc_write8 = (void *)tcache_ptr; + emith_ret_to_ctx(offsetof(SH2, drc_tmp)); + emith_ctx_read(arg2, offsetof(SH2, write8_tab)); + emith_sh2_wcall(arg0, arg2, sh2_drc_write_end); + + // sh2_drc_write16(u32 a, u32 d) + sh2_drc_write16 = (void *)tcache_ptr; + emith_ret_to_ctx(offsetof(SH2, drc_tmp)); + emith_ctx_read(arg2, offsetof(SH2, write16_tab)); + emith_sh2_wcall(arg0, arg2, sh2_drc_write_end); + + // sh2_drc_write8_slot(u32 a, u32 d) + sh2_drc_write8_slot = (void *)tcache_ptr; + emith_ret_to_ctx(offsetof(SH2, drc_tmp)); + emith_ctx_read(arg2, offsetof(SH2, write8_tab)); + emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end); + + // sh2_drc_write16_slot(u32 a, u32 d) + sh2_drc_write16_slot = (void *)tcache_ptr; + emith_ret_to_ctx(offsetof(SH2, drc_tmp)); + emith_ctx_read(arg2, offsetof(SH2, write16_tab)); + emith_sh2_wcall(arg0, arg2, sh2_drc_write_slot_end); + + rcache_invalidate(); +#if (DRC_DEBUG & 2) + host_dasm_new_symbol(sh2_drc_entry); + host_dasm_new_symbol(sh2_drc_dispatcher); + host_dasm_new_symbol(sh2_drc_exit); + host_dasm_new_symbol(sh2_drc_test_irq); + host_dasm_new_symbol(sh2_drc_write_end); + host_dasm_new_symbol(sh2_drc_write_slot_end); + host_dasm_new_symbol(sh2_drc_write8); + host_dasm_new_symbol(sh2_drc_write8_slot); + host_dasm_new_symbol(sh2_drc_write16); + host_dasm_new_symbol(sh2_drc_write16_slot); #endif - sh2_drc_entry(block, sh2); - dbg(4, "= leave %p", block); - } } static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a) @@ -2193,6 +2362,7 @@ void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) void sh2_execute(SH2 *sh2c, int cycles) { + int ret_cycles; sh2 = sh2c; // XXX sh2c->cycles_aim += cycles; @@ -2203,23 +2373,14 @@ void sh2_execute(SH2 *sh2c, int cycles) // others are usual SH2 flags sh2c->sr &= 0x3f3; sh2c->sr |= cycles << 12; - sh2_drc_dispatcher(sh2c); + sh2_drc_entry(sh2c); - sh2c->cycles_done += cycles - ((signed int)sh2c->sr >> 12); -} + // TODO: irq cycles + ret_cycles = (signed int)sh2c->sr >> 12; + if (ret_cycles > 0) + printf("warning: drc returned with cycles: %d\n", ret_cycles); -static void REGPARM(1) sh2_test_irq(SH2 *sh2) -{ - if (sh2->pending_level > ((sh2->sr >> 4) & 0x0f)) - { - if (sh2->pending_irl > sh2->pending_int_irq) - sh2_do_irq(sh2, sh2->pending_irl, 64 + sh2->pending_irl/2); - else { - sh2_do_irq(sh2, sh2->pending_int_irq, sh2->pending_int_vector); - sh2->pending_int_irq = 0; // auto-clear - sh2->pending_level = sh2->pending_irl; - } - } + sh2c->cycles_done += cycles - ret_cycles; } #if (DRC_DEBUG & 1) diff --git a/cpu/sh2/mame/sh2pico.c b/cpu/sh2/mame/sh2pico.c index 167f8cc2..a65638b4 100644 --- a/cpu/sh2/mame/sh2pico.c +++ b/cpu/sh2/mame/sh2pico.c @@ -77,13 +77,9 @@ void sh2_execute(SH2 *sh2_, int cycles) /* FIXME: Darxide doesn't like this */ if (sh2->test_irq && !sh2->delay && sh2->pending_level > ((sh2->sr >> 4) & 0x0f)) { - if (sh2->pending_irl > sh2->pending_int_irq) - sh2_do_irq(sh2, sh2->pending_irl, 64 + sh2->pending_irl/2); - else { - sh2_do_irq(sh2, sh2->pending_int_irq, sh2->pending_int_vector); - sh2->pending_int_irq = 0; // auto-clear - sh2->pending_level = sh2->pending_irl; - } + int level = sh2->pending_level; + int vector = sh2->irq_callback(sh2, level); + sh2_do_irq(sh2, level, vector); sh2->test_irq = 0; } diff --git a/cpu/sh2/sh2.c b/cpu/sh2/sh2.c index d8d82ed2..f0bd9079 100644 --- a/cpu/sh2/sh2.c +++ b/cpu/sh2/sh2.c @@ -36,8 +36,6 @@ void sh2_reset(SH2 *sh2) void sh2_do_irq(SH2 *sh2, int level, int vector) { - sh2->irq_callback(sh2->is_slave, level); - sh2->r[15] -= 4; p32x_sh2_write32(sh2->r[15], sh2->sr, sh2); /* push SR onto stack */ sh2->r[15] -= 4; diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index 9d6f0d21..074b142e 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -1,6 +1,12 @@ #ifndef __SH2_H__ #define __SH2_H__ +#if !defined(REGPARM) && defined(__i386__) +#define REGPARM(x) __attribute__((regparm(x))) +#else +#define REGPARM(x) +#endif + // registers - matches structure order typedef enum { SHR_R0 = 0, SHR_SP = 15, @@ -8,7 +14,7 @@ typedef enum { SHR_GBR, SHR_VBR, SHR_MACH, SHR_MACL, } sh2_reg_e; -typedef struct +typedef struct SH2_ { unsigned int r[16]; // 00 unsigned int pc; // 40 @@ -26,6 +32,7 @@ typedef struct // drc stuff int drc_tmp; // 70 + int irq_cycles; // interpreter stuff int icount; // cycles left in current timeslice @@ -37,7 +44,7 @@ typedef struct int pending_irl; int pending_int_irq; // internal irq int pending_int_vector; - void (*irq_callback)(int id, int level); + int REGPARM(2) (*irq_callback)(struct SH2_ *sh2, int level); int is_slave; unsigned int cycles_aim; // subtract sh2_icount to get global counter @@ -57,17 +64,11 @@ void sh2_execute(SH2 *sh2, int cycles); // pico memhandlers // XXX: move somewhere else -#if !defined(REGPARM) && defined(__i386__) -#define REGPARM(x) __attribute__((regparm(x))) -#else -#define REGPARM(x) -#endif - unsigned int REGPARM(2) p32x_sh2_read8(unsigned int a, SH2 *sh2); unsigned int REGPARM(2) p32x_sh2_read16(unsigned int a, SH2 *sh2); unsigned int REGPARM(2) p32x_sh2_read32(unsigned int a, SH2 *sh2); -void REGPARM(3) p32x_sh2_write8(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); -void REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); +int REGPARM(3) p32x_sh2_write8 (unsigned int a, unsigned int d, SH2 *sh2); +int REGPARM(3) p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); +int REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); #endif /* __SH2_H__ */ diff --git a/pico/32x/32x.c b/pico/32x/32x.c index ba89d982..323b0b65 100644 --- a/pico/32x/32x.c +++ b/pico/32x/32x.c @@ -4,10 +4,19 @@ struct Pico32x Pico32x; SH2 sh2s[2]; -static void sh2_irq_cb(int id, int level) +static int REGPARM(2) sh2_irq_cb(SH2 *sh2, int level) { - // diagnostic for now - elprintf(EL_32X, "%csh2 ack %d @ %08x", id ? 's' : 'm', level, sh2_pc(id)); + if (sh2->pending_irl > sh2->pending_int_irq) { + elprintf(EL_32X, "%csh2 ack/irl %d @ %08x", + sh2->is_slave ? 's' : 'm', level, sh2->pc); + return 64 + sh2->pending_irl / 2; + } else { + elprintf(EL_32X, "%csh2 ack/int %d/%d @ %08x", + sh2->is_slave ? 's' : 'm', level, sh2->pending_int_vector, sh2->pc); + sh2->pending_int_irq = 0; // auto-clear + sh2->pending_level = sh2->pending_irl; + return sh2->pending_int_vector; + } } void p32x_update_irls(void) diff --git a/pico/32x/memory.c b/pico/32x/memory.c index eaf1e3ad..7104cef6 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -523,7 +523,7 @@ static u32 sh2_peripheral_read32(u32 a, int id) return d; } -static void sh2_peripheral_write8(u32 a, u32 d, int id) +static int REGPARM(3) sh2_peripheral_write8(u32 a, u32 d, int id) { u8 *r = (void *)Pico32xMem->sh2_peri_regs[id]; elprintf(EL_32X, "%csh2 peri w8 [%08x] %02x @%06x", id ? 's' : 'm', a, d, sh2_pc(id)); @@ -540,11 +540,13 @@ static void sh2_peripheral_write8(u32 a, u32 d, int id) int vector = PREG8(oregs, 0x63) & 0x7f; elprintf(EL_32X, "%csh2 SCI recv irq (%d, %d)", (id ^ 1) ? 's' : 'm', level, vector); sh2_internal_irq(&sh2s[id ^ 1], level, vector); + return 1; } } + return 0; } -static void sh2_peripheral_write16(u32 a, u32 d, int id) +static int REGPARM(3) sh2_peripheral_write16(u32 a, u32 d, int id) { u16 *r = (void *)Pico32xMem->sh2_peri_regs[id]; elprintf(EL_32X, "%csh2 peri w16 [%08x] %04x @%06x", id ? 's' : 'm', a, d, sh2_pc(id)); @@ -559,10 +561,11 @@ static void sh2_peripheral_write16(u32 a, u32 d, int id) } if ((d & 0xff00) == 0x5a00) // WTCNT PREG8(r, 0x81) = d; - return; + return 0; } r[(a / 2) ^ 1] = d; + return 0; } static void sh2_peripheral_write32(u32 a, u32 d, int id) @@ -1016,52 +1019,55 @@ static u32 sh2_read16_da(u32 a, int id) return ((u16 *)Pico32xMem->data_array[id])[(a & 0xfff) / 2]; } -static void sh2_write_ignore(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write_ignore(u32 a, u32 d, int id) { + return 0; } // write8 -static void sh2_write8_unmapped(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_unmapped(u32 a, u32 d, int id) { elprintf(EL_UIO, "%csh2 unmapped w8 [%08x] %02x @%06x", id ? 's' : 'm', a, d & 0xff, sh2_pc(id)); + return 0; } -static void sh2_write8_cs0(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_cs0(u32 a, u32 d, int id) { elprintf(EL_32X, "%csh2 w8 [%08x] %02x @%06x", id ? 's' : 'm', a, d & 0xff, sh2_pc(id)); if ((a & 0x3ff00) == 0x4100) { p32x_vdp_write8(a, d); - return; + return 0; } if ((a & 0x3ff00) == 0x4000) { p32x_sh2reg_write8(a, d, id); - return; + return 1; } - sh2_write8_unmapped(a, d, id); + return sh2_write8_unmapped(a, d, id); } #define sh2_write8_dramN(n) \ if (!(a & 0x20000) || d) { \ u8 *dram = (u8 *)Pico32xMem->dram[n]; \ dram[(a & 0x1ffff) ^ 1] = d; \ - } + } \ + return 0; -static void sh2_write8_dram0(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_dram0(u32 a, u32 d, int id) { sh2_write8_dramN(0); } -static void sh2_write8_dram1(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_dram1(u32 a, u32 d, int id) { sh2_write8_dramN(1); } -static void sh2_write8_sdram(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_sdram(u32 a, u32 d, int id) { u32 a1 = a & 0x3ffff; #ifdef DRC_SH2 @@ -1070,9 +1076,10 @@ static void sh2_write8_sdram(u32 a, u32 d, int id) sh2_drc_wcheck_ram(a, t, id); #endif Pico32xMem->sdram[a1 ^ 1] = d; + return 0; } -static void sh2_write8_da(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write8_da(u32 a, u32 d, int id) { u32 a1 = a & 0xfff; #ifdef DRC_SH2 @@ -1081,16 +1088,18 @@ static void sh2_write8_da(u32 a, u32 d, int id) sh2_drc_wcheck_da(a, t, id); #endif Pico32xMem->data_array[id][a1 ^ 1] = d; + return 0; } // write16 -static void sh2_write16_unmapped(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_unmapped(u32 a, u32 d, int id) { elprintf(EL_UIO, "%csh2 unmapped w16 [%08x] %04x @%06x", id ? 's' : 'm', a, d & 0xffff, sh2_pc(id)); + return 0; } -static void sh2_write16_cs0(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_cs0(u32 a, u32 d, int id) { if (((EL_LOGMASK & EL_PWM) || (a & 0x30) != 0x30)) // hide PWM elprintf(EL_32X, "%csh2 w16 [%08x] %04x @%06x", @@ -1099,45 +1108,46 @@ static void sh2_write16_cs0(u32 a, u32 d, int id) if ((a & 0x3ff00) == 0x4100) { sh2_poll[id].cnt = 0; // for poll before VDP accesses p32x_vdp_write16(a, d); - return; + return 0; } if ((a & 0x3fe00) == 0x4200) { Pico32xMem->pal[(a & 0x1ff) / 2] = d; Pico32x.dirty_pal = 1; - return; + return 0; } if ((a & 0x3ff00) == 0x4000) { p32x_sh2reg_write16(a, d, id); - return; + return 1; } - sh2_write16_unmapped(a, d, id); + return sh2_write16_unmapped(a, d, id); } #define sh2_write16_dramN(n) \ u16 *pd = &Pico32xMem->dram[n][(a & 0x1ffff) / 2]; \ if (!(a & 0x20000)) { \ *pd = d; \ - return; \ + return 0; \ } \ /* overwrite */ \ if (!(d & 0xff00)) d |= *pd & 0xff00; \ if (!(d & 0x00ff)) d |= *pd & 0x00ff; \ - *pd = d + *pd = d; \ + return 0 -static void sh2_write16_dram0(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_dram0(u32 a, u32 d, int id) { sh2_write16_dramN(0); } -static void sh2_write16_dram1(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_dram1(u32 a, u32 d, int id) { sh2_write16_dramN(1); } -static void sh2_write16_sdram(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_sdram(u32 a, u32 d, int id) { u32 a1 = a & 0x3ffff; #ifdef DRC_SH2 @@ -1146,9 +1156,10 @@ static void sh2_write16_sdram(u32 a, u32 d, int id) sh2_drc_wcheck_ram(a, t, id); #endif ((u16 *)Pico32xMem->sdram)[a1 / 2] = d; + return 0; } -static void sh2_write16_da(u32 a, u32 d, int id) +static int REGPARM(3) sh2_write16_da(u32 a, u32 d, int id) { u32 a1 = a & 0xfff; #ifdef DRC_SH2 @@ -1157,6 +1168,7 @@ static void sh2_write16_da(u32 a, u32 d, int id) sh2_drc_wcheck_da(a, t, id); #endif ((u16 *)Pico32xMem->data_array[id])[a1 / 2] = d; + return 0; } @@ -1165,18 +1177,21 @@ typedef struct { u32 mask; } sh2_memmap; -typedef u32 (sh2_read_handler)(u32 a, int id); -typedef void (sh2_write_handler)(u32 a, u32 d, int id); +typedef u32 (sh2_read_handler)(u32 a, int id); +typedef int REGPARM(3) (sh2_write_handler)(u32 a, u32 d, int id); -#define SH2MAP_ADDR2OFFS(a) \ - (((a >> 25) & 3) | ((a >> 27) & 0x1c)) +#define SH2MAP_ADDR2OFFS_R(a) \ + ((((a) >> 25) & 3) | (((a) >> 27) & 0x1c)) + +#define SH2MAP_ADDR2OFFS_W(a) \ + ((u32)(a) >> SH2_WRITE_SHIFT) u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) { const sh2_memmap *sh2_map = sh2->read8_map; uptr p; - sh2_map += SH2MAP_ADDR2OFFS(a); + sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2->is_slave); @@ -1189,7 +1204,7 @@ u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) const sh2_memmap *sh2_map = sh2->read16_map; uptr p; - sh2_map += SH2MAP_ADDR2OFFS(a); + sh2_map += SH2MAP_ADDR2OFFS_R(a); p = sh2_map->addr; if (map_flag_set(p)) return ((sh2_read_handler *)(p << 1))(a, sh2->is_slave); @@ -1204,7 +1219,7 @@ u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) u32 offs; uptr p; - offs = SH2MAP_ADDR2OFFS(a); + offs = SH2MAP_ADDR2OFFS_R(a); sh2_map += offs; p = sh2_map->addr; if (!map_flag_set(p)) { @@ -1220,40 +1235,42 @@ u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) return (handler(a, sh2->is_slave) << 16) | handler(a + 2, sh2->is_slave); } -void REGPARM(3) p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) +// return nonzero if write potentially causes an interrupt (used by drc) +int REGPARM(3) p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write8_tab; sh2_write_handler *wh; - wh = sh2_wmap[SH2MAP_ADDR2OFFS(a)]; - wh(a, d, sh2->is_slave); + wh = sh2_wmap[SH2MAP_ADDR2OFFS_W(a)]; + return wh(a, d, sh2->is_slave); } -void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) +int REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write16_tab; sh2_write_handler *wh; - wh = sh2_wmap[SH2MAP_ADDR2OFFS(a)]; - wh(a, d, sh2->is_slave); + wh = sh2_wmap[SH2MAP_ADDR2OFFS_W(a)]; + return wh(a, d, sh2->is_slave); } -void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) +int REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write16_tab; sh2_write_handler *handler; u32 offs; - offs = SH2MAP_ADDR2OFFS(a); + offs = SH2MAP_ADDR2OFFS_W(a); - if (offs == 0x1f) { + if (offs == SH2MAP_ADDR2OFFS_W(0xffffc000)) { sh2_peripheral_write32(a, d, sh2->is_slave); - return; + return 0; } handler = sh2_wmap[offs]; handler(a, d >> 16, sh2->is_slave); handler(a + 2, d, sh2->is_slave); + return 0; } // ----------------------------------------------------------------- @@ -1380,7 +1397,7 @@ static void get_bios(void) static sh2_memmap sh2_read8_map[0x20], sh2_read16_map[0x20]; // for writes we are using handlers only -static void *sh2_write8_map[0x20], *sh2_write16_map[0x20]; +static sh2_write_handler *sh2_write8_map[0x80], *sh2_write16_map[0x80]; void Pico32xSwapDRAM(int b) { @@ -1393,8 +1410,8 @@ void Pico32xSwapDRAM(int b) sh2_read8_map[2].addr = sh2_read8_map[6].addr = sh2_read16_map[2].addr = sh2_read16_map[6].addr = MAP_MEMORY(Pico32xMem->dram[b]); - sh2_write8_map[2] = sh2_write8_map[6] = b ? sh2_write8_dram1 : sh2_write8_dram0; - sh2_write16_map[2] = sh2_write16_map[6] = b ? sh2_write16_dram1 : sh2_write16_dram0; + sh2_write8_map[0x04/2] = sh2_write8_map[0x24/2] = b ? sh2_write8_dram1 : sh2_write8_dram0; + sh2_write16_map[0x04/2] = sh2_write16_map[0x24/2] = b ? sh2_write16_dram1 : sh2_write16_dram0; } void PicoMemSetup32x(void) @@ -1448,24 +1465,27 @@ void PicoMemSetup32x(void) // SH2 maps: A31,A30,A29,CS1,CS0 // all unmapped by default - for (i = 0; i < 0x20; i++) { + for (i = 0; i < ARRAY_SIZE(sh2_read8_map); i++) { sh2_read8_map[i].addr = MAP_HANDLER(sh2_read8_unmapped); sh2_read16_map[i].addr = MAP_HANDLER(sh2_read16_unmapped); + } + + for (i = 0; i < ARRAY_SIZE(sh2_write8_map); i++) { sh2_write8_map[i] = sh2_write8_unmapped; sh2_write16_map[i] = sh2_write16_unmapped; } // "purge area" - for (i = 0x08; i <= 0x0b; i++) { - sh2_write8_map[i] = - sh2_write16_map[i] = sh2_write_ignore; + for (i = 0x40; i <= 0x5f; i++) { + sh2_write8_map[i >> 1] = + sh2_write16_map[i >> 1] = sh2_write_ignore; } // CS0 sh2_read8_map[0].addr = sh2_read8_map[4].addr = MAP_HANDLER(sh2_read8_cs0); sh2_read16_map[0].addr = sh2_read16_map[4].addr = MAP_HANDLER(sh2_read16_cs0); - sh2_write8_map[0] = sh2_write8_map[4] = sh2_write8_cs0; - sh2_write16_map[0] = sh2_write16_map[4] = sh2_write16_cs0; + sh2_write8_map[0x00/2] = sh2_write8_map[0x20/2] = sh2_write8_cs0; + sh2_write16_map[0x00/2] = sh2_write16_map[0x20/2] = sh2_write16_cs0; // CS1 - ROM sh2_read8_map[1].addr = sh2_read8_map[5].addr = sh2_read16_map[1].addr = sh2_read16_map[5].addr = MAP_MEMORY(Pico.rom); @@ -1477,20 +1497,20 @@ void PicoMemSetup32x(void) // CS3 - SDRAM sh2_read8_map[3].addr = sh2_read8_map[7].addr = sh2_read16_map[3].addr = sh2_read16_map[7].addr = MAP_MEMORY(Pico32xMem->sdram); - sh2_write8_map[3] = sh2_write8_map[7] = sh2_write8_sdram; - sh2_write16_map[3] = sh2_write16_map[7] = sh2_write16_sdram; + sh2_write8_map[0x06/2] = sh2_write8_map[0x26/2] = sh2_write8_sdram; + sh2_write16_map[0x06/2] = sh2_write16_map[0x26/2] = sh2_write16_sdram; sh2_read8_map[3].mask = sh2_read8_map[7].mask = sh2_read16_map[3].mask = sh2_read16_map[7].mask = 0x03ffff; // SH2 data array sh2_read8_map[0x18].addr = MAP_HANDLER(sh2_read8_da); sh2_read16_map[0x18].addr = MAP_HANDLER(sh2_read16_da); - sh2_write8_map[0x18] = sh2_write8_da; - sh2_write16_map[0x18] = sh2_write16_da; + sh2_write8_map[0xc0/2] = sh2_write8_da; + sh2_write16_map[0xc0/2] = sh2_write16_da; // SH2 IO sh2_read8_map[0x1f].addr = MAP_HANDLER(sh2_peripheral_read8); sh2_read16_map[0x1f].addr = MAP_HANDLER(sh2_peripheral_read16); - sh2_write8_map[0x1f] = sh2_peripheral_write8; - sh2_write16_map[0x1f] = sh2_peripheral_write16; + sh2_write8_map[0xff/2] = sh2_peripheral_write8; + sh2_write16_map[0xff/2] = sh2_peripheral_write16; // map DRAM area, both 68k and SH2 Pico32xSwapDRAM(1); diff --git a/pico/pico_int.h b/pico/pico_int.h index 68a77c0b..2f4d1d38 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -469,6 +469,8 @@ typedef struct #define SH2_DRCBLK_RAM_SHIFT 1 #define SH2_DRCBLK_DA_SHIFT 1 +#define SH2_WRITE_SHIFT 25 + struct Pico32x { unsigned short regs[0x20]; diff --git a/platform/linux/Makefile b/platform/linux/Makefile index 250f824b..47a8dcc4 100644 --- a/platform/linux/Makefile +++ b/platform/linux/Makefile @@ -68,7 +68,7 @@ vpath %.s = ../.. vpath %.S = ../.. vpath %.asm = ../.. -DIRS += platform/linux +DIRS += platform/linux zlib unzip all: mkdirs PicoDrive