From 80599a42dbc06f3e86a09dae9dc98dccbb84b48c Mon Sep 17 00:00:00 2001 From: notaz Date: Mon, 26 Oct 2009 11:16:19 +0000 Subject: [PATCH] 32x: drc: handlers wip git-svn-id: file:///home/notaz/opt/svn/PicoDrive@826 be3aeb3a-fb24-0410-a615-afba39da0efa --- cpu/drc/emit_arm.c | 185 ++++++++++--- cpu/drc/emit_x86.c | 204 ++++++++++++-- cpu/sh2/compiler.c | 499 ++++++++++++++++++++++++++++++++--- cpu/sh2/sh2.h | 18 +- pico/32x/memory.c | 12 +- pico/carthw/svp/compiler.c | 2 +- platform/gp2x/Makefile | 2 +- platform/linux/Makefile | 3 +- platform/linux/host_dasm.c | 2 +- platform/linux/port_config.h | 5 +- 10 files changed, 823 insertions(+), 109 deletions(-) diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index 46b45d4..d208779 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -30,8 +30,15 @@ #define A_COND_NE 0x1 #define A_COND_MI 0x4 #define A_COND_PL 0x5 +#define A_COND_LS 0x9 #define A_COND_LE 0xd +/* unified conditions */ +#define DCOND_EQ A_COND_EQ +#define DCOND_NE A_COND_NE +#define DCOND_MI A_COND_MI +#define DCOND_PL A_COND_PL + /* addressing mode 1 */ #define A_AM1_LSL 0 #define A_AM1_LSR 1 @@ -49,6 +56,7 @@ #define A_OP_RSB 0x3 #define A_OP_ADD 0x4 #define A_OP_TST 0x8 +#define A_OP_TEQ 0x9 #define A_OP_CMP 0xa #define A_OP_ORR 0xc #define A_OP_MOV 0xd @@ -71,37 +79,43 @@ #define EOP_CMP_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8) #define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8) -#define EOP_MOV_REG(s, rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm) -#define EOP_ORR_REG(s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_ORR,s,rn,rd,shift_imm,shift_op,rm) -#define EOP_ADD_REG(s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_ADD,s,rn,rd,shift_imm,shift_op,rm) -#define EOP_TST_REG( rn, shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_TST,1,rn, 0,shift_imm,shift_op,rm) +#define EOP_MOV_IMM_C(cond,rd, ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_MOV,0, 0,rd,ror2,imm8) +#define EOP_ORR_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_ORR,0,rn,rd,ror2,imm8) +#define EOP_RSB_IMM_C(cond,rd,rn,ror2,imm8) EOP_C_DOP_IMM(cond,A_OP_RSB,0,rn,rd,ror2,imm8) + +#define EOP_MOV_REG(cond,s,rd, rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm) +#define EOP_ORR_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ORR,s,rn,rd,shift_imm,shift_op,rm) +#define EOP_ADD_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_ADD,s,rn,rd,shift_imm,shift_op,rm) +#define EOP_SUB_REG(cond,s,rd,rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_SUB,s,rn,rd,shift_imm,shift_op,rm) +#define EOP_TST_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TST,1,rn, 0,shift_imm,shift_op,rm) +#define EOP_TEQ_REG(cond, rn,rm,shift_op,shift_imm) EOP_C_DOP_REG_XIMM(cond,A_OP_TEQ,1,rn, 0,shift_imm,shift_op,rm) -#define EOP_MOV_REG2(s, rd,rs,shift_op,rm) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_MOV,s, 0,rd,rs,shift_op,rm) -#define EOP_ADD_REG2(s,rn,rd,rs,shift_op,rm) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_ADD,s,rn,rd,rs,shift_op,rm) -#define EOP_SUB_REG2(s,rn,rd,rs,shift_op,rm) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_SUB,s,rn,rd,rs,shift_op,rm) +#define EOP_MOV_REG2(s,rd, rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_MOV,s, 0,rd,rs,shift_op,rm) +#define EOP_ADD_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_ADD,s,rn,rd,rs,shift_op,rm) +#define EOP_SUB_REG2(s,rd,rn,rm,shift_op,rs) EOP_C_DOP_REG_XREG(A_COND_AL,A_OP_SUB,s,rn,rd,rs,shift_op,rm) -#define EOP_MOV_REG_SIMPLE(rd,rm) EOP_MOV_REG(0,rd,0,A_AM1_LSL,rm) -#define EOP_MOV_REG_LSL(rd, rm,shift_imm) EOP_MOV_REG(0,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_MOV_REG_LSR(rd, rm,shift_imm) EOP_MOV_REG(0,rd,shift_imm,A_AM1_LSR,rm) -#define EOP_MOV_REG_ASR(rd, rm,shift_imm) EOP_MOV_REG(0,rd,shift_imm,A_AM1_ASR,rm) -#define EOP_MOV_REG_ROR(rd, rm,shift_imm) EOP_MOV_REG(0,rd,shift_imm,A_AM1_ROR,rm) +#define EOP_MOV_REG_SIMPLE(rd,rm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,0) +#define EOP_MOV_REG_LSL(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSL,shift_imm) +#define EOP_MOV_REG_LSR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_LSR,shift_imm) +#define EOP_MOV_REG_ASR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ASR,shift_imm) +#define EOP_MOV_REG_ROR(rd, rm,shift_imm) EOP_MOV_REG(A_COND_AL,0,rd,rm,A_AM1_ROR,shift_imm) -#define EOP_ORR_REG_SIMPLE(rd,rm) EOP_ORR_REG(0,rd,rd,0,A_AM1_LSL,rm) -#define EOP_ORR_REG_LSL(rd,rn,rm,shift_imm) EOP_ORR_REG(0,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_ORR_REG_LSR(rd,rn,rm,shift_imm) EOP_ORR_REG(0,rn,rd,shift_imm,A_AM1_LSR,rm) -#define EOP_ORR_REG_ASR(rd,rn,rm,shift_imm) EOP_ORR_REG(0,rn,rd,shift_imm,A_AM1_ASR,rm) -#define EOP_ORR_REG_ROR(rd,rn,rm,shift_imm) EOP_ORR_REG(0,rn,rd,shift_imm,A_AM1_ROR,rm) +#define EOP_ORR_REG_SIMPLE(rd,rm) EOP_ORR_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0) +#define EOP_ORR_REG_LSL(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm) +#define EOP_ORR_REG_LSR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm) +#define EOP_ORR_REG_ASR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ASR,shift_imm) +#define EOP_ORR_REG_ROR(rd,rn,rm,shift_imm) EOP_ORR_REG(A_COND_AL,0,rd,rn,rm,A_AM1_ROR,shift_imm) -#define EOP_ADD_REG_SIMPLE(rd,rm) EOP_ADD_REG(0,rd,rd,0,A_AM1_LSL,rm) -#define EOP_ADD_REG_LSL(rd,rn,rm,shift_imm) EOP_ADD_REG(0,rn,rd,shift_imm,A_AM1_LSL,rm) -#define EOP_ADD_REG_LSR(rd,rn,rm,shift_imm) EOP_ADD_REG(0,rn,rd,shift_imm,A_AM1_LSR,rm) +#define EOP_ADD_REG_SIMPLE(rd,rm) EOP_ADD_REG(A_COND_AL,0,rd,rd,rm,A_AM1_LSL,0) +#define EOP_ADD_REG_LSL(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSL,shift_imm) +#define EOP_ADD_REG_LSR(rd,rn,rm,shift_imm) EOP_ADD_REG(A_COND_AL,0,rd,rn,rm,A_AM1_LSR,shift_imm) -#define EOP_TST_REG_SIMPLE(rn,rm) EOP_TST_REG( rn, 0,A_AM1_LSL,rm) +#define EOP_TST_REG_SIMPLE(rn,rm) EOP_TST_REG(A_COND_AL, rn, 0,A_AM1_LSL,rm) -#define EOP_MOV_REG2_LSL(rd, rm,rs) EOP_MOV_REG2(0, rd,rs,A_AM1_LSL,rm) -#define EOP_MOV_REG2_ROR(rd, rm,rs) EOP_MOV_REG2(0, rd,rs,A_AM1_ROR,rm) -#define EOP_ADD_REG2_LSL(rd,rn,rm,rs) EOP_ADD_REG2(0,rn,rd,rs,A_AM1_LSL,rm) -#define EOP_SUB_REG2_LSL(rd,rn,rm,rs) EOP_SUB_REG2(0,rn,rd,rs,A_AM1_LSL,rm) +#define EOP_MOV_REG2_LSL(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_LSL,rs) +#define EOP_MOV_REG2_ROR(rd, rm,rs) EOP_MOV_REG2(0,rd, rm,A_AM1_ROR,rs) +#define EOP_ADD_REG2_LSL(rd,rn,rm,rs) EOP_ADD_REG2(0,rd,rn,rm,A_AM1_LSL,rs) +#define EOP_SUB_REG2_LSL(rd,rn,rm,rs) EOP_SUB_REG2(0,rd,rn,rm,A_AM1_LSL,rs) /* addressing mode 2 */ #define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \ @@ -169,13 +183,15 @@ #define EOP_MSR_REG(rm) EOP_C_MSR_REG(A_COND_AL,rm) -static void emith_op_imm(int cond, int op, int r, unsigned int imm) +static void emith_op_imm(int cond, int s, int op, int r, unsigned int imm) { - int ror2, rn = r; + int ror2, rd = r, rn = r; u32 v; if (op == A_OP_MOV) rn = 0; + else if (op == A_OP_TST || op == A_OP_TEQ) + rd = 0; else if (imm == 0) return; @@ -184,7 +200,7 @@ static void emith_op_imm(int cond, int op, int r, unsigned int imm) for (; v && !(v & 3); v >>= 2) ror2--; - EOP_C_DOP_IMM(cond, op, 0, rn, r, ror2 & 0x0f, v & 0xff); + EOP_C_DOP_IMM(cond, op, s, rn, rd, ror2 & 0x0f, v & 0xff); if (op == A_OP_MOV) { op = A_OP_ORR; @@ -226,6 +242,13 @@ static int emith_xbranch(int cond, void *target, int is_call) } +// fake "simple" or "short" jump - using cond insns instead +#define EMITH_SJMP_START(cond) \ + (void)(cond) + +#define EMITH_SJMP_END(cond) \ + (void)(cond) + #define EMITH_CONDITIONAL(code, is_nonzero) { \ u32 val, cond, *ptr; \ cond = (is_nonzero) ? A_COND_NE : A_COND_EQ; \ @@ -236,17 +259,67 @@ static int emith_xbranch(int cond, void *target, int is_call) EMIT_PTR(ptr, ((cond)<<28) | 0x0a000000 | (val & 0xffffff)); \ } -#define emith_move_r_r(dst, src) \ - EOP_MOV_REG_SIMPLE(dst, src) +#define emith_move_r_r(d, s) \ + EOP_MOV_REG_SIMPLE(d, s) + +#define emith_add_r_r(d, s) \ + EOP_ADD_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) + +#define emith_sub_r_r(d, s) \ + EOP_SUB_REG(A_COND_AL,0,d,d,s,A_AM1_LSL,0) + +#define emith_teq_r_r(d, s) \ + EOP_TEQ_REG(A_COND_AL,d,s,A_AM1_LSL,0) + +#define emith_subf_r_r(d, s) \ + EOP_SUB_REG(A_COND_AL,1,d,d,s,A_AM1_LSL,0) #define emith_move_r_imm(r, imm) \ - emith_op_imm(A_COND_AL, A_OP_MOV, r, imm) + emith_op_imm(A_COND_AL, 0, A_OP_MOV, r, imm) #define emith_add_r_imm(r, imm) \ - emith_op_imm(A_COND_AL, A_OP_ADD, r, imm) + emith_op_imm(A_COND_AL, 0, A_OP_ADD, r, imm) #define emith_sub_r_imm(r, imm) \ - emith_op_imm(A_COND_AL, A_OP_SUB, r, imm) + emith_op_imm(A_COND_AL, 0, A_OP_SUB, r, imm) + +#define emith_bic_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 0, A_OP_BIC, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 0, A_OP_ORR, r, imm) + +// note: use 8bit imm only +#define emith_tst_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 1, A_OP_TST, r, imm) + +#define emith_subf_r_imm(r, imm) \ + emith_op_imm(A_COND_AL, 1, A_OP_SUB, r, imm) + +#define emith_add_r_imm_c(cond, r, imm) \ + emith_op_imm(cond, 0, A_OP_ADD, r, imm) + +#define emith_sub_r_imm_c(cond, r, imm) \ + emith_op_imm(cond, 0, A_OP_SUB, r, imm) + +#define emith_or_r_imm_c(cond, r, imm) \ + emith_op_imm(cond, 0, A_OP_ORR, r, imm) + +#define emith_lsl(d, s, cnt) \ + EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSL,cnt) + +#define emith_lsr(d, s, cnt) \ + EOP_MOV_REG(A_COND_AL,0,d,s,A_AM1_LSR,cnt) + +#define emith_asrf(d, s, cnt) \ + EOP_MOV_REG(A_COND_AL,1,d,s,A_AM1_ASR,cnt) + +#define emith_mul(d, s1, s2) { \ + if ((d) != (s1)) /* rd != rm limitation */ \ + EOP_MUL(d, s1, s2); \ + else \ + EOP_MUL(d, s2, s1); \ +} #define emith_ctx_read(r, offs) \ EOP_LDR_IMM(r, CONTEXT_REG, offs) @@ -254,6 +327,30 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_ctx_write(r, offs) \ EOP_STR_IMM(r, CONTEXT_REG, offs) +#define emith_clear_msb(d, s, count) { \ + u32 t; \ + if ((count) <= 8) { \ + t = (count) - 8; \ + t = (0xff << t) & 0xff; \ + EOP_BIC_IMM(d,s,8/2,t); \ + } else if ((count) >= 24) { \ + t = (count) - 24; \ + t = 0xff >> t; \ + EOP_AND_IMM(d,s,0,t); \ + } else { \ + EOP_MOV_REG_LSL(d,s,count); \ + EOP_MOV_REG_LSR(d,d,count); \ + } \ +} + +#define emith_sext(d, s, bits) { \ + EOP_MOV_REG_LSL(d,s,32 - (bits)); \ + EOP_MOV_REG_ASR(d,d,32 - (bits)); \ +} + +#define host_arg2reg(rd, arg) \ + rd = arg + // upto 4 args #define emith_pass_arg_r(arg, reg) \ EOP_MOV_REG_SIMPLE(arg, reg) @@ -274,9 +371,29 @@ static int emith_xbranch(int cond, void *target, int is_call) emith_jump_cond(A_COND_AL, target) /* SH2 drc specific */ -#define emith_test_t() { \ +#define emith_sh2_test_t() { \ int r = rcache_get_reg(SHR_SR, RC_GR_READ); \ EOP_TST_IMM(r, 0, 1); \ } +#define emith_sh2_dtbf_loop() { \ + int cr, rn; \ + tmp = rcache_get_tmp(); \ + cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \ + rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW); \ + emith_sub_r_imm(rn, 1); /* sub rn, #1 */ \ + emith_bic_r_imm(cr, 1); /* bic cr, #1 */ \ + emith_sub_r_imm(cr, (cycles+1) << 12); /* sub cr, #(cycles+1)<<12 */ \ + cycles = 0; \ + emith_asrf(tmp, cr, 2+12); /* movs tmp, cr, asr #2+12 */ \ + EOP_MOV_IMM_C(A_COND_MI,tmp,0,0); /* movmi tmp, #0 */ \ + emith_lsl(cr, cr, 20); /* mov cr, cr, lsl #20 */ \ + emith_lsr(cr, cr, 20); /* mov cr, cr, lsr #20 */ \ + emith_subf_r_r(rn, tmp); /* subs rn, tmp */ \ + EOP_RSB_IMM_C(A_COND_LS,tmp,rn,0,0); /* rsbls tmp, rn, #0 */ \ + EOP_ORR_REG(A_COND_LS,0,cr,cr,tmp,A_AM1_LSL,12+2); /* orrls cr,tmp,lsl #12+2 */\ + EOP_ORR_IMM_C(A_COND_LS,cr,cr,0,1); /* orrls cr, #1 */ \ + EOP_MOV_IMM_C(A_COND_LS,rn,0,0); /* movls rn, #0 */ \ + rcache_free_tmp(tmp); \ +} diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 71b9a40..e94e685 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -4,6 +4,20 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define CONTEXT_REG xBP +#define IOP_JE 0x74 +#define IOP_JNE 0x75 +#define IOP_JBE 0x76 +#define IOP_JA 0x77 +#define IOP_JS 0x78 +#define IOP_JNS 0x79 +#define IOP_JLE 0x7e + +// unified conditions (we just use rel8 jump instructions for x86) +#define DCOND_EQ IOP_JE +#define DCOND_NE IOP_JNE +#define DCOND_MI IOP_JS // MInus +#define DCOND_PL IOP_JNS // PLus or zero + #define EMIT_PTR(ptr, val, type) \ *(type *)(ptr) = val @@ -25,24 +39,148 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_MODRM(mod, r, rm); \ } +#define JMP8_POS(ptr) \ + ptr = tcache_ptr; \ + tcache_ptr += 2 + +#define JMP8_EMIT(op, ptr) \ + EMIT_PTR(ptr, op, u8); \ + EMIT_PTR(ptr + 1, (tcache_ptr - (ptr+2)), u8) + #define emith_move_r_r(dst, src) \ EMIT_OP_MODRM(0x8b, 3, dst, src) +#define emith_add_r_r(d, s) \ + EMIT_OP_MODRM(0x01, 3, s, d) + +#define emith_sub_r_r(d, s) \ + EMIT_OP_MODRM(0x29, 3, s, d) + +#define emith_or_r_r(d, s) \ + EMIT_OP_MODRM(0x09, 3, s, d) + +#define emith_eor_r_r(d, s) \ + EMIT_OP_MODRM(0x31, 3, s, d) + +// fake teq - test equivalence - get_flags(d ^ s) +#define emith_teq_r_r(d, s) { \ + emith_push(d); \ + emith_eor_r_r(d, s); \ + emith_pop(d); \ +} + +// _r_imm #define emith_move_r_imm(r, imm) { \ EMIT_OP(0xb8 + (r)); \ EMIT(imm, u32); \ } -#define emith_add_r_imm(r, imm) { \ - EMIT_OP_MODRM(0x81, 3, 0, r); \ +#define emith_arith_r_imm(op, r, imm) { \ + EMIT_OP_MODRM(0x81, 3, op, r); \ EMIT(imm, u32); \ } -#define emith_sub_r_imm(r, imm) { \ - EMIT_OP_MODRM(0x81, 3, 5, r); \ +// 2 - adc, 3 - sbb, 6 - xor, 7 - cmp +#define emith_add_r_imm(r, imm) \ + emith_arith_r_imm(0, r, imm) + +#define emith_or_r_imm(r, imm) \ + emith_arith_r_imm(1, r, imm) + +#define emith_and_r_imm(r, imm) \ + emith_arith_r_imm(4, r, imm) + +#define emith_sub_r_imm(r, imm) \ + emith_arith_r_imm(5, r, imm) + +#define emith_tst_r_imm(r, imm) { \ + EMIT_OP_MODRM(0xf7, 3, 0, r); \ EMIT(imm, u32); \ } +// fake +#define emith_bic_r_imm(r, imm) \ + emith_arith_r_imm(4, r, ~(imm)) + +// fake conditionals (using SJMP instead) +#define emith_add_r_imm_c(cond, r, imm) { \ + (void)(cond); \ + emith_arith_r_imm(0, r, imm); \ +} + +#define emith_or_r_imm_c(cond, r, imm) { \ + (void)(cond); \ + emith_arith_r_imm(1, r, imm); \ +} + +#define emith_sub_r_imm_c(cond, r, imm) { \ + (void)(cond); \ + emith_arith_r_imm(5, r, imm); \ +} + +// shift +#define emith_shift(op, d, s, cnt) { \ + if (d != s) \ + emith_move_r_r(d, s); \ + EMIT_OP_MODRM(0xc1, 3, op, d); \ + EMIT(cnt, u8); \ +} + +#define emith_asr(d, s, cnt) \ + emith_shift(7, d, s, cnt) + +#define emith_lsl(d, s, cnt) \ + emith_shift(4, d, s, cnt) + +// misc +#define emith_push(r) \ + EMIT_OP(0x50 + (r)) + +#define emith_pop(r) \ + EMIT_OP(0x58 + (r)) + +#define emith_neg_r(r) \ + EMIT_OP_MODRM(0xf7, 3, 3, r) + +#define emith_clear_msb(d, s, count) { \ + u32 t = (u32)-1; \ + t >>= count; \ + if (d != s) \ + emith_move_r_r(d, s); \ + emith_and_r_imm(d, t); \ +} + +#define emith_sext(d, s, bits) { \ + emith_lsl(d, s, 32 - (bits)); \ + emith_asr(d, d, 32 - (bits)); \ +} + +// XXX: stupid mess +#define emith_mul(d, s1, s2) { \ + int rmr; \ + if (d != xAX) \ + emith_push(xAX); \ + if ((s1) == xAX) \ + rmr = s2; \ + else if ((s2) == xAX) \ + rmr = s1; \ + else { \ + emith_move_r_r(xAX, s1); \ + rmr = s2; \ + } \ + emith_push(xDX); \ + EMIT_OP_MODRM(0xf7, 3, 4, rmr); /* MUL rmr */ \ + emith_pop(xDX); \ + if (d != xAX) { \ + emith_move_r_r(d, xAX); \ + emith_pop(xAX); \ + } \ +} + +// "flag" instructions are the same +#define emith_subf_r_imm emith_sub_r_imm +#define emith_subf_r_r emith_sub_r_r + // XXX: offs is 8bit only #define emith_ctx_read(r, offs) { \ EMIT_OP_MODRM(0x8b, 1, r, xBP); \ @@ -66,15 +204,16 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(disp, u32); \ } -#define EMITH_CONDITIONAL(code, is_nonzero) { \ - u8 *ptr = tcache_ptr; \ - tcache_ptr = tcache_ptr + 2; \ - code; \ - EMIT_PTR(ptr, ((is_nonzero) ? 0x75 : 0x74), u8); \ - EMIT_PTR(ptr + 1, (tcache_ptr - (ptr + 2)), u8); \ +// "simple" or "short" jump +#define EMITH_SJMP_START(cond) { \ + u8 *cond_ptr; \ + JMP8_POS(cond_ptr) + +#define EMITH_SJMP_END(cond) \ + JMP8_EMIT(cond, cond_ptr); \ } -#define arg2reg(rd, arg) \ +#define host_arg2reg(rd, arg) \ switch (arg) { \ case 0: rd = xAX; break; \ case 1: rd = xDX; break; \ @@ -83,25 +222,46 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define emith_pass_arg_r(arg, reg) { \ int rd = 7; \ - arg2reg(rd, arg); \ + host_arg2reg(rd, arg); \ emith_move_r_r(rd, reg); \ } #define emith_pass_arg_imm(arg, imm) { \ int rd = 7; \ - arg2reg(rd, arg); \ + host_arg2reg(rd, arg); \ emith_move_r_imm(rd, imm); \ } /* SH2 drc specific */ -#define emith_test_t() { \ - if (reg_map_g2h[SHR_SR] == -1) { \ - EMIT_OP_MODRM(0xf6, 1, 0, 5); \ - EMIT(SHR_SR * 4, u8); \ - EMIT(0x01, u8); /* test [ebp+SHR_SR], byte 1 */ \ - } else { \ - EMIT_OP_MODRM(0xf7, 3, 0, reg_map_g2h[SHR_SR]); \ - EMIT(0x01, u16); /* test , word 1 */ \ - } \ +#define emith_sh2_test_t() { \ + int t = rcache_get_reg(SHR_SR, RC_GR_READ); \ + EMIT_OP_MODRM(0xf6, 3, 0, t); \ + EMIT(0x01, u8); /* test , byte 1 */ \ +} + +#define emith_sh2_dtbf_loop() { \ + u8 *jmp0; /* negative cycles check */ \ + u8 *jmp1; /* unsinged overflow check */ \ + int cr, rn; \ + tmp = rcache_get_tmp(); \ + cr = rcache_get_reg(SHR_SR, RC_GR_RMW); \ + rn = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW);\ + emith_sub_r_imm(rn, 1); \ + emith_sub_r_imm(cr, (cycles+1) << 12); \ + cycles = 0; \ + emith_asr(tmp, cr, 2+12); \ + JMP8_POS(jmp0); /* no negative cycles */ \ + emith_move_r_imm(tmp, 0); \ + JMP8_EMIT(IOP_JNS, jmp0); \ + emith_and_r_imm(cr, 0xffe); \ + emith_subf_r_r(rn, tmp); \ + JMP8_POS(jmp1); /* no overflow */ \ + emith_neg_r(rn); /* count left */ \ + emith_lsl(rn, rn, 2+12); \ + emith_or_r_r(cr, rn); \ + emith_or_r_imm(cr, 1); \ + emith_move_r_imm(rn, 0); \ + JMP8_EMIT(IOP_JA, jmp1); \ + rcache_free_tmp(tmp); \ } diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 5d68892..43ca7b7 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -74,6 +74,8 @@ typedef struct { u32 val; } temp_reg_t; +// note: reg_temp[] must have at least the amount of +// registers used by handlers in worst case (currently 3?) #ifdef ARM #include "../drc/emit_arm.c" @@ -116,6 +118,12 @@ static temp_reg_t reg_temp[] = { #endif +#define T 0x00000001 +#define S 0x00000002 +#define I 0x000000f0 +#define Q 0x00000100 +#define M 0x00000200 + typedef enum { SHR_R0 = 0, SHR_R15 = 15, SHR_PC, SHR_PPC, SHR_PR, SHR_SR, @@ -224,7 +232,7 @@ static temp_reg_t *rcache_evict(void) } if (oldest == -1) { - printf("no registers to ec=vict, aborting\n"); + printf("no registers to evict, aborting\n"); exit(1); } @@ -243,6 +251,7 @@ typedef enum { RC_GR_RMW, } rc_gr_mode; +// note: must not be called when doing conditional code static int rcache_get_reg(sh2_reg_e r, rc_gr_mode mode) { temp_reg_t *tr; @@ -305,6 +314,78 @@ do_alloc: return tr->reg; } +static int rcache_get_arg_id(int arg) +{ + int i, r = 0; + host_arg2reg(r, arg); + + for (i = 0; i < ARRAY_SIZE(reg_temp); i++) + if (reg_temp[i].reg == r) + break; + + if (i == ARRAY_SIZE(reg_temp)) + // let's just say it's untracked arg reg + return r; + + if (reg_temp[i].type == HR_CACHED_DIRTY) { + // writeback + emith_ctx_write(reg_temp[i].reg, reg_temp[i].val * 4); + } + else if (reg_temp[i].type == HR_TEMP) { + printf("arg %d reg %d already used, aborting\n", arg, r); + exit(1); + } + + return i; +} + +// get a reg to be used as function arg +// it's assumed that regs are cleaned before call +static int rcache_get_tmp_arg(int arg) +{ + int id = rcache_get_arg_id(arg); + reg_temp[id].type = HR_TEMP; + + return reg_temp[id].reg; +} + +// same but caches reg. RC_GR_READ only. +static int rcache_get_reg_arg(int arg, sh2_reg_e r) +{ + int i, srcr, dstr, dstid; + + dstid = rcache_get_arg_id(arg); + dstr = reg_temp[dstid].reg; + + // maybe already statically mapped? + srcr = reg_map_g2h[r]; + if (srcr != -1) + goto do_cache; + + // maybe already cached? + for (i = ARRAY_SIZE(reg_temp) - 1; i >= 0; i--) { + if ((reg_temp[i].type == HR_CACHED || reg_temp[i].type == HR_CACHED_DIRTY) && + reg_temp[i].val == r) + { + srcr = reg_temp[i].reg; + goto do_cache; + } + } + + // must read + srcr = dstr; + emith_ctx_read(srcr, r * 4); + +do_cache: + if (srcr != dstr) + emith_move_r_r(dstr, srcr); + + reg_temp[dstid].stamp = ++rcache_counter; + reg_temp[dstid].type = HR_CACHED; + reg_temp[dstid].val = r; + return dstr; +} + static void rcache_free_tmp(int hr) { int i; @@ -312,23 +393,39 @@ static void rcache_free_tmp(int hr) if (reg_temp[i].reg == hr) break; - if (i == ARRAY_SIZE(reg_temp) || reg_temp[i].type != HR_TEMP) + if (i == ARRAY_SIZE(reg_temp) || reg_temp[i].type != HR_TEMP) { printf("rcache_free_tmp fail: #%i hr %d, type %d\n", i, hr, reg_temp[i].type); + return; + } + + reg_temp[i].type = HR_FREE; } -static void rcache_flush(void) +static void rcache_clean(void) { int i; - for (i = 0; i < ARRAY_SIZE(reg_temp); i++) { + for (i = 0; i < ARRAY_SIZE(reg_temp); i++) if (reg_temp[i].type == HR_CACHED_DIRTY) { // writeback emith_ctx_write(reg_temp[i].reg, reg_temp[i].val * 4); + reg_temp[i].type = HR_CACHED; } +} + +static void rcache_invalidate(void) +{ + int i; + for (i = 0; i < ARRAY_SIZE(reg_temp); i++) reg_temp[i].type = HR_FREE; - } rcache_counter = 0; } +static void rcache_flush(void) +{ + rcache_clean(); + rcache_invalidate(); +} + // --------------------------------------------------------------- static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) @@ -345,20 +442,147 @@ static void emit_move_r_r(sh2_reg_e dst, sh2_reg_e src) emith_move_r_r(hr_d, hr_s); } -/* -static int sh2_translate_op4(int op) +// arguments must be ready +// reg cache must be clean before call +static int emit_memhandler_read(int size) { - switch (op & 0x000f) - { - case 0x0b: - default: - emith_pass_arg(2, sh2, op); - emith_call(sh2_do_op); + int ctxr; + host_arg2reg(ctxr, 1); + emith_move_r_r(ctxr, CONTEXT_REG); + switch (size) { + case 0: // 8 + emith_call(p32x_sh2_read8); + break; + case 1: // 16 + emith_call(p32x_sh2_read16); + break; + case 2: // 32 + emith_call(p32x_sh2_read32); break; } + rcache_invalidate(); + // assuming arg0 and retval reg matches + return rcache_get_tmp_arg(0); +} - return 0; +static void emit_memhandler_write(int size) +{ + int ctxr; + host_arg2reg(ctxr, 2); + emith_move_r_r(ctxr, CONTEXT_REG); + switch (size) { + case 0: // 8 + emith_call(p32x_sh2_write8); + break; + case 1: // 16 + emith_call(p32x_sh2_write16); + break; + case 2: // 32 + emith_call(p32x_sh2_write32); + break; + } + rcache_invalidate(); } + +/* +MOV #imm,Rn 1110nnnniiiiiiii +MOV.W @(disp,PC),Rn 1001nnnndddddddd +MOV.L @(disp,PC),Rn 1101nnnndddddddd +MOV Rm,Rn 0110nnnnmmmm0011 +MOV.B @Rm,Rn 0110nnnnmmmm0000 +MOV.W @Rm,Rn 0110nnnnmmmm0001 +MOV.L @Rm,Rn 0110nnnnmmmm0010 +MOV.B @Rm+,Rn 0110nnnnmmmm0100 +MOV.W @Rm+,Rn 0110nnnnmmmm0101 +MOV.L @Rm+,Rn 0110nnnnmmmm0110 +MOV.B R0,@(disp,Rn) 10000000nnnndddd +MOV.W R0,@(disp,Rn) 10000001nnnndddd +MOV.B @(disp,Rm),R0 10000100mmmmdddd +MOV.W @(disp,Rm),R0 10000101mmmmdddd +MOV.L @(disp,Rm),Rn 0101nnnnmmmmdddd +MOV.B R0,@(disp,GBR) 11000000dddddddd +MOV.W R0,@(disp,GBR) 11000001dddddddd +MOV.L R0,@(disp,GBR) 11000010dddddddd +MOV.B @(disp,GBR),R0 11000100dddddddd +MOV.W @(disp,GBR),R0 11000101dddddddd +MOV.L @(disp,GBR),R0 11000110dddddddd +MOVA @(disp,PC),R0 11000111dddddddd +SWAP.B Rm,Rn 0110nnnnmmmm1000 +SWAP.W Rm,Rn 0110nnnnmmmm1001 +XTRCT Rm,Rn 0010nnnnmmmm1101 +ADD Rm,Rn 0011nnnnmmmm1100 +ADD #imm,Rn 0111nnnniiiiiiii +ADDC Rm,Rn 0011nnnnmmmm1110 +ADDV Rm,Rn 0011nnnnmmmm1111 +CMP/EQ #imm,R0 10001000iiiiiiii +CMP/EQ Rm,Rn 0011nnnnmmmm0000 +CMP/HS Rm,Rn 0011nnnnmmmm0010 +CMP/GE Rm,Rn 0011nnnnmmmm0011 +CMP/HI Rm,Rn 0011nnnnmmmm0110 +CMP/GT Rm,Rn 0011nnnnmmmm0111 +CMP/PZ Rn 0100nnnn00010001 +CMP/PL Rn 0100nnnn00010101 +CMP/ST Rm,Rn 0010nnnnmmmm1100 +DIV1 Rm,Rn 0011nnnnmmmm0100 +DMULS. Rm,Rn 0011nnnnmmmm1101 +DMULU.L Rm,Rn 0011nnnnmmmm0101 +EXTS.B Rm,Rn 0110nnnnmmmm1110 +EXTS.W Rm,Rn 0110nnnnmmmm1111 +EXTU.B Rm,Rn 0110nnnnmmmm1100 +EXTU.W Rm,Rn 0110nnnnmmmm1101 +MAC @Rm+,@Rn+ 0100nnnnmmmm1111 +MULS.W Rm,Rn 0010nnnnmmmm1111 +MULU.W Rm,Rn 0010nnnnmmmm1110 +NEG Rm,Rn 0110nnnnmmmm1011 +NEGC Rm,Rn 0110nnnnmmmm1010 +SUB Rm,Rn 0011nnnnmmmm1000 +SUBC Rm,Rn 0011nnnnmmmm1010 +SUBV Rm,Rn 0011nnnnmmmm1011 +AND Rm,Rn 0010nnnnmmmm1001 +AND #imm,R0 11001001iiiiiiii +AND.B #imm,@(R0,GBR) 11001101iiiiiiii +NOT Rm,Rn 0110nnnnmmmm0111 +OR Rm,Rn 0010nnnnmmmm1011 +OR #imm,R0 11001011iiiiiiii +OR.B #imm,@(R0,GBR) 11001111iiiiiiii +TAS.B @Rn 0100nnnn00011011 +TST Rm,Rn 0010nnnnmmmm1000 +TST #imm,R0 11001000iiiiiiii +TST.B #imm,@(R0,GBR) 11001100iiiiiiii +XOR Rm,Rn 0010nnnnmmmm1010 +XOR #imm,R0 11001010iiiiiiii +XOR.B #imm,@(R0,GBR) 11001110iiiiiiii +ROTL Rn 0100nnnn00000100 +ROTR Rn 0100nnnn00000101 +ROTCL Rn 0100nnnn00100100 +ROTCR Rn 0100nnnn00100101 +SHAL Rn 0100nnnn00100000 +SHAR Rn 0100nnnn00100001 +SHLL Rn 0100nnnn00000000 +SHLR Rn 0100nnnn00000001 +SHLL2 Rn 0100nnnn00001000 +SHLR2 Rn 0100nnnn00001001 +SHLL8 Rn 0100nnnn00011000 +SHLR8 Rn 0100nnnn00011001 +SHLL16 Rn 0100nnnn00101000 +SHLR16 Rn 0100nnnn00101001 +LDC Rm,GBR 0100mmmm00011110 +LDC Rm,VBR 0100mmmm00101110 +LDC.L @Rm+,GBR 0100mmmm00010111 +LDC.L @Rm+,VBR 0100mmmm00100111 +LDS Rm,MACH 0100mmmm00001010 +LDS Rm,MACL 0100mmmm00011010 +LDS Rm,PR 0100mmmm00101010 +LDS.L @Rm+,MACH 0100mmmm00000110 +LDS.L @Rm+,MACL 0100mmmm00010110 +LDS.L @Rm+,PR 0100mmmm00100110 +STC.L SR,@–Rn 0100nnnn00000011 +STC.L GBR,@–Rn 0100nnnn00010011 +STC.L VBR,@–Rn 0100nnnn00100011 +STS.L MACH,@–Rn 0100nnnn00000010 +STS.L MACL,@–Rn 0100nnnn00010010 +STS.L PR,@–Rn 0100nnnn00100010 +TRAPA #imm 11000011iiiiiiii */ #define DELAYED_OP \ @@ -369,6 +593,18 @@ static int sh2_translate_op4(int op) goto default_; \ } +#define GET_Fx() \ + ((op >> 4) & 0x0f) + +#define GET_Rm GET_Fx + +#define GET_Rn() \ + ((op >> 8) & 0x0f) + +#define CHECK_FX_GT_3() \ + if (GET_Fx() > 3) \ + goto default_ + static void *sh2_translate(SH2 *sh2, block_desc *other_block) { void *block_entry; @@ -377,7 +613,7 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block) int op, delayed_op = 0, test_irq = 0; int tcache_id = 0, blkid = 0; int cycles = 0; - u32 tmp, tmp2; + u32 tmp, tmp2, tmp3; // validate PC tmp = sh2->pc >> 29; @@ -438,7 +674,27 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block) switch ((op >> 12) & 0x0f) { case 0x00: - switch (op & 0x0f) { + switch (op & 0x0f) + { + case 0x02: + tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + switch (GET_Fx()) + { + case 0: // STC SR,Rn 0000nnnn00000010 + tmp2 = SHR_SR; + break; + case 1: // STC GBR,Rn 0000nnnn00010010 + tmp2 = SHR_GBR; + break; + case 2: // STC VBR,Rn 0000nnnn00100010 + tmp2 = SHR_VBR; + break; + default: + goto default_; + } + tmp2 = rcache_get_reg(tmp2, RC_GR_READ); + emith_move_r_r(tmp, tmp2); + goto end_op; case 0x03: CHECK_UNHANDLED_BITS(0xd0); // BRAF Rm 0000mmmm00100011 @@ -447,24 +703,106 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block) if (!(op & 0x20)) emit_move_r_imm32(SHR_PR, pc + 2); tmp = rcache_get_reg(SHR_PPC, RC_GR_WRITE); - tmp2 = rcache_get_reg((op >> 8) & 0x0f, RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); emith_move_r_r(tmp, tmp2); emith_add_r_imm(tmp, pc + 2); cycles++; goto end_op; + case 0x04: // MOV.B Rm,@(R0,Rn) 0000nnnnmmmm0100 + case 0x05: // MOV.W Rm,@(R0,Rn) 0000nnnnmmmm0101 + case 0x06: // MOV.L Rm,@(R0,Rn) 0000nnnnmmmm0110 + rcache_clean(); + tmp = rcache_get_reg_arg(0, SHR_R0); + tmp2 = rcache_get_reg_arg(1, GET_Rm()); + tmp3 = rcache_get_reg(GET_Rn(), RC_GR_READ); + emith_add_r_r(tmp, tmp3); + emit_memhandler_write(op & 3); + goto end_op; + case 0x07: + // MUL.L Rm,Rn 0000nnnnmmmm0111 + tmp = rcache_get_reg(GET_Rn(), RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); + tmp3 = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + emith_mul(tmp3, tmp2, tmp); + cycles++; + goto end_op; + case 0x08: + CHECK_UNHANDLED_BITS(0xf00); + switch (GET_Fx()) + { + case 0: // CLRT 0000000000001000 + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_bic_r_imm(tmp, T); + break; + case 1: // SETT 0000000000011000 + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_or_r_imm(tmp, T); + break; + case 2: // CLRMAC 0000000000101000 + tmp = rcache_get_reg(SHR_MACL, RC_GR_WRITE); + emith_move_r_imm(tmp, 0); + tmp = rcache_get_reg(SHR_MACH, RC_GR_WRITE); + emith_move_r_imm(tmp, 0); + break; + default: + goto default_; + } + goto end_op; case 0x09: - CHECK_UNHANDLED_BITS(0xf0); - // NOP 0000000000001001 + switch (GET_Fx()) + { + case 0: // NOP 0000000000001001 + CHECK_UNHANDLED_BITS(0xf00); + break; + case 1: // DIV0U 0000000000011001 + CHECK_UNHANDLED_BITS(0xf00); + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_bic_r_imm(tmp, M|Q|T); + break; + case 2: // MOVT Rn 0000nnnn00101001 + tmp = rcache_get_reg(SHR_SR, RC_GR_READ); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + emith_clear_msb(tmp2, tmp, 31); + break; + default: + goto default_; + } + goto end_op; + case 0x0a: + tmp = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + switch (GET_Fx()) + { + case 0: // STS MACH,Rn 0000nnnn00001010 + tmp2 = rcache_get_reg(SHR_MACH, RC_GR_READ); + break; + case 1: // STS MACL,Rn 0000nnnn00011010 + tmp2 = rcache_get_reg(SHR_MACL, RC_GR_READ); + break; + case 2: // STS PR,Rn 0000nnnn00101010 + tmp2 = rcache_get_reg(SHR_PR, RC_GR_READ); + break; + default: + goto default_; + } + emith_move_r_r(tmp, tmp2); goto end_op; case 0x0b: - CHECK_UNHANDLED_BITS(0xd0); - DELAYED_OP; - if (!(op & 0x20)) { - // RTS 0000000000001011 + CHECK_UNHANDLED_BITS(0xf00); + switch (GET_Fx()) + { + case 0: // RTS 0000000000001011 + DELAYED_OP; emit_move_r_r(SHR_PPC, SHR_PR); cycles++; - } else { - // RTE 0000000000101011 + break; + case 1: // SLEEP 0000000000011011 + emit_move_r_imm32(SHR_PC, pc - 2); + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_clear_msb(tmp, tmp, 20); // clear cycles + test_irq = 1; + cycles = 1; + break; + case 2: // RTE 0000000000101011 //emit_move_r_r(SHR_PC, SHR_PR); emit_move_r_imm32(SHR_PC, pc - 2); rcache_flush(); @@ -474,18 +812,101 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block) emit_move_r_r(SHR_PPC, SHR_PC); test_irq = 1; cycles += 3; + break; + default: + goto default_; } goto end_op; + case 0x0c: // MOV.B @(R0,Rm),Rn 0000nnnnmmmm1100 + case 0x0d: // MOV.W @(R0,Rm),Rn 0000nnnnmmmm1101 + case 0x0e: // MOV.L @(R0,Rm),Rn 0000nnnnmmmm1110 + rcache_clean(); + tmp = rcache_get_reg_arg(0, SHR_R0); + tmp2 = rcache_get_reg(GET_Rm(), RC_GR_READ); + emith_add_r_r(tmp, tmp2); + tmp = emit_memhandler_read(op & 3); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_WRITE); + rcache_free_tmp(tmp); + if ((op & 3) != 2) { + emith_sext(tmp2, tmp, (op & 1) ? 16 : 8); + } else + emith_move_r_r(tmp2, tmp); + goto end_op; + case 0x0f: // MAC.L @Rm+,@Rn+ 0000nnnnmmmm1111 + // TODO + break; + } + goto default_; + + case 0x01: + // MOV.L Rm,@(disp,Rn) 0001nnnnmmmmdddd + rcache_clean(); + tmp = rcache_get_reg_arg(0, GET_Rn()); + tmp2 = rcache_get_reg_arg(1, GET_Rm()); + emith_add_r_imm(tmp, (op & 0x0f) * 4); + emit_memhandler_write(2); + goto end_op; + + case 0x02: + switch (op & 0x0f) + { + case 0x00: // MOV.B Rm,@Rn 0010nnnnmmmm0000 + case 0x01: // MOV.W Rm,@Rn 0010nnnnmmmm0001 + case 0x02: // MOV.L Rm,@Rn 0010nnnnmmmm0010 + rcache_clean(); + rcache_get_reg_arg(0, GET_Rn()); + rcache_get_reg_arg(1, GET_Rm()); + emit_memhandler_write(op & 3); + goto end_op; + case 0x04: // MOV.B Rm,@–Rn 0010nnnnmmmm0100 + case 0x05: // MOV.W Rm,@–Rn 0010nnnnmmmm0101 + case 0x06: // MOV.L Rm,@–Rn 0010nnnnmmmm0110 + tmp = rcache_get_reg(GET_Rn(), RC_GR_RMW); + emith_sub_r_imm(tmp, (1 << (op & 3))); + rcache_clean(); + rcache_get_reg_arg(0, GET_Rn()); + rcache_get_reg_arg(1, GET_Rm()); + emit_memhandler_write(op & 3); + goto end_op; + case 0x07: // DIV0S Rm,Rn 0010nnnnmmmm0111 + tmp = rcache_get_reg(SHR_SR, RC_GR_RMW); + tmp2 = rcache_get_reg(GET_Rn(), RC_GR_READ); + tmp3 = rcache_get_reg(GET_Rm(), RC_GR_READ); + emith_bic_r_imm(tmp, M|Q|T); + emith_tst_r_imm(tmp2, (1<<31)); + EMITH_SJMP_START(DCOND_EQ); + emith_or_r_imm_c(DCOND_NE, tmp, Q); + EMITH_SJMP_END(DCOND_EQ); + emith_tst_r_imm(tmp3, (1<<31)); + EMITH_SJMP_START(DCOND_EQ); + emith_or_r_imm_c(DCOND_NE, tmp, M); + EMITH_SJMP_END(DCOND_EQ); + emith_teq_r_r(tmp2, tmp3); + EMITH_SJMP_START(DCOND_PL); + emith_or_r_imm_c(DCOND_MI, tmp, T); + EMITH_SJMP_END(DCOND_PL); + goto end_op; } goto default_; case 0x04: switch (op & 0x0f) { case 0x00: - if ((op & 0xf0) != 1) + if ((op & 0xf0) != 0x10) goto default_; // DT Rn 0100nnnn00010000 - goto default_; + if (p32x_sh2_read16(pc, sh2) == 0x8bfd) { // BF #-2 + emith_sh2_dtbf_loop(); + goto end_op; + } + tmp = rcache_get_reg((op >> 8) & 0x0f, RC_GR_RMW); + tmp2 = rcache_get_reg(SHR_SR, RC_GR_RMW); + emith_bic_r_imm(tmp2, T); + emith_subf_r_imm(tmp, 1); + EMITH_SJMP_START(DCOND_NE); + emith_or_r_imm_c(DCOND_EQ, tmp2, T); + EMITH_SJMP_END(DCOND_NE); + goto end_op; case 0x07: if ((op & 0xf0) != 0) goto default_; @@ -524,17 +945,27 @@ static void *sh2_translate(SH2 *sh2, block_desc *other_block) // BT label 10001001dddddddd case 0x0900: // BF label 10001011dddddddd - case 0x0b00: - tmp = ((signed int)(op << 24) >> 23); - tmp2 = delayed_op ? SHR_PPC : SHR_PC; - emit_move_r_imm32(tmp2, pc + (delayed_op ? 2 : 0)); - emith_test_t(); - EMITH_CONDITIONAL(emit_move_r_imm32(tmp2, pc + tmp + 2), (op & 0x0200) ? 1 : 0); + case 0x0b00: { + // jmp_cond ~ cond when guest doesn't jump + int jmp_cond = (op & 0x0200) ? DCOND_NE : DCOND_EQ; + int insn_cond = (op & 0x0200) ? DCOND_EQ : DCOND_NE; + signed int offs = ((signed int)(op << 24) >> 23); + tmp = rcache_get_reg(delayed_op ? SHR_PPC : SHR_PC, RC_GR_WRITE); + emith_move_r_imm(tmp, pc + (delayed_op ? 2 : 0)); + emith_sh2_test_t(); + EMITH_SJMP_START(jmp_cond); + if (!delayed_op) + offs += 2; + if (offs < 0) { + emith_sub_r_imm_c(insn_cond, tmp, -offs); + } else + emith_add_r_imm_c(insn_cond, tmp, offs); + EMITH_SJMP_END(jmp_cond); cycles += 2; if (!delayed_op) goto end_block; goto end_op; - } + }} goto default_; case 0x0a: diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index d722dea..264bb4e 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -50,11 +50,17 @@ void sh2_execute(SH2 *sh2, int cycles); // pico memhandlers // XXX: move somewhere else -unsigned int p32x_sh2_read8(unsigned int a, SH2 *sh2); -unsigned int p32x_sh2_read16(unsigned int a, SH2 *sh2); -unsigned int p32x_sh2_read32(unsigned int a, SH2 *sh2); -void p32x_sh2_write8(unsigned int a, unsigned int d, SH2 *sh2); -void p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); -void p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); +#if !defined(REGPARM) && defined(__i386__) +#define REGPARM(x) __attribute__((regparm(x))) +#else +#define REGPARM(x) +#endif + +unsigned int REGPARM(2) p32x_sh2_read8(unsigned int a, SH2 *sh2); +unsigned int REGPARM(2) p32x_sh2_read16(unsigned int a, SH2 *sh2); +unsigned int REGPARM(2) p32x_sh2_read32(unsigned int a, SH2 *sh2); +void REGPARM(3) p32x_sh2_write8(unsigned int a, unsigned int d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write16(unsigned int a, unsigned int d, SH2 *sh2); +void REGPARM(3) p32x_sh2_write32(unsigned int a, unsigned int d, SH2 *sh2); #endif /* __SH2_H__ */ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index e50ad36..4417a48 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -1167,7 +1167,7 @@ typedef void (sh2_write_handler)(u32 a, u32 d, int id); #define SH2MAP_ADDR2OFFS(a) \ (((a >> 25) & 3) | ((a >> 27) & 0x1c)) -u32 p32x_sh2_read8(u32 a, SH2 *sh2) +u32 REGPARM(2) p32x_sh2_read8(u32 a, SH2 *sh2) { const sh2_memmap *sh2_map = sh2->read8_map; uptr p; @@ -1180,7 +1180,7 @@ u32 p32x_sh2_read8(u32 a, SH2 *sh2) return *(u8 *)((p << 1) + ((a & sh2_map->mask) ^ 1)); } -u32 p32x_sh2_read16(u32 a, SH2 *sh2) +u32 REGPARM(2) p32x_sh2_read16(u32 a, SH2 *sh2) { const sh2_memmap *sh2_map = sh2->read16_map; uptr p; @@ -1193,7 +1193,7 @@ u32 p32x_sh2_read16(u32 a, SH2 *sh2) return *(u16 *)((p << 1) + ((a & sh2_map->mask) & ~1)); } -u32 p32x_sh2_read32(u32 a, SH2 *sh2) +u32 REGPARM(2) p32x_sh2_read32(u32 a, SH2 *sh2) { const sh2_memmap *sh2_map = sh2->read16_map; sh2_read_handler *handler; @@ -1216,7 +1216,7 @@ u32 p32x_sh2_read32(u32 a, SH2 *sh2) return (handler(a, sh2->is_slave) << 16) | handler(a + 2, sh2->is_slave); } -void p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) +void REGPARM(3) p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write8_tab; sh2_write_handler *wh; @@ -1225,7 +1225,7 @@ void p32x_sh2_write8(u32 a, u32 d, SH2 *sh2) wh(a, d, sh2->is_slave); } -void p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) +void REGPARM(3) p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write16_tab; sh2_write_handler *wh; @@ -1234,7 +1234,7 @@ void p32x_sh2_write16(u32 a, u32 d, SH2 *sh2) wh(a, d, sh2->is_slave); } -void p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) +void REGPARM(3) p32x_sh2_write32(u32 a, u32 d, SH2 *sh2) { const void **sh2_wmap = sh2->write16_tab; sh2_write_handler *handler; diff --git a/pico/carthw/svp/compiler.c b/pico/carthw/svp/compiler.c index c26e8e3..60834a2 100644 --- a/pico/carthw/svp/compiler.c +++ b/pico/carthw/svp/compiler.c @@ -356,7 +356,7 @@ static void tr_mov16(int r, int val) static void tr_mov16_cond(int cond, int r, int val) { - emith_op_imm(cond, A_OP_MOV, r, val); + emith_op_imm(cond, 0, A_OP_MOV, r, val); hostreg_r[r] = -1; } diff --git a/platform/gp2x/Makefile b/platform/gp2x/Makefile index 01e4e3b..934be24 100644 --- a/platform/gp2x/Makefile +++ b/platform/gp2x/Makefile @@ -13,7 +13,7 @@ amalgamate = 0 #profile = 1 #use_musashi = 1 use_sh2drc = 1 -drc_debug = 1 +#drc_debug = 3 -include Makefile.local diff --git a/platform/linux/Makefile b/platform/linux/Makefile index 7ecf4e3..8d37fd4 100644 --- a/platform/linux/Makefile +++ b/platform/linux/Makefile @@ -3,7 +3,7 @@ use_musashi = 1 #use_fame = 1 #use_mz80 = 1 use_sh2drc = 1 -drc_debug = 1 +#drc_debug = 3 #profile = 1 #fake_in_gp2x = 1 @@ -140,6 +140,7 @@ mkdirs: include ../common/revision.mak pico/carthw/svp/compiler.o : ../../cpu/drc/emit_arm.c +cpu/sh2/compiler.o : ../../cpu/drc/emit_x86.c pico/pico.o pico/cd/pico.o : ../../pico/pico_cmn.c ../../pico/pico_int.h pico/memory.o pico/cd/memory.o : ../../pico/pico_int.h ../../pico/memory.h diff --git a/platform/linux/host_dasm.c b/platform/linux/host_dasm.c index 91ea4da..3cc9c7b 100644 --- a/platform/linux/host_dasm.c +++ b/platform/linux/host_dasm.c @@ -170,7 +170,7 @@ void host_dasm(void *addr, int len) vma_end = vma + len; while (vma < vma_end) { - printf(" %p ", (void *)(long)vma); + printf(" %p ", (void *)(long)vma); vma += print_insn_func(vma, &di); printf("\n"); } diff --git a/platform/linux/port_config.h b/platform/linux/port_config.h index d2f993a..f576048 100644 --- a/platform/linux/port_config.h +++ b/platform/linux/port_config.h @@ -26,9 +26,8 @@ #define SIMPLE_WRITE_SOUND 0 #define mix_32_to_16l_stereo_lvl mix_32_to_16l_stereo -#define EL_LOGMASK (EL_ANOMALY|EL_STATUS|EL_UIO|EL_IDLE|EL_32X) -// EL_VDPDMA|EL_ASVDP|EL_SR) // |EL_BUSREQ|EL_Z80BNK) -//#define EL_LOGMASK (EL_ANOMALY|EL_STATUS) +#define EL_LOGMASK (EL_STATUS|EL_ANOMALY|EL_UIO|EL_IDLE) +// EL_VDPDMA|EL_ASVDP|EL_SR | EL_BUSREQ|EL_Z80BNK | EL_32X) //#define dprintf(f,...) printf("%05i:%03i: " f "\n",Pico.m.frame_count,Pico.m.scanline,##__VA_ARGS__) #define dprintf(x...) -- 2.39.2