#ifndef ARM
#define DUMP_BLOCK 0x0c9a
unsigned int tcache[512*1024];
-void regfile_load(void){}
-void regfile_store(void){}
+void ssp_drc_next(void){}
+void ssp_drc_next_patch(void){}
+void ssp_drc_end(void){}
#endif
#include "gen_arm.c"
hostreg_r[r] = -1;
}
-/* trashes r0 */
+/* trashes r1 */
static void tr_flush_dirty_pmcrs(void)
{
u32 i, val = (u32)-1;
tr_flush_dirty_ST();
//tr_flush_dirty_pmcrs();
tr_mov16(0, reg);
- emit_call(ssp_pm_read);
+ emit_call(A_COND_AL, ssp_pm_read);
hostreg_clear();
}
static void tr_r0_to_PC(int const_val)
{
+/*
+ * do nothing - dispatcher will take care of this
EOP_MOV_REG_LSL(1, 0, 16); // mov r1, r0, lsl #16
EOP_STR_IMM(1,7,0x400+6*4); // str r1, [r7, #(0x400+6*8)]
hostreg_r[1] = -1;
+*/
}
static void tr_r0_to_AL(int const_val)
tr_flush_dirty_ST();
//tr_flush_dirty_pmcrs();
tr_mov16(1, reg);
- emit_call(ssp_pm_write);
+ emit_call(A_COND_AL, ssp_pm_write);
hostreg_clear();
}
// -----------------------------------------------------
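+// end_cond: condition of the jump that ends the block
+//           (negative means an indirect jump through a register)
+// jump_pc:  target PC of that jump, when it is known at translation time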
-static int translate_op(unsigned int op, int *pc, int imm)
+static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *jump_pc)
{
u32 tmpv, tmpv2, tmpv3;
int ret = 0;
}
tr_read_funcs[tmpv](op);
tr_write_funcs[tmpv2]((known_regb & (1 << tmpv)) ? known_regs.gr[tmpv].h : -1);
- if (tmpv2 == SSP_PC) ret |= 0x10000;
+ if (tmpv2 == SSP_PC) {
+ ret |= 0x10000;
+ *end_cond = -A_COND_AL;
+ }
ret++; break;
// ld d, (ri)
tr_rX_read(r, mod);
else tr_ptrr_mod(r, mod, 1, 1);
tr_write_funcs[tmpv](-1);
- if (tmpv == SSP_PC) ret |= 0x10000;
+ if (tmpv == SSP_PC) {
+ ret |= 0x10000;
+ *end_cond = -A_COND_AL;
+ }
ret++; break;
}
if (ret > 0) break;
tr_mov16(0, imm);
tr_write_funcs[tmpv](imm);
- if (tmpv == SSP_PC) ret |= 0x10000;
+ if (tmpv == SSP_PC) {
+ ret |= 0x10000;
+ *jump_pc = imm;
+ }
ret += 2; break;
// ld d, ((ri))
tmpv2 = (op >> 4) & 0xf; // dst
tr_rX_read2(op);
tr_write_funcs[tmpv2](-1);
- if (tmpv2 == SSP_PC) ret |= 0x10000;
+ if (tmpv2 == SSP_PC) {
+ ret |= 0x10000;
+ *end_cond = -A_COND_AL;
+ }
ret += 3; break;
// ldi (ri), imm
tcache_ptr = real_ptr;
}
tr_mov16_cond(tmpv, 0, imm);
- if (tmpv != A_COND_AL) {
+ if (tmpv != A_COND_AL)
tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
- }
tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
ret |= 0x10000;
+ *end_cond = tmpv;
+ *jump_pc = imm;
ret += 2; break;
}
EOP_LDRH_SIMPLE(0,0); // ldrh r0, [r0]
hostreg_r[0] = hostreg_r[1] = -1;
tr_write_funcs[tmpv2](-1);
- if (tmpv2 == SSP_PC) ret |= 0x10000;
+ if (tmpv2 == SSP_PC) {
+ ret |= 0x10000;
+ *end_cond = -A_COND_AL;
+ }
ret += 3; break;
// bra cond, addr
case 0x26:
tmpv = tr_cond_check(op);
tr_mov16_cond(tmpv, 0, imm);
- if (tmpv != A_COND_AL) {
+ if (tmpv != A_COND_AL)
tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
- }
tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
ret |= 0x10000;
+ *end_cond = tmpv;
+ *jump_pc = imm;
ret += 2; break;
// mod cond, op
return ret;
}
+static void emit_block_prologue(void)
+{
+ // check if there are enough cycles left
+ // note: r0 must contain PC of current block
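+ // r11 holds the remaining cycle count, set up by ssp_drc_entry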
+ EOP_CMP_IMM(11,0,0); // cmp r11, #0
+ emit_call(A_COND_LE, ssp_drc_end);
+}
+
+/* cond:
+ * >0: direct (un)conditional jump
+ * <0: indirect jump
+ */
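+/* pc: branch target of the jump (when known); end_pc: PC right after
+ * the block, used as the fall-through target of conditional jumps */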
+static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
+{
+ if (cycles > 0xff) { printf("large cycle count: %i\n", cycles); cycles = 0xff; }
+ EOP_SUB_IMM(11,11,0,cycles); // sub r11, r11, #cycles
+
+ if (cond < 0 || (end_pc >= 0x400 && pc < 0x400)) {
+ // indirect jump, or rom -> iram jump, must use dispatcher
+ emit_jump(A_COND_AL, ssp_drc_next);
+ }
+ else if (cond == A_COND_AL) {
+ u32 *target = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
+ if (target != NULL)
+ emit_jump(A_COND_AL, target);
+ else {
+ emit_jump(A_COND_AL, ssp_drc_next);
+ // cause the next block to be emitted over the jump instruction
+ tcache_ptr--;
+ }
+ }
+ else {
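+ // conditional jump: emit the taken branch under 'cond' and the
+ // fall-through under the negated condition, linking each one
+ // directly if its block is already translated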
+ u32 *target1 = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
+ u32 *target2 = (end_pc < 0x400) ? block_table_iram[ssp->drc.iram_context][end_pc] : block_table[end_pc];
+ if (target1 != NULL)
+ emit_jump(cond, target1);
+ else emit_call(cond, ssp_drc_next_patch);
+ if (target2 != NULL)
+ emit_jump(tr_neg_cond(cond), target2); // neg_cond, to be able to swap jumps if needed
+ else emit_call(tr_neg_cond(cond), ssp_drc_next_patch);
+ }
+}
+
void *ssp_translate_block(int pc)
{
unsigned int op, op1, imm, ccount = 0;
unsigned int *block_start;
- int ret, ret_prev = -1, tpc;
+ int ret, end_cond = A_COND_AL, jump_pc = -1;
printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2);
block_start = tcache_ptr;
if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6)
imm = PROGRAM(pc++); // immediate
- tpc = pc;
- ret = translate_op(op, &pc, imm);
+ ret = translate_op(op, &pc, imm, &end_cond, &jump_pc);
if (ret <= 0)
{
printf("NULL func! op=%08x (%02x)\n", op, op1);
exit(1);
}
- else
- {
- ccount += ret & 0xffff;
- if (ret & 0x10000) break;
- }
- ret_prev = ret;
+ ccount += ret & 0xffff;
+ if (ret & 0x10000) break;
}
- if (ccount >= 100)
- emit_pc_dump(pc);
+ if (ccount >= 100) {
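+ // cap block length: end the block here and continue at the next PC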
+ end_cond = A_COND_AL;
+ jump_pc = pc;
+ emit_mov_const(A_COND_AL, 0, pc);
+ }
tr_flush_dirty_prs();
tr_flush_dirty_ST();
tr_flush_dirty_pmcrs();
- emit_block_epilogue(ccount + 1);
- *tcache_ptr++ = 0xffffffff; // end of block
+ emit_block_epilogue(ccount, end_cond, jump_pc, pc);
if (tcache_ptr - tcache > TCACHE_SIZE/4) {
printf("tcache overflow!\n");
memset(block_table, 0, sizeof(block_table));
memset(block_table_iram, 0, sizeof(block_table_iram));
tcache_ptr = tcache;
- *tcache_ptr++ = 0xffffffff;
PicoLoadStateHook = ssp1601_state_load;
void ssp1601_dyn_reset(ssp1601_t *ssp)
{
+ // debug
+ {
+ int i, u;
+ FILE *f = fopen("tcache.bin", "wb");
+ fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
+ fclose(f);
+
+ for (i = 0; i < 0x5090/2; i++)
+ if (block_table[i])
+ printf("%06x -> __:%04x\n", (block_table[i] - tcache)*4, i<<1);
+ for (u = 1; u < 15; u++)
+ for (i = 0; i < 0x800/2; i++)
+ if (block_table_iram[u][i])
+ printf("%06x -> %02i:%04x\n", (block_table_iram[u][i] - tcache)*4, u, i<<1);
+ }
+
ssp1601_reset(ssp);
ssp->drc.iram_dirty = 1;
ssp->drc.iram_context = 0;
ssp->drc.ptr_dram = (u32) svp->dram;
ssp->drc.ptr_btable = (u32) block_table;
ssp->drc.ptr_btable_iram = (u32) block_table_iram;
+
+ // prevent new versions of IRAM from appearing
+ memset(svp->iram_rom, 0, 0x800);
}
void ssp1601_dyn_run(int cycles)
extern unsigned int tcache[];
-void ssp_regfile_load(void);
-void ssp_regfile_store(void);
int ssp_drc_entry(int cycles);
void ssp_drc_next(void);
+void ssp_drc_next_patch(void);
+void ssp_drc_end(void);
void ssp_hle_800(void);
#define A_COND_NE 0x1
#define A_COND_MI 0x4
#define A_COND_PL 0x5
+#define A_COND_LE 0xd
/* addressing mode 1 */
#define A_AM1_LSL 0
#define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8)
#define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8)
#define EOP_TST_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8)
+#define EOP_CMP_IMM( rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8)
#define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8)
#define EOP_MOV_REG(s, rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 0, val&0xff);
}
-/*
-static void check_offset_12(unsigned int val)
-{
- if (!(val & ~0xfff)) return;
- printf("offset_12 overflow %04x\n", val);
- exit(1);
-}
-*/
-
static void check_offset_24(int val)
{
if (val >= (int)0xff000000 && val <= 0x00ffffff) return;
exit(1);
}
-static void emit_call(void *target)
+static void emit_call(int cond, void *target)
{
int val = (unsigned int *)target - tcache_ptr - 2;
check_offset_24(val);
- EOP_BL(val & 0xffffff); // bl target
-}
-
-static void emit_block_prologue(void)
-{
- // nothing
+ EOP_C_B(cond,1,val & 0xffffff); // bl target
}
-static void emit_block_epilogue(int cycles)
+static void emit_jump(int cond, void *target)
{
- if (cycles > 0xff) { printf("large cycle count: %i\n", cycles); cycles = 0xff; }
- EOP_SUB_IMM(11,11,0,cycles); // sub r11, r11, #cycles
-#ifdef ARM
- emit_call(ssp_drc_next);
-#endif
-}
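+ // branch offset is in words, relative to PC+8 (hence the -2)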
+ int val = (unsigned int *)target - tcache_ptr - 2;
+ check_offset_24(val);
-static void emit_pc_dump(int pc)
-{
- emit_mov_const(A_COND_AL, 3, pc<<16);
- EOP_STR_IMM(3,7,0x400+6*4); // str r3, [r7, #(0x400+6*8)]
+ EOP_C_B(cond,0,val & 0xffffff); // b target
}
-static void handle_caches()
+static void handle_caches(void)
{
#ifdef ARM
extern void flush_inval_caches(const void *start_addr, const void *end_addr);
unsigned int ptr_btable; // 49c
unsigned int ptr_btable_iram; // 4a0
unsigned int tmp0; // 4a4
+ unsigned int tmp1; // 4a8
+ unsigned int tmp2; // 4ac
} drc;
} ssp1601_t;
.global tcache
.global flush_inval_caches
-.global ssp_regfile_load
-.global ssp_regfile_store
.global ssp_drc_entry
.global ssp_drc_next
+.global ssp_drc_next_patch
+.global ssp_drc_end
.global ssp_hle_800
@ translation cache buffer
@ r10: P
@ r11: cycles
-@ trashes r2,r3
+#define SSP_OFFS_GR 0x400
+#define SSP_PC 6
+#define SSP_P 7
+#define SSP_PM0 8
+#define SSP_OFFS_EMUSTAT 0x484 // emu_status
+#define SSP_OFFS_IRAM_DIRTY 0x494
+#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
+#define SSP_OFFS_BLTAB 0x49c // block_table
+#define SSP_OFFS_BLTAB_IRAM 0x4a0
+#define SSP_OFFS_TMP0 0x4a4 // for entry PC
+#define SSP_OFFS_TMP1 0x4a8 // scratch (block table slot / saved target)
+#define SSP_OFFS_TMP2 0x4ac // address of patchable call + 4
+#define SSP_WAIT_PM0 0x2000
+
+
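+@ dispatcher: r0 holds the SSP PC to continue at. Look the PC up in the
+@ block tables (translating the block first if needed) and jump to the
+@ native code. With patch_jump=1 the conditional call that got us here
+@ is rewritten by ssp_drc_do_patch into a direct branch to the target.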
+.macro ssp_drc_do_next patch_jump=0
+.if \patch_jump
+ str lr, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
+.endif
+ mov r0, r0, lsl #16
+ mov r0, r0, lsr #16
+ str r0, [r7, #SSP_OFFS_TMP0]
+ cmp r0, #0x400
+ blt 0f @ ssp_de_iram
+
+ ldr r2, [r7, #SSP_OFFS_BLTAB]
+ ldr r2, [r2, r0, lsl #2]
+ tst r2, r2
+.if \patch_jump
+ bne ssp_drc_do_patch
+.else
+ bxne r2
+.endif
+ bl ssp_translate_block
+ mov r2, r0
+ ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
+ ldr r1, [r7, #SSP_OFFS_BLTAB]
+ str r2, [r1, r0, lsl #2]
+.if \patch_jump
+ b ssp_drc_do_patch
+.else
+ bx r2
+.endif
+
+0: @ ssp_de_iram:
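+ @ IRAM block: blocks are kept per iram_context; if IRAM has been
+ @ written since the last run, determine the context first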
+ ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
+ tst r1, r1
+ ldreq r1, [r7, #SSP_OFFS_IRAM_CTX]
+ beq 1f @ ssp_de_iram_ctx
+
+ bl ssp_get_iram_context
+ mov r1, #0
+ str r1, [r7, #SSP_OFFS_IRAM_DIRTY]
+ mov r1, r0
+ str r1, [r7, #SSP_OFFS_IRAM_CTX]
+ ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
+
+1: @ ssp_de_iram_ctx:
+ ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
+ add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
+ add r1, r2, r0, lsl #2
+ ldr r2, [r1]
+ tst r2, r2
+.if \patch_jump
+ bne ssp_drc_do_patch
+.else
+ bxne r2
+.endif
+ str r1, [r7, #SSP_OFFS_TMP1]
+ bl ssp_translate_block
+ mov r2, r0
+ ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
+ ldr r1, [r7, #SSP_OFFS_TMP1] @ &block_table_iram[iram_context][rPC]
+ str r2, [r1]
+.if \patch_jump
+ b ssp_drc_do_patch
+.else
+ bx r2
+.endif
+.endm @ ssp_drc_do_next
+
+
+ssp_drc_entry:
+ stmfd sp!, {r4-r11, lr}
+ mov r11, r0
ssp_regfile_load:
ldr r7, =ssp
ldr r7, [r7]
ldr r8, [r7, #0x440] @ r0-r2
ldr r9, [r7, #0x444] @ r4-r6
- ldr r10,[r7, #(0x400+7*4)] @ P
- bx lr
+ ldr r10,[r7, #(0x400+SSP_P*4)] @ P
+
+ ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
+ mov r0, r0, lsr #16
+
+
+ssp_drc_next:
+ ssp_drc_do_next 0
+
+
+ssp_drc_next_patch:
+ ssp_drc_do_next 1
+
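+@ patch the conditional call that brought us here (its address + 4 is in
+@ TMP2) so it branches straight to the target block (r2) next time:
+@ - target directly follows the call: replace the call with a nop
+@ - target follows the next instruction (the other exit branch): move
+@   that branch up into the call's slot (offset +1) and nop its old slot
+@ - otherwise: turn the call into a direct branch, keeping its condition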
+ssp_drc_do_patch:
+ ldr r1, [r7, #SSP_OFFS_TMP2] @ jump instr. (actually call) address + 4
+ subs r12,r2, r1
+ moveq r3, #0xe1000000
+ orreq r3, r3, #0x00a00000 @ nop
+ streq r3, [r1, #-4]
+ beq ssp_drc_dp_end
+
+ cmp r12,#4
+ ldreq r3, [r1]
+ addeq r3, r3, #1
+ streq r3, [r1, #-4] @ move the other cond up
+ moveq r3, #0xe1000000
+ orreq r3, r3, #0x00a00000
+ streq r3, [r1] @ fill its place with nop
+ beq ssp_drc_dp_end
+
+ ldr r3, [r1, #-4]
+ sub r12,r12,#4
+ mov r3, r3, lsr #24
+ bic r3, r3, #1 @ L bit
+ orr r3, r3, r12,lsl #6
+ mov r3, r3, ror #8 @ patched branch instruction
+ str r3, [r1, #-4]
+
+ssp_drc_dp_end:
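+ @ r2 (target) is saved around the cache-flush call; flush/invalidate
+ @ the patched instruction, then jump to the target block with the
+ @ entry PC back in r0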
+ str r2, [r7, #SSP_OFFS_TMP1]
+ sub r0, r1, #4
+ add r1, r1, #4
+ bl flush_inval_caches
+ ldr r2, [r7, #SSP_OFFS_TMP1]
+ ldr r0, [r7, #SSP_OFFS_TMP0]
+ bx r2
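+
+@ cycle budget exhausted (or execution stopped): write the PC in r0 back
+@ to the register file and fall through to the register store and return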
+ssp_drc_end:
+ mov r0, r0, lsl #16
+ str r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
+
ssp_regfile_store:
- str r10,[r7, #(0x400+7*4)] @ P
+ str r10,[r7, #(0x400+SSP_P*4)] @ P
str r8, [r7, #0x440] @ r0-r2
str r9, [r7, #0x444] @ r4-r6
add r8, r7, #0x400
add r8, r8, #4
stmia r8, {r2,r3,r5,r6,r9}
- bx lr
-
-
-#define SSP_OFFS_GR 0x400
-#define SSP_PM0 8
-#define SSP_PC 6
-#define SSP_OFFS_EMUSTAT 0x484 // emu_status
-#define SSP_OFFS_IRAM_DIRTY 0x494
-#define SSP_OFFS_IRAM_CTX 0x498 // iram_context
-#define SSP_OFFS_BLTAB 0x49c // block_table
-#define SSP_OFFS_BLTAB_IRAM 0x4a0
-#define SSP_OFFS_TMP0 0x4a4
-#define SSP_WAIT_PM0 0x2000
-
-
-ssp_drc_entry:
- stmfd sp!, {r4-r11, lr}
- mov r11, r0
- bl ssp_regfile_load
-
-ssp_drc_next:
- cmp r11, #0
- bmi ssp_drc_end
-
- ldr r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
- mov r0, r0, lsr #16
- str r0, [r7, #SSP_OFFS_TMP0]
- cmp r0, #0x400
- blt ssp_de_iram
- ldr r1, [r7, #SSP_OFFS_BLTAB]
- ldr r1, [r1, r0, lsl #2]
- tst r1, r1
- bxne r1
- bl ssp_translate_block
- ldr r2, [r7, #SSP_OFFS_TMP0] @ entry PC
- ldr r1, [r7, #SSP_OFFS_BLTAB]
- str r0, [r1, r2, lsl #2]
- bx r0
-
-ssp_de_iram:
- ldr r1, [r7, #SSP_OFFS_IRAM_DIRTY]
- tst r1, r1
- ldreq r1, [r7, #SSP_OFFS_IRAM_CTX]
- beq ssp_de_iram_ctx
-
- bl ssp_get_iram_context
- mov r1, #0
- str r1, [r7, #SSP_OFFS_IRAM_DIRTY]
- mov r1, r0
- str r1, [r7, #SSP_OFFS_IRAM_CTX]
- ldr r0, [r7, #SSP_OFFS_TMP0] @ entry PC
-
-ssp_de_iram_ctx:
- ldr r2, [r7, #SSP_OFFS_BLTAB_IRAM]
- add r2, r2, r1, lsl #12 @ block_tab_iram + iram_context * 0x800/2*4
- add r2, r2, r0, lsl #2
- ldr r1, [r2]
- tst r1, r1
- bxne r1
- str r2, [r7, #SSP_OFFS_TMP0]
- bl ssp_translate_block
- ldr r2, [r7, #SSP_OFFS_TMP0] @ &block_table_iram[iram_context][rPC]
- str r0, [r2]
- bx r0
-
-ssp_drc_end:
- bl ssp_regfile_store
mov r0, r11
ldmfd sp!, {r4-r11, lr}
bx lr
@ andi 2
@ bra z=1, gloc_0800
ssp_hle_800:
- @ block prologue
-@ stmfd sp!, {r4-r11, lr}
-@ bl regfile_load
-@ mov r11, #0
-
ldr r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
ldr r1, [r7, #SSP_OFFS_EMUSTAT]
tst r0, #0x20000
orreq r1, r1, #SSP_WAIT_PM0
- addeq r11,r11, #1024
+ subeq r11,r11, #1024
streq r1, [r7, #SSP_OFFS_EMUSTAT]
- movne r0, #0x04000000
- orrne r0, r0, #0x00040000
- strne r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
-
- bl ssp_drc_next
-@ bl regfile_store
-@ add r0, r11, #3
-@ ldmfd sp!, {r4-r11, lr}
-@ bx lr
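+ @ waiting on PM0: stop with PC 0x400; otherwise continue at PC 0x404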
+ mov r0, #0x400
+ beq ssp_drc_end
+ orrne r0, r0, #0x004
+
+ b ssp_drc_next