svp compiler: jump fixup
author notaz <notasas@gmail.com>
Sat, 15 Mar 2008 15:01:42 +0000 (15:01 +0000)
committer notaz <notasas@gmail.com>
Sat, 15 Mar 2008 15:01:42 +0000 (15:01 +0000)
git-svn-id: file:///home/notaz/opt/svn/PicoDrive@385 be3aeb3a-fb24-0410-a615-afba39da0efa

Pico/carthw/svp/compiler.c
Pico/carthw/svp/compiler.h
Pico/carthw/svp/gen_arm.c
Pico/carthw/svp/ssp16.h
Pico/carthw/svp/stub_arm.S

index f89ad95..445f7cc 100644 (file)
@@ -22,8 +22,9 @@ extern ssp1601_t *ssp;
 #ifndef ARM
 #define DUMP_BLOCK 0x0c9a
 unsigned int tcache[512*1024];
-void regfile_load(void){}
-void regfile_store(void){}
+void ssp_drc_next(void){}
+void ssp_drc_next_patch(void){}
+void ssp_drc_end(void){}
 #endif
 
 #include "gen_arm.c"
@@ -343,7 +344,7 @@ static void tr_mov16_cond(int cond, int r, int val)
        hostreg_r[r] = -1;
 }
 
-/* trashes r0 */
+/* trashes r1 */
 static void tr_flush_dirty_pmcrs(void)
 {
        u32 i, val = (u32)-1;
@@ -756,7 +757,7 @@ static void tr_PMX_to_r0(int reg)
        tr_flush_dirty_ST();
        //tr_flush_dirty_pmcrs();
        tr_mov16(0, reg);
-       emit_call(ssp_pm_read);
+       emit_call(A_COND_AL, ssp_pm_read);
        hostreg_clear();
 }
 
@@ -906,9 +907,12 @@ static void tr_r0_to_STACK(int const_val)
 
 static void tr_r0_to_PC(int const_val)
 {
+/*
+ * do nothing - dispatcher will take care of this
        EOP_MOV_REG_LSL(1, 0, 16);              // mov  r1, r0, lsl #16
        EOP_STR_IMM(1,7,0x400+6*4);             // str  r1, [r7, #(0x400+6*8)]
        hostreg_r[1] = -1;
+*/
 }
 
 static void tr_r0_to_AL(int const_val)
@@ -990,7 +994,7 @@ static void tr_r0_to_PMX(int reg)
        tr_flush_dirty_ST();
        //tr_flush_dirty_pmcrs();
        tr_mov16(1, reg);
-       emit_call(ssp_pm_write);
+       emit_call(A_COND_AL, ssp_pm_write);
        hostreg_clear();
 }
 
@@ -1166,7 +1170,7 @@ static int tr_detect_rotate(unsigned int op, int *pc, int imm)
 
 // -----------------------------------------------------
 
-static int translate_op(unsigned int op, int *pc, int imm)
+static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *jump_pc)
 {
        u32 tmpv, tmpv2, tmpv3;
        int ret = 0;
@@ -1188,7 +1192,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        }
                        tr_read_funcs[tmpv](op);
                        tr_write_funcs[tmpv2]((known_regb & (1 << tmpv)) ? known_regs.gr[tmpv].h : -1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret++; break;
 
                // ld d, (ri)
@@ -1202,7 +1209,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                             tr_rX_read(r, mod);
                        else tr_ptrr_mod(r, mod, 1, 1);
                        tr_write_funcs[tmpv](-1);
-                       if (tmpv == SSP_PC) ret |= 0x10000;
+                       if (tmpv == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret++; break;
                }
 
@@ -1228,7 +1238,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        if (ret > 0) break;
                        tr_mov16(0, imm);
                        tr_write_funcs[tmpv](imm);
-                       if (tmpv == SSP_PC) ret |= 0x10000;
+                       if (tmpv == SSP_PC) {
+                               ret |= 0x10000;
+                               *jump_pc = imm;
+                       }
                        ret += 2; break;
 
                // ld d, ((ri))
@@ -1236,7 +1249,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        tmpv2 = (op >> 4) & 0xf;  // dst
                        tr_rX_read2(op);
                        tr_write_funcs[tmpv2](-1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret += 3; break;
 
                // ldi (ri), imm
@@ -1321,11 +1337,12 @@ static int translate_op(unsigned int op, int *pc, int imm)
                                tcache_ptr = real_ptr;
                        }
                        tr_mov16_cond(tmpv, 0, imm);
-                       if (tmpv != A_COND_AL) {
+                       if (tmpv != A_COND_AL)
                                tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
-                       }
                        tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
                        ret |= 0x10000;
+                       *end_cond = tmpv;
+                       *jump_pc = imm;
                        ret += 2; break;
                }
 
@@ -1338,18 +1355,22 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        EOP_LDRH_SIMPLE(0,0);                                   // ldrh r0, [r0]
                        hostreg_r[0] = hostreg_r[1] = -1;
                        tr_write_funcs[tmpv2](-1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret += 3; break;
 
                // bra cond, addr
                case 0x26:
                        tmpv = tr_cond_check(op);
                        tr_mov16_cond(tmpv, 0, imm);
-                       if (tmpv != A_COND_AL) {
+                       if (tmpv != A_COND_AL)
                                tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
-                       }
                        tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
                        ret |= 0x10000;
+                       *end_cond = tmpv;
+                       *jump_pc = imm;
                        ret += 2; break;
 
                // mod cond, op
@@ -1551,11 +1572,54 @@ static int translate_op(unsigned int op, int *pc, int imm)
        return ret;
 }
 
+static void emit_block_prologue(void)
+{
+       // check if there are enough cycles left in r11; if not (r11 <= 0), leave through ssp_drc_end
+       // note: r0 must contain PC of current block (ssp_drc_end stores r0 as the SSP PC)
+       EOP_CMP_IMM(11,0,0);                    // cmp r11, #0
+       emit_call(A_COND_LE, ssp_drc_end);      // blle ssp_drc_end
+}
+
+/* Emit the code that ends a translated block: subtract the block's cycle
+ * count from r11, then transfer control to whatever runs next.
+ *
+ * cycles: cycle cost of the block; values above 0xff are reported with
+ *         printf and clamped to 0xff before the SUB immediate is emitted.
+ * cond:
+ *  >=0: direct jump; cond is the ARM condition code of the jump and
+ *       A_COND_AL means unconditional. Note that 0 is a direct jump too:
+ *       only cond < 0 is tested below.
+ *       NOTE(review): 0 is presumably A_COND_EQ - confirm in gen_arm.c.
+ *   <0: indirect jump, always routed through ssp_drc_next
+ * pc:     SSP PC of the jump target (used for direct jumps)
+ * end_pc: SSP PC following the block, i.e. the target used with the
+ *         negated condition when a conditional jump is not taken
+ */
+static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
+{
+       if (cycles > 0xff) { printf("large cycle count: %i\n", cycles); cycles = 0xff; }
+       EOP_SUB_IMM(11,11,0,cycles);            // sub r11, r11, #cycles
+
+       if (cond < 0 || (end_pc >= 0x400 && pc < 0x400)) {
+               // indirect jump, or rom -> iram jump, must use dispatcher
+               emit_jump(A_COND_AL, ssp_drc_next);
+       }
+       else if (cond == A_COND_AL) {
+               u32 *target = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
+               if (target != NULL)
+                       emit_jump(A_COND_AL, target);
+               else {
+                       emit_jump(A_COND_AL, ssp_drc_next);
+                       // cause the next block to be emitted over jump instruction
+                       // (the jump word stays in the cache until it is overwritten)
+                       tcache_ptr--;
+               }
+       }
+       else {
+               u32 *target1 = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
+               u32 *target2 = (end_pc < 0x400) ? block_table_iram[ssp->drc.iram_context][end_pc] : block_table[end_pc];
+               if (target1 != NULL)
+                    emit_jump(cond, target1);
+               else emit_call(cond, ssp_drc_next_patch); // not translated yet: BL to the patching dispatcher
+               if (target2 != NULL)
+                    emit_jump(tr_neg_cond(cond), target2); // neg_cond, to be able to swap jumps if needed
+               else emit_call(tr_neg_cond(cond), ssp_drc_next_patch); // not translated yet: BL to the patching dispatcher
+       }
+}
+
 void *ssp_translate_block(int pc)
 {
        unsigned int op, op1, imm, ccount = 0;
        unsigned int *block_start;
-       int ret, ret_prev = -1, tpc;
+       int ret, end_cond = A_COND_AL, jump_pc = -1;
 
        printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2);
        block_start = tcache_ptr;
@@ -1574,31 +1638,28 @@ void *ssp_translate_block(int pc)
 
                if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6)
                        imm = PROGRAM(pc++); // immediate
-               tpc = pc;
 
-               ret = translate_op(op, &pc, imm);
+               ret = translate_op(op, &pc, imm, &end_cond, &jump_pc);
                if (ret <= 0)
                {
                        printf("NULL func! op=%08x (%02x)\n", op, op1);
                        exit(1);
                }
-               else
-               {
-                       ccount += ret & 0xffff;
-                       if (ret & 0x10000) break;
-               }
 
-               ret_prev = ret;
+               ccount += ret & 0xffff;
+               if (ret & 0x10000) break;
        }
 
-       if (ccount >= 100)
-               emit_pc_dump(pc);
+       if (ccount >= 100) {
+               end_cond = A_COND_AL;
+               jump_pc = pc;
+               emit_mov_const(A_COND_AL, 0, pc);
+       }
 
        tr_flush_dirty_prs();
        tr_flush_dirty_ST();
        tr_flush_dirty_pmcrs();
-       emit_block_epilogue(ccount + 1);
-       *tcache_ptr++ = 0xffffffff; // end of block
+       emit_block_epilogue(ccount, end_cond, jump_pc, pc);
 
        if (tcache_ptr - tcache > TCACHE_SIZE/4) {
                printf("tcache overflow!\n");
@@ -1641,7 +1702,6 @@ int ssp1601_dyn_startup(void)
        memset(block_table, 0, sizeof(block_table));
        memset(block_table_iram, 0, sizeof(block_table_iram));
        tcache_ptr = tcache;
-       *tcache_ptr++ = 0xffffffff;
 
        PicoLoadStateHook = ssp1601_state_load;
 
@@ -1657,6 +1717,22 @@ int ssp1601_dyn_startup(void)
 
 void ssp1601_dyn_reset(ssp1601_t *ssp)
 {
+       // debug
+       {
+               int i, u;
+               FILE *f = fopen("tcache.bin", "wb");
+               fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
+               fclose(f);
+
+               for (i = 0; i < 0x5090/2; i++)
+                       if (block_table[i])
+                               printf("%06x -> __:%04x\n", (block_table[i] - tcache)*4, i<<1);
+               for (u = 1; u < 15; u++)
+                       for (i = 0; i < 0x800/2; i++)
+                               if (block_table_iram[u][i])
+                                       printf("%06x -> %02i:%04x\n", (block_table_iram[u][i] - tcache)*4, u, i<<1);
+       }
+
        ssp1601_reset(ssp);
        ssp->drc.iram_dirty = 1;
        ssp->drc.iram_context = 0;
@@ -1666,6 +1742,9 @@ void ssp1601_dyn_reset(ssp1601_t *ssp)
        ssp->drc.ptr_dram = (u32) svp->dram;
        ssp->drc.ptr_btable = (u32) block_table;
        ssp->drc.ptr_btable_iram = (u32) block_table_iram;
+
+       // prevent new versions of IRAM from appearing
+       memset(svp->iram_rom, 0, 0x800);
 }
 
 void ssp1601_dyn_run(int cycles)
index 32db84b..8b39b29 100644 (file)
@@ -2,10 +2,10 @@
 
 extern unsigned int tcache[];
 
-void ssp_regfile_load(void);
-void ssp_regfile_store(void);
 int  ssp_drc_entry(int cycles);
 void ssp_drc_next(void);
+void ssp_drc_next_patch(void);
+void ssp_drc_end(void);
 
 void ssp_hle_800(void);
 
index 909433a..00d5d2c 100644 (file)
@@ -15,6 +15,7 @@
 #define A_COND_NE 0x1
 #define A_COND_MI 0x4
 #define A_COND_PL 0x5
+#define A_COND_LE 0xd
 
 /* addressing mode 1 */
 #define A_AM1_LSL 0
@@ -52,6 +53,7 @@
 #define EOP_AND_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_AND,0,rn,rd,ror2,imm8)
 #define EOP_SUB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_SUB,0,rn,rd,ror2,imm8)
 #define EOP_TST_IMM(   rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_TST,1,rn, 0,ror2,imm8)
+#define EOP_CMP_IMM(   rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_CMP,1,rn, 0,ror2,imm8)
 #define EOP_RSB_IMM(rd,rn,ror2,imm8) EOP_C_DOP_IMM(A_COND_AL,A_OP_RSB,0,rn,rd,ror2,imm8)
 
 #define EOP_MOV_REG(s,   rd,shift_imm,shift_op,rm) EOP_C_DOP_REG_XIMM(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
@@ -171,15 +173,6 @@ static void emit_mov_const(int cond, int d, unsigned int val)
                EOP_C_DOP_IMM(cond, need_or ? A_OP_ORR : A_OP_MOV, 0, need_or ? d : 0, d, 0, val&0xff);
 }
 
-/*
-static void check_offset_12(unsigned int val)
-{
-       if (!(val & ~0xfff)) return;
-       printf("offset_12 overflow %04x\n", val);
-       exit(1);
-}
-*/
-
 static void check_offset_24(int val)
 {
        if (val >= (int)0xff000000 && val <= 0x00ffffff) return;
@@ -187,35 +180,23 @@ static void check_offset_24(int val)
        exit(1);
 }
 
-static void emit_call(void *target)
+static void emit_call(int cond, void *target)
 {
        int val = (unsigned int *)target - tcache_ptr - 2;
        check_offset_24(val);
 
-       EOP_BL(val & 0xffffff);                 // bl target
-}
-
-static void emit_block_prologue(void)
-{
-       // nothing
+       EOP_C_B(cond,1,val & 0xffffff);                 // bl target
 }
 
-static void emit_block_epilogue(int cycles)
+static void emit_jump(int cond, void *target)
 {
-       if (cycles > 0xff) { printf("large cycle count: %i\n", cycles); cycles = 0xff; }
-       EOP_SUB_IMM(11,11,0,cycles);            // sub r11, r11, #cycles
-#ifdef ARM
-       emit_call(ssp_drc_next);
-#endif
-}
+       int val = (unsigned int *)target - tcache_ptr - 2;
+       check_offset_24(val);
 
-static void emit_pc_dump(int pc)
-{
-       emit_mov_const(A_COND_AL, 3, pc<<16);
-       EOP_STR_IMM(3,7,0x400+6*4);             // str r3, [r7, #(0x400+6*8)]
+       EOP_C_B(cond,0,val & 0xffffff);                 // b target
 }
 
-static void handle_caches()
+static void handle_caches(void)
 {
 #ifdef ARM
        extern void flush_inval_caches(const void *start_addr, const void *end_addr);
index fddb5ed..9f395e3 100644 (file)
@@ -61,6 +61,8 @@ typedef struct
                unsigned int ptr_btable;        // 49c
                unsigned int ptr_btable_iram;   // 4a0
                unsigned int tmp0;              // 4a4
+               unsigned int tmp1;              // 4a8
+               unsigned int tmp2;              // 4ac
        } drc;
 } ssp1601_t;
 
index d215388..58195a7 100644 (file)
@@ -7,10 +7,10 @@
 .global tcache
 
 .global flush_inval_caches
-.global ssp_regfile_load
-.global ssp_regfile_store
 .global ssp_drc_entry
 .global ssp_drc_next
+.global ssp_drc_next_patch
+.global ssp_drc_end
 .global ssp_hle_800
 
 @ translation cache buffer
@@ -46,8 +46,92 @@ flush_inval_caches:
 @ r10: P
 @ r11: cycles
 
-@ trashes r2,r3
 
+#define SSP_OFFS_GR         0x400
+#define SSP_PC                  6
+#define SSP_P                   7
+#define SSP_PM0                 8
+#define SSP_OFFS_EMUSTAT    0x484 // emu_status
+#define SSP_OFFS_IRAM_DIRTY 0x494
+#define SSP_OFFS_IRAM_CTX   0x498 // iram_context
+#define SSP_OFFS_BLTAB      0x49c // block_table
+#define SSP_OFFS_BLTAB_IRAM 0x4a0
+#define SSP_OFFS_TMP0       0x4a4 // for entry PC
+#define SSP_OFFS_TMP1       0x4a8
+#define SSP_OFFS_TMP2       0x4ac
+#define SSP_WAIT_PM0       0x2000
+
+
+.macro ssp_drc_do_next patch_jump=0
+.if \patch_jump
+    str     lr, [r7, #SSP_OFFS_TMP2]           @ jump instr. (actually call) address + 4, read back by ssp_drc_do_patch
+.endif
+    mov     r0, r0, lsl #16                    @ in: r0 = SSP PC of the next block; keep only the low 16 bits
+    mov     r0, r0, lsr #16
+    str     r0, [r7, #SSP_OFFS_TMP0]           @ remember entry PC across the calls below
+    cmp     r0, #0x400                         @ PC below 0x400 is looked up in the IRAM table
+    blt     0f @ ssp_de_iram
+
+    ldr     r2, [r7, #SSP_OFFS_BLTAB]
+    ldr     r2, [r2, r0, lsl #2]               @ r2 = block_table[PC]
+    tst     r2, r2
+.if \patch_jump
+    bne     ssp_drc_do_patch                   @ already translated: patch the caller, r2 = block address
+.else
+    bxne    r2                                 @ already translated: run it
+.endif
+    bl      ssp_translate_block                @ r0 = PC in, translated block address out
+    mov     r2, r0
+    ldr     r0, [r7, #SSP_OFFS_TMP0]           @ entry PC
+    ldr     r1, [r7, #SSP_OFFS_BLTAB]
+    str     r2, [r1, r0, lsl #2]               @ block_table[PC] = new block
+.if \patch_jump
+    b       ssp_drc_do_patch
+.else
+    bx      r2
+.endif
+
+0: @ ssp_de_iram:
+    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
+    tst     r1, r1                             @ dirty flag clear: reuse the stored iram_context
+    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]
+    beq     1f @ ssp_de_iram_ctx
+
+    bl      ssp_get_iram_context               @ result in r0 becomes the new iram_context
+    mov     r1, #0
+    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]     @ clear the dirty flag
+    mov     r1, r0
+    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
+    ldr     r0, [r7, #SSP_OFFS_TMP0]           @ entry PC
+    
+1: @ ssp_de_iram_ctx:
+    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
+    add     r2, r2, r1, lsl #12                        @ block_tab_iram + iram_context * 0x800/2*4
+    add     r1, r2, r0, lsl #2                 @ r1 = &block_table_iram[iram_context][PC]
+    ldr     r2, [r1]
+    tst     r2, r2
+.if \patch_jump
+    bne     ssp_drc_do_patch                   @ already translated: patch the caller, r2 = block address
+.else
+    bxne    r2                                 @ already translated: run it
+.endif
+    str     r1, [r7, #SSP_OFFS_TMP1]           @ keep the table slot address across the call
+    bl      ssp_translate_block
+    mov     r2, r0
+    ldr     r0, [r7, #SSP_OFFS_TMP0]           @ entry PC
+    ldr     r1, [r7, #SSP_OFFS_TMP1]           @ &block_table_iram[iram_context][rPC]
+    str     r2, [r1]                           @ record the new block in the table
+.if \patch_jump
+    b       ssp_drc_do_patch
+.else
+    bx      r2
+.endif
+.endm @ ssp_drc_do_next
+
+
+ssp_drc_entry:
+    stmfd   sp!, {r4-r11, lr}
+    mov     r11, r0
 ssp_regfile_load:
     ldr     r7, =ssp
     ldr     r7, [r7]
@@ -69,12 +153,60 @@ ssp_regfile_load:
 
     ldr     r8, [r7, #0x440]            @ r0-r2
     ldr     r9, [r7, #0x444]            @ r4-r6
-    ldr     r10,[r7, #(0x400+7*4)]      @ P
-    bx      lr
+    ldr     r10,[r7, #(0x400+SSP_P*4)]  @ P
+
+    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
+    mov     r0, r0, lsr #16
+
+
+ssp_drc_next:
+    ssp_drc_do_next 0
+
+
+ssp_drc_next_patch:
+    ssp_drc_do_next 1
+
+ssp_drc_do_patch:
+    ldr     r1, [r7, #SSP_OFFS_TMP2]   @ jump instr. (actually call) address + 4; r2 = target block address
+    subs    r12,r2, r1                 @ r12 = target - (call address + 4)
+    moveq   r3,     #0xe1000000
+    orreq   r3, r3, #0x00a00000                @ nop
+    streq   r3, [r1, #-4]              @ target directly follows the call: replace the call with nop
+    beq     ssp_drc_dp_end
+
+    cmp     r12,#4                     @ target starts right after the instruction that follows the call?
+    ldreq   r3, [r1]
+    addeq   r3, r3, #1                 @ it moves one word earlier, so its low (offset) field grows by 1
+    streq   r3, [r1, #-4]               @ move the other cond up
+    moveq   r3,     #0xe1000000
+    orreq   r3, r3, #0x00a00000
+    streq   r3, [r1]                    @ fill its place with nop
+    beq     ssp_drc_dp_end
+
+    ldr     r3, [r1, #-4]              @ general case: rewrite the call in place
+    sub     r12,r12,#4                 @ byte distance from (call address + 8) to the target
+    mov     r3, r3, lsr #24            @ keep only the top byte of the call instruction
+    bic     r3, r3, #1                 @ L bit
+    orr     r3, r3, r12,lsl #6         @ after the rotate below this leaves r12 >> 2 in the low 24 bits
+    mov     r3, r3, ror #8              @ patched branch instruction
+    str     r3, [r1, #-4]
+
+ssp_drc_dp_end:
+    str     r2, [r7, #SSP_OFFS_TMP1]   @ keep the target across the call below
+    sub     r0, r1, #4                 @ start = address of the patched call
+    add     r1, r1, #4                 @ end = one word past the instruction after it
+    bl      flush_inval_caches         @ flush_inval_caches(start, end)
+    ldr     r2, [r7, #SSP_OFFS_TMP1]
+    ldr     r0, [r7, #SSP_OFFS_TMP0]   @ r0 = entry PC saved by ssp_drc_do_next
+    bx      r2                         @ continue in the target block
 
 
+ssp_drc_end:
+    mov     r0, r0, lsl #16
+    str     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
+
 ssp_regfile_store:
-    str     r10,[r7, #(0x400+7*4)]      @ P
+    str     r10,[r7, #(0x400+SSP_P*4)]  @ P
     str     r8, [r7, #0x440]            @ r0-r2
     str     r9, [r7, #0x444]            @ r4-r6
 
@@ -93,74 +225,7 @@ ssp_regfile_store:
     add     r8, r7, #0x400
     add     r8, r8, #4
     stmia   r8, {r2,r3,r5,r6,r9}
-    bx      lr
-
-
-#define SSP_OFFS_GR         0x400
-#define SSP_PM0                 8
-#define SSP_PC                  6
-#define SSP_OFFS_EMUSTAT    0x484 // emu_status
-#define SSP_OFFS_IRAM_DIRTY 0x494
-#define SSP_OFFS_IRAM_CTX   0x498 // iram_context
-#define SSP_OFFS_BLTAB      0x49c // block_table
-#define SSP_OFFS_BLTAB_IRAM 0x4a0
-#define SSP_OFFS_TMP0       0x4a4
-#define SSP_WAIT_PM0       0x2000
-
-
-ssp_drc_entry:
-    stmfd   sp!, {r4-r11, lr}
-    mov     r11, r0
-    bl      ssp_regfile_load
-
-ssp_drc_next:
-    cmp     r11, #0
-    bmi     ssp_drc_end
-
-    ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
-    mov     r0, r0, lsr #16
-    str     r0, [r7, #SSP_OFFS_TMP0]
-    cmp     r0, #0x400
-    blt     ssp_de_iram
 
-    ldr     r1, [r7, #SSP_OFFS_BLTAB]
-    ldr     r1, [r1, r0, lsl #2]
-    tst     r1, r1
-    bxne    r1
-    bl      ssp_translate_block
-    ldr     r2, [r7, #SSP_OFFS_TMP0]           @ entry PC
-    ldr     r1, [r7, #SSP_OFFS_BLTAB]
-    str     r0, [r1, r2, lsl #2]
-    bx      r0
-
-ssp_de_iram:
-    ldr     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
-    tst     r1, r1
-    ldreq   r1, [r7, #SSP_OFFS_IRAM_CTX]
-    beq     ssp_de_iram_ctx
-
-    bl      ssp_get_iram_context
-    mov     r1, #0
-    str     r1, [r7, #SSP_OFFS_IRAM_DIRTY]
-    mov     r1, r0
-    str     r1, [r7, #SSP_OFFS_IRAM_CTX]
-    ldr     r0, [r7, #SSP_OFFS_TMP0]           @ entry PC
-    
-ssp_de_iram_ctx:
-    ldr     r2, [r7, #SSP_OFFS_BLTAB_IRAM]
-    add     r2, r2, r1, lsl #12                        @ block_tab_iram + iram_context * 0x800/2*4
-    add     r2, r2, r0, lsl #2
-    ldr     r1, [r2]
-    tst     r1, r1
-    bxne    r1
-    str     r2, [r7, #SSP_OFFS_TMP0]
-    bl      ssp_translate_block
-    ldr     r2, [r7, #SSP_OFFS_TMP0]           @ &block_table_iram[iram_context][rPC]
-    str     r0, [r2]
-    bx      r0
-
-ssp_drc_end:
-    bl      ssp_regfile_store
     mov     r0, r11
     ldmfd   sp!, {r4-r11, lr}
     bx      lr
@@ -171,25 +236,16 @@ ssp_drc_end:
 @ andi    2
 @ bra     z=1, gloc_0800
 ssp_hle_800:
-    @ block prologue
-@    stmfd   sp!, {r4-r11, lr}
-@    bl      regfile_load
-@    mov     r11, #0
-    
     ldr     r0, [r7, #(SSP_OFFS_GR+SSP_PM0*4)]
     ldr     r1, [r7, #SSP_OFFS_EMUSTAT]
     tst     r0, #0x20000
     orreq   r1, r1, #SSP_WAIT_PM0
-    addeq   r11,r11, #1024
+    subeq   r11,r11, #1024
     streq   r1, [r7, #SSP_OFFS_EMUSTAT]
-    movne   r0,     #0x04000000
-    orrne   r0, r0, #0x00040000
-    strne   r0, [r7, #(SSP_OFFS_GR+SSP_PC*4)]
-
-    bl      ssp_drc_next
-@    bl      regfile_store
-@    add     r0, r11, #3
-@    ldmfd   sp!, {r4-r11, lr}
-@    bx      lr
+    mov     r0,     #0x400
+    beq     ssp_drc_end
+    orrne   r0, r0, #0x004
+
+    b       ssp_drc_next