renderer bugfix, minor adjustments
[picodrive.git] / Pico / carthw / svp / compiler.c
index 22c9850..f8bece3 100644 (file)
@@ -4,8 +4,6 @@
 
 #define u32 unsigned int
 
-static u32 *block_table[0x5090/2];
-static u32 *block_table_iram[15][0x800/2];
 static u32 *tcache_ptr = NULL;
 
 static int nblocks = 0;
@@ -21,7 +19,9 @@ extern ssp1601_t *ssp;
 
 #ifndef ARM
 #define DUMP_BLOCK 0x0c9a
-unsigned int tcache[512*1024];
+u32 *ssp_block_table[0x5090/2];
+u32 *ssp_block_table_iram[15][0x800/2];
+u32 tcache[SSP_TCACHE_SIZE/4];
 void ssp_drc_next(void){}
 void ssp_drc_next_patch(void){}
 void ssp_drc_end(void){}
@@ -277,6 +277,14 @@ static void tr_flush_dirty_prs(void)
 {
        int i, ror = 0, reg;
        int dirty = dirty_regb >> 8;
+       if ((dirty&7) == 7) {
+               emit_mov_const(A_COND_AL, 8, known_regs.r[0]|(known_regs.r[1]<<8)|(known_regs.r[2]<<16));
+               dirty &= ~7;
+       }
+       if ((dirty&0x70) == 0x70) {
+               emit_mov_const(A_COND_AL, 9, known_regs.r[4]|(known_regs.r[5]<<8)|(known_regs.r[6]<<16));
+               dirty &= ~0x70;
+       }
        /* r0-r7 */
        for (i = 0; dirty && i < 8; i++, dirty >>= 1)
        {
@@ -558,6 +566,69 @@ static void tr_rX_read2(int op)
        hostreg_r[0] = hostreg_r[2] = -1;
 }
 
+// check if AL is going to be used later in block; scans ops forward from the PC in known_regs, returns 1 = AL needed, 0 = not needed
+static int tr_predict_al_need(void)
+{
+       int tmpv, tmpv2, op, pc = known_regs.gr[SSP_PC].h; // scan starts at the PC value held in known_regs
+
+       while (1) // no bound on the scan: only the return statements below leave this loop
+       {
+               op = PROGRAM(pc);
+               switch (op >> 9) // dispatch on the bits above the low 9 of the op word
+               {
+                       // ld d, s
+                       case 0x00:
+                               tmpv2 = (op >> 4) & 0xf; // dst
+                               tmpv  = op & 0xf; // src
+                               if ((tmpv2 == SSP_A && tmpv == SSP_P) || tmpv2 == SSP_AL) // ld A, P; ld AL, *
+                                       return 0; // reported as "AL not needed"
+                               break;
+
+                       // ld (ri), s
+                       case 0x02:
+                       // ld ri, s
+                       case 0x0a:
+                       // OP a, s
+                       case 0x10: case 0x30: case 0x40: case 0x60: case 0x70:
+                               tmpv  = op & 0xf; // src
+                               if (tmpv == SSP_AL) // OP *, AL
+                                       return 1; // AL is read as a source operand
+                               break;
+
+                       case 0x04:
+                       case 0x06:
+                       case 0x14:
+                       case 0x34:
+                       case 0x44:
+                       case 0x64:
+                       case 0x74: pc++; break; // extra pc++ on top of the one after the switch: these ops advance the scan by two words
+
+                       // call cond, addr
+                       case 0x24:
+                       // bra cond, addr
+                       case 0x26:
+                       // mod cond, op
+                       case 0x48:
+                       // mpys?
+                       case 0x1b:
+                       // mpya (rj), (ri), b
+                       case 0x4b: return 1; // scan stops here and reports AL as needed; NOTE(review): presumably the conservative answer for control flow - confirm
+
+                       // mld (rj), (ri), b
+                       case 0x5b: return 0; // cleared anyway
+
+                       // and A, *
+                       case 0x50:
+                               tmpv  = op & 0xf; // src
+                               if (tmpv == SSP_AL) return 1; // otherwise execution falls through into the return 0 below
+                       case 0x51: case 0x53: case 0x54: case 0x55: case 0x59: case 0x5c:
+                               return 0;
+               } // no default: ops not matched by any case just move on to the next word
+               pc++;
+       }
+}
+
+
 /* get ARM cond which would mean that SSP cond is satisfied. No trash. */
 static int tr_cond_check(int op)
 {
@@ -877,9 +948,13 @@ static void tr_r0_to_Y(int const_val)
 
 static void tr_r0_to_A(int const_val)
 {
-       EOP_MOV_REG_LSL(5, 5, 16);              // mov  r5, r5, lsl #16
-       EOP_MOV_REG_LSR(5, 5, 16);              // mov  r5, r5, lsr #16  @ AL
-       EOP_ORR_REG_LSL(5, 5, 0, 16);           // orr  r5, r5, r0, lsl #16
+       if (tr_predict_al_need()) { // emit the 3-op sequence that keeps the low half of r5 (AL) only when AL is predicted to be needed
+               EOP_MOV_REG_LSL(5, 5, 16);      // mov  r5, r5, lsl #16
+               EOP_MOV_REG_LSR(5, 5, 16);      // mov  r5, r5, lsr #16  @ AL
+               EOP_ORR_REG_LSL(5, 5, 0, 16);   // orr  r5, r5, r0, lsl #16
+       }
+       else
+               EOP_MOV_REG_LSL(5, 0, 16); // mov  r5, r0, lsl #16  @ single op; low half of r5 (AL) ends up zero
        TR_WRITE_R0_TO_REG(SSP_A);
 }
 
@@ -1206,8 +1281,15 @@ static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *j
                        ret = tr_detect_rotate(op, pc, imm);
                        if (ret > 0) break;
                        if (tmpv != 0)
-                            tr_rX_read(r, mod);
-                       else tr_ptrr_mod(r, mod, 1, 1);
+                               tr_rX_read(r, mod);
+                       else {
+                               int cnt = 1;
+                               while (PROGRAM(*pc) == op) {
+                                       (*pc)++; cnt++; ret++;
+                                       n_in_ops++;
+                               }
+                               tr_ptrr_mod(r, mod, 1, cnt); // skip
+                       }
                        tr_write_funcs[tmpv](-1);
                        if (tmpv == SSP_PC) {
                                ret |= 0x10000;
@@ -1594,7 +1676,7 @@ static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
                emit_jump(A_COND_AL, ssp_drc_next);
        }
        else if (cond == A_COND_AL) {
-               u32 *target = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
+               u32 *target = (pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][pc] : ssp_block_table[pc];
                if (target != NULL)
                        emit_jump(A_COND_AL, target);
                else {
@@ -1604,8 +1686,8 @@ static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
                }
        }
        else {
-               u32 *target1 = (pc < 0x400) ? block_table_iram[ssp->drc.iram_context][pc] : block_table[pc];
-               u32 *target2 = (end_pc < 0x400) ? block_table_iram[ssp->drc.iram_context][end_pc] : block_table[end_pc];
+               u32 *target1 = (pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][pc] : ssp_block_table[pc];
+               u32 *target2 = (end_pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][end_pc] : ssp_block_table[end_pc];
                if (target1 != NULL)
                     emit_jump(cond, target1);
                else emit_call(cond, ssp_drc_next_patch);
@@ -1661,7 +1743,7 @@ void *ssp_translate_block(int pc)
        tr_flush_dirty_pmcrs();
        emit_block_epilogue(ccount, end_cond, jump_pc, pc);
 
-       if (tcache_ptr - tcache > TCACHE_SIZE/4) {
+       if (tcache_ptr - tcache > SSP_TCACHE_SIZE/4) {
                printf("tcache overflow!\n");
                fflush(stdout);
                exit(1);
@@ -1698,9 +1780,9 @@ static void ssp1601_state_load(void)
 
 int ssp1601_dyn_startup(void)
 {
-       memset(tcache, 0, TCACHE_SIZE);
-       memset(block_table, 0, sizeof(block_table));
-       memset(block_table_iram, 0, sizeof(block_table_iram));
+       memset(tcache, 0, SSP_TCACHE_SIZE);
+       memset(ssp_block_table, 0, sizeof(ssp_block_table));
+       memset(ssp_block_table_iram, 0, sizeof(ssp_block_table_iram));
        tcache_ptr = tcache;
 
        PicoLoadStateHook = ssp1601_state_load;
@@ -1708,14 +1790,14 @@ int ssp1601_dyn_startup(void)
        n_in_ops = 0;
 #ifdef ARM
        // hle'd blocks
-       block_table[0x800/2] = (void *) ssp_hle_800;
-       block_table[0x902/2] = (void *) ssp_hle_902;
-       block_table_iram[ 7][0x030/2] = (void *) ssp_hle_07_030;
-       block_table_iram[ 7][0x036/2] = (void *) ssp_hle_07_036;
-       block_table_iram[ 7][0x6d6/2] = (void *) ssp_hle_07_6d6;
-       block_table_iram[11][0x12c/2] = (void *) ssp_hle_11_12c;
-       block_table_iram[11][0x384/2] = (void *) ssp_hle_11_384;
-       block_table_iram[11][0x38a/2] = (void *) ssp_hle_11_38a;
+       ssp_block_table[0x800/2] = (void *) ssp_hle_800;
+       ssp_block_table[0x902/2] = (void *) ssp_hle_902;
+       ssp_block_table_iram[ 7][0x030/2] = (void *) ssp_hle_07_030;
+       ssp_block_table_iram[ 7][0x036/2] = (void *) ssp_hle_07_036;
+       ssp_block_table_iram[ 7][0x6d6/2] = (void *) ssp_hle_07_6d6;
+       ssp_block_table_iram[11][0x12c/2] = (void *) ssp_hle_11_12c;
+       ssp_block_table_iram[11][0x384/2] = (void *) ssp_hle_11_384;
+       ssp_block_table_iram[11][0x38a/2] = (void *) ssp_hle_11_38a;
 #endif
 
        return 0;
@@ -1732,12 +1814,12 @@ void ssp1601_dyn_reset(ssp1601_t *ssp)
                fclose(f);
 
                for (i = 0; i < 0x5090/2; i++)
-                       if (block_table[i])
-                               printf("%06x -> __:%04x\n", (block_table[i] - tcache)*4, i<<1);
+                       if (ssp_block_table[i])
+                               printf("%06x -> __:%04x\n", (ssp_block_table[i] - tcache)*4, i<<1);
                for (u = 1; u < 15; u++)
                        for (i = 0; i < 0x800/2; i++)
-                               if (block_table_iram[u][i])
-                                       printf("%06x -> %02i:%04x\n", (block_table_iram[u][i] - tcache)*4, u, i<<1);
+                               if (ssp_block_table_iram[u][i])
+                                       printf("%06x -> %02i:%04x\n", (ssp_block_table_iram[u][i] - tcache)*4, u, i<<1);
        }
 
        ssp1601_reset(ssp);
@@ -1747,8 +1829,8 @@ void ssp1601_dyn_reset(ssp1601_t *ssp)
        ssp->drc.ptr_rom = (u32) Pico.rom;
        ssp->drc.ptr_iram_rom = (u32) svp->iram_rom;
        ssp->drc.ptr_dram = (u32) svp->dram;
-       ssp->drc.ptr_btable = (u32) block_table;
-       ssp->drc.ptr_btable_iram = (u32) block_table_iram;
+       ssp->drc.ptr_btable = (u32) ssp_block_table;
+       ssp->drc.ptr_btable_iram = (u32) ssp_block_table_iram;
 
        // prevent new versions of IRAM from appearing
        memset(svp->iram_rom, 0, 0x800);