timers implemented for new z80 mode
[picodrive.git] / Pico / carthw / svp / compiler.c
index 63bc9ab..2db22b1 100644 (file)
@@ -1,11 +1,13 @@
+// SSP1601 to ARM recompiler
+
+// (c) Copyright 2008, Grazvydas "notaz" Ignotas
+// Free for non-commercial use.
 
 #include "../../PicoInt.h"
 #include "compiler.h"
 
 #define u32 unsigned int
 
-static u32 *block_table[0x5090/2];
-static u32 *block_table_iram[15][0x800/2];
 static u32 *tcache_ptr = NULL;
 
 static int nblocks = 0;
@@ -21,9 +23,12 @@ extern ssp1601_t *ssp;
 
 #ifndef ARM
 #define DUMP_BLOCK 0x0c9a
-unsigned int tcache[512*1024];
-void regfile_load(void){}
-void regfile_store(void){}
+u32 *ssp_block_table[0x5090/2];
+u32 *ssp_block_table_iram[15][0x800/2];
+u32 tcache[SSP_TCACHE_SIZE/4];
+void ssp_drc_next(void){}
+void ssp_drc_next_patch(void){}
+void ssp_drc_end(void){}
 #endif
 
 #include "gen_arm.c"
@@ -41,7 +46,7 @@ static int get_inc(int mode)
        return inc;
 }
 
-static u32 ssp_pm_read(int reg)
+u32 ssp_pm_read(int reg)
 {
        u32 d = 0, mode;
 
@@ -83,7 +88,7 @@ static u32 ssp_pm_read(int reg)
        if (d & 0x000f) { dst &= ~0x000f; dst |= d & 0x000f; } \
 }
 
-static void ssp_pm_write(u32 d, int reg)
+void ssp_pm_write(u32 d, int reg)
 {
        unsigned short *dram;
        int mode, addr;
@@ -114,7 +119,7 @@ static void ssp_pm_write(u32 d, int reg)
                if (mode & 0x0400) {
                       overwrite_write(dram[addr], d);
                } else dram[addr] = d;
-               ssp->pmac_write[reg] += (addr&1) ? 31 : 1;
+               ssp->pmac_write[reg] += (addr&1) ? 0x1f : 1;
        }
        else if ((mode & 0x47ff) == 0x001c) // IRAM
        {
@@ -150,11 +155,10 @@ int ssp_get_iram_context(void)
        val1 = iram_context_map[(val>>1)&0x3f];
 
        if (val1 == 0) {
-               printf("val: %02x PC=%04x\n", (val>>1)&0x3f, rPC);
+               elprintf(EL_ANOMALY, "svp: iram ctx val: %02x PC=%04x\n", (val>>1)&0x3f, rPC);
                //debug_dump2file(name, svp->iram_rom, 0x800);
-               exit(1);
+               //exit(1);
        }
-//     elprintf(EL_ANOMALY, "iram_context: %02i", val1);
        return val1;
 }
 
@@ -230,13 +234,13 @@ static void hostreg_sspreg_changed(int sspreg)
 #define PROGRAM(x)   ((unsigned short *)svp->iram_rom)[x]
 #define PROGRAM_P(x) ((unsigned short *)svp->iram_rom + (x))
 
-static void tr_unhandled(void)
+void tr_unhandled(void) // now only logs the anomaly; the tcache dump and exit(1) below are commented out
 {
-       FILE *f = fopen("tcache.bin", "wb");
-       fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
-       fclose(f);
-       printf("unhandled @ %04x\n", known_regs.gr[SSP_PC].h<<1);
-       exit(1);
+       //FILE *f = fopen("tcache.bin", "wb");
+       //fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
+       //fclose(f);
+       elprintf(EL_ANOMALY, "unhandled @ %04x\n", known_regs.gr[SSP_PC].h<<1);
+       //exit(1);
 }
 
 /* update P, if needed. Trashes r0 */
@@ -276,6 +280,14 @@ static void tr_flush_dirty_prs(void)
 {
        int i, ror = 0, reg;
        int dirty = dirty_regb >> 8;
+       if ((dirty&7) == 7) {
+               emit_mov_const(A_COND_AL, 8, known_regs.r[0]|(known_regs.r[1]<<8)|(known_regs.r[2]<<16));
+               dirty &= ~7;
+       }
+       if ((dirty&0x70) == 0x70) {
+               emit_mov_const(A_COND_AL, 9, known_regs.r[4]|(known_regs.r[5]<<8)|(known_regs.r[6]<<16));
+               dirty &= ~0x70;
+       }
        /* r0-r7 */
        for (i = 0; dirty && i < 8; i++, dirty >>= 1)
        {
@@ -343,7 +355,7 @@ static void tr_mov16_cond(int cond, int r, int val)
        hostreg_r[r] = -1;
 }
 
-/* trashes r0 */
+/* trashes r1 */
 static void tr_flush_dirty_pmcrs(void)
 {
        u32 i, val = (u32)-1;
@@ -355,7 +367,7 @@ static void tr_flush_dirty_pmcrs(void)
                EOP_STR_IMM(1,7,0x400+SSP_PMC*4);
 
                if (known_regs.emu_status & (SSP_PMC_SET|SSP_PMC_HAVE_ADDR)) {
-                       printf("!! SSP_PMC_SET|SSP_PMC_HAVE_ADDR set on flush\n");
+                       elprintf(EL_ANOMALY, "!! SSP_PMC_SET|SSP_PMC_HAVE_ADDR set on flush\n");
                        tr_unhandled();
                }
        }
@@ -557,6 +569,69 @@ static void tr_rX_read2(int op)
        hostreg_r[0] = hostreg_r[2] = -1;
 }
 
+// Scan ops starting at the known PC: return 1 if AL may still be read later in the block, 0 if it is overwritten or cleared first
+static int tr_predict_al_need(void)
+{
+       int tmpv, tmpv2, op, pc = known_regs.gr[SSP_PC].h;
+
+       while (1) // no bound here: the scan only ends when one of the cases below returns
+       {
+               op = PROGRAM(pc);
+               switch (op >> 9)
+               {
+                       // ld d, s
+                       case 0x00:
+                               tmpv2 = (op >> 4) & 0xf; // dst
+                               tmpv  = op & 0xf; // src
+                               if ((tmpv2 == SSP_A && tmpv == SSP_P) || tmpv2 == SSP_AL) // ld A, P; ld AL, *
+                                       return 0;
+                               break;
+
+                       // ld (ri), s
+                       case 0x02:
+                       // ld ri, s
+                       case 0x0a:
+                       // OP a, s
+                       case 0x10: case 0x30: case 0x40: case 0x60: case 0x70:
+                               tmpv  = op & 0xf; // src
+                               if (tmpv == SSP_AL) // OP *, AL
+                                       return 1;
+                               break;
+
+                       case 0x04:
+                       case 0x06:
+                       case 0x14:
+                       case 0x34:
+                       case 0x44:
+                       case 0x64:
+                       case 0x74: pc++; break; // skip one extra word for these ops (presumably their immediate - TODO confirm)
+
+                       // call cond, addr
+                       case 0x24:
+                       // bra cond, addr
+                       case 0x26:
+                       // mod cond, op
+                       case 0x48:
+                       // mpys?
+                       case 0x1b:
+                       // mpya (rj), (ri), b
+                       case 0x4b: return 1; // all five cases above report AL as needed
+
+                       // mld (rj), (ri), b
+                       case 0x5b: return 0; // cleared anyway
+
+                       // and A, *
+                       case 0x50:
+                               tmpv  = op & 0xf; // src
+                               if (tmpv == SSP_AL) return 1; // otherwise falls through to the return 0 below
+                       case 0x51: case 0x53: case 0x54: case 0x55: case 0x59: case 0x5c:
+                               return 0;
+               }
+               pc++; // op did not decide anything, look at the next one
+       }
+}
+
+
 /* get ARM cond which would mean that SSP cond is satisfied. No trash. */
 static int tr_cond_check(int op)
 {
@@ -572,7 +647,7 @@ static int tr_cond_check(int op)
                        EOP_TST_IMM(6, 0, 8);
                        return f ? A_COND_NE : A_COND_EQ;
                default:
-                       printf("unimplemented cond?\n");
+                       elprintf(EL_ANOMALY, "unimplemented cond?\n");
                        tr_unhandled();
                        return 0;
        }
@@ -581,12 +656,12 @@ static int tr_cond_check(int op)
 static int tr_neg_cond(int cond)
 {
        switch (cond) {
-               case A_COND_AL: printf("neg for AL?\n"); exit(1);
+               case A_COND_AL: elprintf(EL_ANOMALY, "neg for AL?\n"); exit(1); // AL cannot be negated; this path still terminates the process
                case A_COND_EQ: return A_COND_NE;
                case A_COND_NE: return A_COND_EQ;
                case A_COND_MI: return A_COND_PL;
                case A_COND_PL: return A_COND_MI;
-               default:        printf("bad cond for neg\n"); exit(1);
+               default:        elprintf(EL_ANOMALY, "bad cond for neg\n"); exit(1); // only EQ/NE/MI/PL are handled; still fatal
        }
        return 0;
 }
@@ -756,7 +831,7 @@ static void tr_PMX_to_r0(int reg)
        tr_flush_dirty_ST();
        //tr_flush_dirty_pmcrs();
        tr_mov16(0, reg);
-       emit_call(ssp_pm_read);
+       emit_call(A_COND_AL, ssp_pm_read);
        hostreg_clear();
 }
 
@@ -876,9 +951,13 @@ static void tr_r0_to_Y(int const_val)
 
 static void tr_r0_to_A(int const_val)
 {
-       EOP_MOV_REG_LSL(5, 5, 16);              // mov  r5, r5, lsl #16
-       EOP_MOV_REG_LSR(5, 5, 16);              // mov  r5, r5, lsr #16  @ AL
-       EOP_ORR_REG_LSL(5, 5, 0, 16);           // orr  r5, r5, r0, lsl #16
+       if (tr_predict_al_need()) { // AL may be read later: keep the low 16 bits of r5 and replace only the upper half
+               EOP_MOV_REG_LSL(5, 5, 16);      // mov  r5, r5, lsl #16
+               EOP_MOV_REG_LSR(5, 5, 16);      // mov  r5, r5, lsr #16  @ AL
+               EOP_ORR_REG_LSL(5, 5, 0, 16);   // orr  r5, r5, r0, lsl #16
+       }
+       else
+               EOP_MOV_REG_LSL(5, 0, 16);      // mov  r5, r0, lsl #16  @ one op instead of three, low 16 bits become 0
        TR_WRITE_R0_TO_REG(SSP_A);
 }
 
@@ -906,9 +985,12 @@ static void tr_r0_to_STACK(int const_val)
 
 static void tr_r0_to_PC(int const_val)
 {
+/*
+ * do nothing - dispatcher will take care of this
        EOP_MOV_REG_LSL(1, 0, 16);              // mov  r1, r0, lsl #16
        EOP_STR_IMM(1,7,0x400+6*4);             // str  r1, [r7, #(0x400+6*8)]
        hostreg_r[1] = -1;
+*/
 }
 
 static void tr_r0_to_AL(int const_val)
@@ -990,7 +1072,7 @@ static void tr_r0_to_PMX(int reg)
        tr_flush_dirty_ST();
        //tr_flush_dirty_pmcrs();
        tr_mov16(1, reg);
-       emit_call(ssp_pm_write);
+       emit_call(A_COND_AL, ssp_pm_write);
        hostreg_clear();
 }
 
@@ -1166,7 +1248,7 @@ static int tr_detect_rotate(unsigned int op, int *pc, int imm)
 
 // -----------------------------------------------------
 
-static int translate_op(unsigned int op, int *pc, int imm)
+static int translate_op(unsigned int op, int *pc, int imm, int *end_cond, int *jump_pc)
 {
        u32 tmpv, tmpv2, tmpv3;
        int ret = 0;
@@ -1188,7 +1270,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        }
                        tr_read_funcs[tmpv](op);
                        tr_write_funcs[tmpv2]((known_regb & (1 << tmpv)) ? known_regs.gr[tmpv].h : -1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret++; break;
 
                // ld d, (ri)
@@ -1199,10 +1284,20 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        ret = tr_detect_rotate(op, pc, imm);
                        if (ret > 0) break;
                        if (tmpv != 0)
-                            tr_rX_read(r, mod);
-                       else tr_ptrr_mod(r, mod, 1, 1);
+                               tr_rX_read(r, mod);
+                       else {
+                               int cnt = 1;
+                               while (PROGRAM(*pc) == op) {
+                                       (*pc)++; cnt++; ret++;
+                                       n_in_ops++;
+                               }
+                               tr_ptrr_mod(r, mod, 1, cnt); // skip
+                       }
                        tr_write_funcs[tmpv](-1);
-                       if (tmpv == SSP_PC) ret |= 0x10000;
+                       if (tmpv == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret++; break;
                }
 
@@ -1228,7 +1323,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        if (ret > 0) break;
                        tr_mov16(0, imm);
                        tr_write_funcs[tmpv](imm);
-                       if (tmpv == SSP_PC) ret |= 0x10000;
+                       if (tmpv == SSP_PC) {
+                               ret |= 0x10000;
+                               *jump_pc = imm;
+                       }
                        ret += 2; break;
 
                // ld d, ((ri))
@@ -1236,7 +1334,10 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        tmpv2 = (op >> 4) & 0xf;  // dst
                        tr_rX_read2(op);
                        tr_write_funcs[tmpv2](-1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret += 3; break;
 
                // ldi (ri), imm
@@ -1297,7 +1398,7 @@ static int translate_op(unsigned int op, int *pc, int imm)
                }
 
                // ldi ri, simm
-               case 0x0c ... 0x0f:
+               case 0x0c: case 0x0d: case 0x0e: case 0x0f:
                        tmpv = (op>>8)&7;
                        known_regs.r[tmpv] = op;
                        known_regb |= 1 << (tmpv + 8);
@@ -1321,11 +1422,12 @@ static int translate_op(unsigned int op, int *pc, int imm)
                                tcache_ptr = real_ptr;
                        }
                        tr_mov16_cond(tmpv, 0, imm);
-                       if (tmpv != A_COND_AL) {
+                       if (tmpv != A_COND_AL)
                                tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
-                       }
                        tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
                        ret |= 0x10000;
+                       *end_cond = tmpv;
+                       *jump_pc = imm;
                        ret += 2; break;
                }
 
@@ -1338,18 +1440,22 @@ static int translate_op(unsigned int op, int *pc, int imm)
                        EOP_LDRH_SIMPLE(0,0);                                   // ldrh r0, [r0]
                        hostreg_r[0] = hostreg_r[1] = -1;
                        tr_write_funcs[tmpv2](-1);
-                       if (tmpv2 == SSP_PC) ret |= 0x10000;
+                       if (tmpv2 == SSP_PC) {
+                               ret |= 0x10000;
+                               *end_cond = -A_COND_AL;
+                       }
                        ret += 3; break;
 
                // bra cond, addr
                case 0x26:
                        tmpv = tr_cond_check(op);
                        tr_mov16_cond(tmpv, 0, imm);
-                       if (tmpv != A_COND_AL) {
+                       if (tmpv != A_COND_AL)
                                tr_mov16_cond(tr_neg_cond(tmpv), 0, *pc);
-                       }
                        tr_r0_to_PC(tmpv == A_COND_AL ? imm : -1);
                        ret |= 0x10000;
+                       *end_cond = tmpv;
+                       *jump_pc = imm;
                        ret += 2; break;
 
                // mod cond, op
@@ -1551,13 +1657,56 @@ static int translate_op(unsigned int op, int *pc, int imm)
        return ret;
 }
 
+static void emit_block_prologue(void)
+{
+       // check if there are enough cycles (r11 is the counter that emit_block_epilogue decrements)..
+       // note: r0 must contain PC of current block
+       EOP_CMP_IMM(11,0,0);                    // cmp r11, #0
+       emit_call(A_COND_LE, ssp_drc_end);      // r11 <= 0: call ssp_drc_end
+}
+
+/* Emit block end code: subtract cycles (clamped to 0xff) from r11, then jump towards the next block. cond:
+ * >0: direct (un)conditional jump (an A_COND_* value): goes to pc when taken, to end_pc otherwise
+ * <0: indirect jump, always emitted as a jump to ssp_drc_next
+ */
+static void emit_block_epilogue(int cycles, int cond, int pc, int end_pc)
+{
+       if (cycles > 0xff) { elprintf(EL_ANOMALY, "large cycle count: %i\n", cycles); cycles = 0xff; }
+       EOP_SUB_IMM(11,11,0,cycles);            // sub r11, r11, #cycles
+
+       if (cond < 0 || (end_pc >= 0x400 && pc < 0x400)) {
+               // indirect jump, or rom -> iram jump, must use dispatcher
+               emit_jump(A_COND_AL, ssp_drc_next);
+       }
+       else if (cond == A_COND_AL) { // unconditional direct jump to pc
+               u32 *target = (pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][pc] : ssp_block_table[pc];
+               if (target != NULL)
+                       emit_jump(A_COND_AL, target);
+               else {
+                       emit_jump(A_COND_AL, ssp_drc_next);
+                       // cause the next block to be emitted over jump instruction
+                       tcache_ptr--;
+               }
+       }
+       else { // conditional: cond -> block at pc, negated cond -> block at end_pc
+               u32 *target1 = (pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][pc] : ssp_block_table[pc];
+               u32 *target2 = (end_pc < 0x400) ? ssp_block_table_iram[ssp->drc.iram_context][end_pc] : ssp_block_table[end_pc];
+               if (target1 != NULL)
+                    emit_jump(cond, target1);
+               else emit_call(cond, ssp_drc_next_patch); // no block table entry for pc: call ssp_drc_next_patch instead
+               if (target2 != NULL)
+                    emit_jump(tr_neg_cond(cond), target2); // neg_cond, to be able to swap jumps if needed
+               else emit_call(tr_neg_cond(cond), ssp_drc_next_patch); // no block table entry for end_pc
+       }
+}
+
 void *ssp_translate_block(int pc)
 {
        unsigned int op, op1, imm, ccount = 0;
        unsigned int *block_start;
-       int ret, ret_prev = -1, tpc;
+       int ret, end_cond = A_COND_AL, jump_pc = -1;
 
-       printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2);
+       //printf("translate %04x -> %04x\n", pc<<1, (tcache_ptr-tcache)<<2);
        block_start = tcache_ptr;
        known_regb = 0;
        dirty_regb = KRREG_P;
@@ -1574,42 +1723,39 @@ void *ssp_translate_block(int pc)
 
                if ((op1 & 0xf) == 4 || (op1 & 0xf) == 6)
                        imm = PROGRAM(pc++); // immediate
-               tpc = pc;
 
-               ret = translate_op(op, &pc, imm);
+               ret = translate_op(op, &pc, imm, &end_cond, &jump_pc);
                if (ret <= 0)
                {
-                       printf("NULL func! op=%08x (%02x)\n", op, op1);
-                       exit(1);
-               }
-               else
-               {
-                       ccount += ret & 0xffff;
-                       if (ret & 0x10000) break;
+                       elprintf(EL_ANOMALY, "NULL func! op=%08x (%02x)\n", op, op1);
+                       //exit(1);
                }
 
-               ret_prev = ret;
+               ccount += ret & 0xffff;
+               if (ret & 0x10000) break;
        }
 
-       if (ccount >= 100)
-               emit_pc_dump(pc);
+       if (ccount >= 100) {
+               end_cond = A_COND_AL;
+               jump_pc = pc;
+               emit_mov_const(A_COND_AL, 0, pc);
+       }
 
        tr_flush_dirty_prs();
        tr_flush_dirty_ST();
        tr_flush_dirty_pmcrs();
-       emit_block_epilogue(ccount + 1);
-       *tcache_ptr++ = 0xffffffff; // end of block
+       emit_block_epilogue(ccount, end_cond, jump_pc, pc);
 
-       if (tcache_ptr - tcache > TCACHE_SIZE/4) {
-               printf("tcache overflow!\n");
+       if (tcache_ptr - tcache > SSP_TCACHE_SIZE/4) {
+               elprintf(EL_ANOMALY, "tcache overflow!\n");
                fflush(stdout);
                exit(1);
        }
 
        // stats
        nblocks++;
-       printf("%i blocks, %i bytes, k=%.3f\n", nblocks, (tcache_ptr - tcache)*4,
-               (double)(tcache_ptr - tcache) / (double)n_in_ops);
+       //printf("%i blocks, %i bytes, k=%.3f\n", nblocks, (tcache_ptr - tcache)*4,
+       //      (double)(tcache_ptr - tcache) / (double)n_in_ops);
 
 #ifdef DUMP_BLOCK
        {
@@ -1617,6 +1763,7 @@ void *ssp_translate_block(int pc)
                fwrite(tcache, 1, (tcache_ptr - tcache)*4, f);
                fclose(f);
        }
+       printf("dumped tcache.bin\n");
        exit(0);
 #endif
 
@@ -1629,18 +1776,32 @@ void *ssp_translate_block(int pc)
 
 // -----------------------------------------------------
 
+static void ssp1601_state_load(void) // installed as PicoLoadStateHook by ssp1601_dyn_startup
+{
+       ssp->drc.iram_dirty = 1; // NOTE(review): presumably makes the dispatcher re-detect the IRAM context - confirm
+       ssp->drc.iram_context = 0; // drop the current context number
+}
+
 int ssp1601_dyn_startup(void)
 {
-       memset(tcache, 0, TCACHE_SIZE);
-       memset(block_table, 0, sizeof(block_table));
-       memset(block_table_iram, 0, sizeof(block_table_iram));
+       memset(tcache, 0, SSP_TCACHE_SIZE);
+       memset(ssp_block_table, 0, sizeof(ssp_block_table));
+       memset(ssp_block_table_iram, 0, sizeof(ssp_block_table_iram));
        tcache_ptr = tcache;
-       *tcache_ptr++ = 0xffffffff;
 
+       PicoLoadStateHook = ssp1601_state_load;
+
+       n_in_ops = 0;
 #ifdef ARM
        // hle'd blocks
-       block_table[0x400] = (void *) ssp_hle_800;
-       n_in_ops = 3; // # of hled ops
+       ssp_block_table[0x800/2] = (void *) ssp_hle_800;
+       ssp_block_table[0x902/2] = (void *) ssp_hle_902;
+       ssp_block_table_iram[ 7][0x030/2] = (void *) ssp_hle_07_030;
+       ssp_block_table_iram[ 7][0x036/2] = (void *) ssp_hle_07_036;
+       ssp_block_table_iram[ 7][0x6d6/2] = (void *) ssp_hle_07_6d6;
+       ssp_block_table_iram[11][0x12c/2] = (void *) ssp_hle_11_12c;
+       ssp_block_table_iram[11][0x384/2] = (void *) ssp_hle_11_384;
+       ssp_block_table_iram[11][0x38a/2] = (void *) ssp_hle_11_38a;
 #endif
 
        return 0;
@@ -1656,14 +1817,22 @@ void ssp1601_dyn_reset(ssp1601_t *ssp)
        ssp->drc.ptr_rom = (u32) Pico.rom;
        ssp->drc.ptr_iram_rom = (u32) svp->iram_rom;
        ssp->drc.ptr_dram = (u32) svp->dram;
-       ssp->drc.ptr_btable = (u32) block_table;
-       ssp->drc.ptr_btable_iram = (u32) block_table_iram;
+       ssp->drc.ptr_btable = (u32) ssp_block_table;
+       ssp->drc.ptr_btable_iram = (u32) ssp_block_table_iram;
+
+       // prevent new versions of IRAM from appearing
+       memset(svp->iram_rom, 0, 0x800);
 }
 
 void ssp1601_dyn_run(int cycles)
 {
        if (ssp->emu_status & SSP_WAIT_MASK) return;
 
+#ifdef DUMP_BLOCK // debug aid: DUMP_BLOCK is only defined in the non-ARM section near the top of the file
+       ssp_translate_block(DUMP_BLOCK >> 1); // with DUMP_BLOCK set, ssp_translate_block prints "dumped tcache.bin" and calls exit(0)
+#endif // DUMP_BLOCK
+#ifdef ARM // NOTE(review): presumably ssp_drc_entry exists only in ARM builds - confirm
        ssp_drc_entry(cycles);
+#endif // ARM
 }