From: notaz
Date: Sun, 24 Feb 2008 23:49:06 +0000 (+0000)
Subject: svp compiler: added first wait loop detection
X-Git-Tag: v1.85~567
X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=b9c1d0129a0842bb545dd30cef73aa975acbb1ac;p=picodrive.git

svp compiler: added first wait loop detection

git-svn-id: file:///home/notaz/opt/svn/PicoDrive@363 be3aeb3a-fb24-0410-a615-afba39da0efa
---
Review notes:
 - Line structure was restored from a whitespace-collapsed copy; indentation
   and blank lines are reconstructed to agree with the hunk line counts.
 - tr_r0_to_STACK: the stack pointer increment used rotate field 24/2, which
   encodes #0x2000; it now uses 8/2 so the emitted add is #1<<29 as commented.
 - tr_r0_to_PC: the emitted store now writes r1 (r0 << 16) instead of r0.
 - Corrected emitted-instruction comments that disagreed with the emitters.

diff --git a/Pico/carthw/svp/compiler.c b/Pico/carthw/svp/compiler.c
index 211fa173..577e6992 100644
--- a/Pico/carthw/svp/compiler.c
+++ b/Pico/carthw/svp/compiler.c
@@ -13,7 +13,7 @@ static int nblocks = 0;
 static int iram_context = 0;
 
 #ifndef ARM
-#define DUMP_BLOCK 0x40b0
+#define DUMP_BLOCK 0x84a
 unsigned int tcache[512*1024];
 void regfile_load(void){}
 void regfile_store(void){}
@@ -635,7 +635,7 @@ static void tr_bank_write(int addr)
 		}
 		breg = 1;
 	}
-	EOP_STRH_IMM(0,breg,(addr&0x7f)<<1);	// str r0, [r1, (op&0x7f)<<1]
+	EOP_STRH_IMM(0,breg,(addr&0x7f)<<1);	// strh r0, [r1, (op&0x7f)<<1]
 }
 
 /* handle RAM bank pointer modifiers. Nothing is trashed. */
@@ -669,6 +669,89 @@ static void tr_ptrr_mod(int r, int mod, int need_modulo)
 	}
 }
 
+// SSP_GR0, SSP_X, SSP_Y, SSP_A,
+// SSP_ST, SSP_STACK, SSP_PC, SSP_P,
+//@ r4: XXYY
+//@ r5: A
+//@ r6: STACK and emu flags
+//@ r7: SSP context
+//@ r10: P
+
+// write r0 to general reg handlers. Trashes r1
+static void tr_r0_unhandled(void)
+{
+	printf("unhandled\n");
+	exit(1);
+}
+
+static void tr_r0_to_GR0(void)
+{
+	// do nothing
+}
+
+static void tr_r0_to_X(void)
+{
+	EOP_MOV_REG_LSL(4, 4, 16);		// mov r4, r4, lsl #16
+	EOP_MOV_REG_LSR(4, 4, 16);		// mov r4, r4, lsr #16
+	EOP_ORR_REG_LSL(4, 4, 0, 16);		// orr r4, r4, r0, lsl #16
+}
+
+static void tr_r0_to_Y(void)
+{
+	EOP_MOV_REG_LSR(4, 4, 16);		// mov r4, r4, lsr #16
+	EOP_ORR_REG_LSL(4, 4, 0, 16);		// orr r4, r4, r0, lsl #16
+	EOP_MOV_REG_ROR(4, 4, 16);		// mov r4, r4, ror #16
+}
+
+static void tr_r0_to_A(void)
+{
+	EOP_MOV_REG_LSL(5, 5, 16);		// mov r5, r5, lsl #16
+	EOP_MOV_REG_LSR(5, 5, 16);		// mov r5, r5, lsr #16 @ AL
+	EOP_ORR_REG_LSL(5, 5, 0, 16);		// orr r5, r5, r0, lsl #16
+	hostreg_r[0] = 0x20000;
+}
+
+static void tr_r0_to_ST(void)
+{
+	// VR doesn't need much accuracy here..
+	EOP_AND_IMM(1, 0, 0, 0x67);		// and r1, r0, #0x67
+	EOP_AND_IMM(6, 6, 8/2, 0xe0);		// and r6, r6, #7<<29 @ preserve STACK
+	EOP_ORR_REG_LSL(6, 6, 1, 4);		// orr r6, r6, r1, lsl #4
+	hostreg_r[1] = -1;
+}
+
+static void tr_r0_to_STACK(void)
+{
+	// 448
+	EOP_ADD_IMM(1, 7, 24/2, 0x04);		// add r1, r7, 0x400
+	EOP_ADD_IMM(1, 1, 0, 0x48);		// add r1, r1, 0x048
+	EOP_ADD_REG_LSR(1, 1, 6, 28);		// add r1, r1, r6, lsr #28
+	EOP_STRH_SIMPLE(0, 1);			// strh r0, [r1]
+	EOP_ADD_IMM(6, 6, 8/2, 0x20);		// add r6, r6, #1<<29 (imm8 0x20 rotated right by 8)
+	hostreg_r[1] = -1;
+}
+
+static void tr_r0_to_PC(void)
+{
+	EOP_MOV_REG_LSL(1, 0, 16);		// mov r1, r0, lsl #16
+	EOP_STR_IMM(1,7,0x400+6*4);		// str r1, [r7, #(0x400+6*4)]
+	hostreg_r[1] = -1;
+}
+
+typedef void (tr_write_func)(void);
+
+static tr_write_func *tr_write_funcs[8] =
+{
+	tr_r0_to_GR0,
+	tr_r0_to_X,
+	tr_r0_to_Y,
+	tr_r0_to_A,
+	tr_r0_to_ST,
+	tr_r0_to_STACK,
+	tr_r0_to_PC,
+	tr_r0_unhandled
+};
+
 
 static int translate_op(unsigned int op, int *pc, int imm)
 {
@@ -685,16 +768,50 @@ static int translate_op(unsigned int op, int *pc, int imm)
 		// ld a, adr
 		case 0x03:
 			tr_bank_read(op&0x1ff);
-			EOP_MOV_REG_LSL(5, 5, 16);		// mov r5, r5, lsl #16
-			EOP_MOV_REG_LSR(5, 5, 16);		// mov r5, r5, lsl #16 @ AL
-			EOP_ORR_REG_LSL(5, 5, 0, 16);		// orr r5, r5, r0, lsl #16
+			tr_r0_to_A();
 			const_regb &= ~CRREG_A;
 			hostreg_r[0] = 0x20000;
 			ret++; break;
 
+		// ldi d, imm
+		case 0x04:
+			tmpv = (op & 0xf0) >> 4;
+			if (tmpv < 8)
+			{
+				tr_mov16(0, imm);
+				tr_write_funcs[tmpv]();
+				const_regs.gr[tmpv].h = imm;
+				const_regb |= 1 << tmpv;
+				ret++; break;
+			}
+			else if (tmpv == 0xe && (PROGRAM(*pc) >> 9) == 4)
+			{
+				// programming PMC..
+				(*pc)++;
+				tmpv = imm | (PROGRAM((*pc)++) << 16);
+				emit_mov_const(0, tmpv);
+				EOP_LDR_IMM(1,7,0x484);		// ldr r1, [r7, #0x484] // emu_status
+				EOP_STR_IMM(0,7,0x400+14*4);	// PMC
+				// TODO: do this only on reads
+				if (tmpv == 0x187f04) { // fe08
+					EOP_LDR_IMM(0,7,0x490); // dram_ptr
+					EOP_ADD_IMM(0,0,24/2,0xfe);				// add r0, r0, #0xfe00
+					EOP_LDRH_IMM(0,0,8);					// ldrh r0, [r0, #8]
+					EOP_TST_REG_SIMPLE(0,0);
+					EOP_C_DOP_IMM(A_COND_EQ,A_OP_ADD,0,11,11,22/2,1);	// add r11, r11, #1024
+					EOP_C_DOP_IMM(A_COND_EQ,A_OP_ORR,0, 1, 1,24/2,SSP_WAIT_30FE08>>8); // orr r1, r1, #SSP_WAIT_30FE08
+				}
+				EOP_ORR_IMM(1,1,0,SSP_PMC_SET);	// orr r1, r1, #SSP_PMC_SET
+				EOP_STR_IMM(1,7,0x484);		// str r1, [r7, #0x484] // emu_status
+				hostreg_r[0] = hostreg_r[1] = -1;
+				ret += 2; break;
+			}
+			else
+				return -1;	/* TODO.. */
+
+
 		// ldi (ri), imm
 		case 0x06:
-			//tmpv = *PC++; ptr1_write(op, tmpv); break;
 			// int t = (op&3) | ((op>>6)&4) | ((op<<1)&0x18);
 			tr_mov16(0, imm);
 			if ((op&3) == 3)
@@ -860,10 +977,17 @@ int ssp1601_dyn_startup(void)
 void ssp1601_dyn_reset(ssp1601_t *ssp)
 {
 	ssp1601_reset_local(ssp);
+	ssp->rom_ptr = (unsigned int) Pico.rom;
+	ssp->iram_ptr = (unsigned int) svp->iram_rom;
+	ssp->dram_ptr = (unsigned int) svp->dram;
 }
 
 void ssp1601_dyn_run(int cycles)
 {
+	if (ssp->emu_status & SSP_WAIT_MASK) return;
+	//{ printf("%i wait\n", Pico.m.frame_count); return; }
+	//printf("%i %04x\n", Pico.m.frame_count, rPC<<1);
+
 #ifdef DUMP_BLOCK
 	rPC = DUMP_BLOCK >> 1;
 #endif
diff --git a/Pico/carthw/svp/gen_arm.c b/Pico/carthw/svp/gen_arm.c
index 76a0304e..f33d3692 100644
--- a/Pico/carthw/svp/gen_arm.c
+++ b/Pico/carthw/svp/gen_arm.c
@@ -11,6 +11,7 @@
 #define A_R14M (1 << 14)
 
 #define A_COND_AL 0xe
+#define A_COND_EQ 0x0
 
 /* addressing mode 1 */
 #define A_AM1_LSL 0
@@ -25,6 +26,7 @@
 #define A_OP_AND 0x0
 #define A_OP_SUB 0x2
 #define A_OP_ADD 0x4
+#define A_OP_TST 0x8
 #define A_OP_ORR 0xc
 #define A_OP_MOV 0xd
 #define A_OP_BIC 0xe
@@ -44,6 +46,7 @@
 #define EOP_MOV_REG(s, rd,shift_imm,shift_op,rm) EOP_C_DOP_REG(A_COND_AL,A_OP_MOV,s, 0,rd,shift_imm,shift_op,rm)
 #define EOP_ORR_REG(s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_REG(A_COND_AL,A_OP_ORR,s,rn,rd,shift_imm,shift_op,rm)
 #define EOP_ADD_REG(s,rn,rd,shift_imm,shift_op,rm) EOP_C_DOP_REG(A_COND_AL,A_OP_ADD,s,rn,rd,shift_imm,shift_op,rm)
+#define EOP_TST_REG( rn, shift_imm,shift_op,rm) EOP_C_DOP_REG(A_COND_AL,A_OP_TST,1,rn, 0,shift_imm,shift_op,rm)
 
 #define EOP_MOV_REG_SIMPLE(rd,rm) EOP_MOV_REG(0,rd,0,A_AM1_LSL,rm)
 #define EOP_MOV_REG_LSL(rd, rm,shift_imm) EOP_MOV_REG(0,rd,shift_imm,A_AM1_LSL,rm)
@@ -61,6 +64,8 @@
 #define EOP_ADD_REG_LSL(rd,rn,rm,shift_imm) EOP_ADD_REG(0,rn,rd,shift_imm,A_AM1_LSL,rm)
 #define EOP_ADD_REG_LSR(rd,rn,rm,shift_imm) EOP_ADD_REG(0,rn,rd,shift_imm,A_AM1_LSR,rm)
 
+#define EOP_TST_REG_SIMPLE(rn,rm) EOP_TST_REG( rn, 0,A_AM1_LSL,rm)
+
 /* addressing mode 2 */
 #define EOP_C_AM2_IMM(cond,u,b,l,rn,rd,offset_12) \
 	EMIT(((cond)<<28) | 0x05000000 | ((u)<<23) | ((b)<<22) | ((l)<<20) | ((rn)<<16) | ((rd)<<12) | (offset_12))
@@ -150,13 +155,15 @@ static void emit_block_prologue(void)
 	// stack regs
 	EOP_STMFD_ST(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R14M);	// stmfd r13!, {r4-r11,lr}
 	emit_call(regfile_load);
+	EOP_MOV_IMM(11, 0, 0);	// mov r11, #0
 }
 
 static void emit_block_epilogue(int icount)
 {
+	if (icount > 0xff) { printf("large icount: %i\n", icount); icount = 0xff; }
 	emit_call(regfile_store);
+	EOP_ADD_IMM(0,11,0,icount);	// add r0, r11, #icount
 	EOP_LDMFD_ST(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R14M);	// ldmfd r13!, {r4-r11,lr}
-	emit_mov_const(0, icount);
 	EOP_BX(14);	// bx r14
 }
 
diff --git a/Pico/carthw/svp/ssp16.h b/Pico/carthw/svp/ssp16.h
index 3d8a764d..f4a5746a 100644
--- a/Pico/carthw/svp/ssp16.h
+++ b/Pico/carthw/svp/ssp16.h
@@ -47,11 +47,14 @@ typedef struct
 	#define SSP_PMC_HAVE_ADDR 0x0001 // address written to PMAC, waiting for mode
 	#define SSP_PMC_SET 0x0002 // PMAC is set
 	#define SSP_WAIT_PM0 0x2000 // bit1 in PM0
-	#define SSP_WAIT_30FE06 0x4000 // ssp tight loops on 30FE08 to become non-zero
-	#define SSP_WAIT_30FE08 0x8000 // same for 30FE06
+	#define SSP_WAIT_30FE06 0x4000 // ssp tight loops on 30FE06 to become non-zero
+	#define SSP_WAIT_30FE08 0x8000 // same for 30FE08
 	#define SSP_WAIT_MASK 0xe000
 	unsigned int emu_status; // 484
-	unsigned int pad[30];
+	unsigned int rom_ptr; // 488 recompiler convenience
+	unsigned int iram_ptr; // 48c
+	unsigned int dram_ptr; // 490
+	unsigned int pad[27];
 } ssp1601_t;
 
 
diff --git a/Pico/carthw/svp/stub_arm.S b/Pico/carthw/svp/stub_arm.S
index 0d58d6bb..b812b61b 100644
--- a/Pico/carthw/svp/stub_arm.S
+++ b/Pico/carthw/svp/stub_arm.S
@@ -36,7 +36,7 @@ flush_inval_caches:
 @ register map:
 @ r4: XXYY
 @ r5: A
-@ r6: STACK and emu flags
+@ r6: STACK and emu flags: sss0 * .uu. .lll NZCV (NZCV is PSR bits from ARM)
 @ r7: SSP context
 @ r8: r0-r2 (.210)
 @ r9: r4-r6 (.654)
@@ -54,8 +54,16 @@ regfile_load:
     mov r3, r3, lsr #16
     mov r3, r3, lsl #16
     orr r4, r3, r4, lsr #16 @ XXYY
-    bic r6, r6, #0xff
-    orr r6, r6, r8, lsr #16 @ flags + STACK
+
+    and r8, r8, #0x0f0000
+    mov r8, r8, lsl #13 @ sss0 *
+    and r9, r6, #0x670000
+    tst r6, #0x80000000
+    orrne r8, r8, #0x8
+    tst r6, #0x20000000
+    orrne r8, r8, #0x4 @ sss0 * NZ..
+    orr r6, r8, r9, lsr #12 @ sss0 * .uu. .lll ....
+
     ldr r8, [r7, #0x440] @ r0-r2
     ldr r9, [r7, #0x444] @ r4-r6
     ldr r10,[r7, #(0x400+7*4)] @ P
@@ -66,9 +74,16 @@ regfile_store:
     str r10,[r7, #(0x400+7*4)] @ P
     str r8, [r7, #0x440] @ r0-r2
     str r9, [r7, #0x444] @ r4-r6
-    mov r9, r6, lsl #16
+
+    mov r9, r6, lsr #13
     and r9, r9, #(7<<16) @ STACK
-    bic r6, r6, #0xff @ ST
+    mov r3, r6, lsl #28
+    msr cpsr_flg, r3 @ to ARM PSR
+    and r6, r6, #0x670
+    mov r6, r6, lsl #12
+    orrmi r6, r6, #0x80000000 @ N
+    orreq r6, r6, #0x20000000 @ Z
+
     mov r3, r4, lsl #16 @ Y
     mov r2, r4, lsr #16
     mov r2, r2, lsl #16 @ X