From a2b8c5a54568093b247ced39f0754cbb30324830 Mon Sep 17 00:00:00 2001 From: notaz Date: Thu, 21 Jan 2010 22:11:54 +0000 Subject: [PATCH] 32x: drc: new smc handling, some bugfixes + refactoring git-svn-id: file:///home/notaz/opt/svn/PicoDrive@864 be3aeb3a-fb24-0410-a615-afba39da0efa --- cpu/debug_net.c | 31 ++- cpu/drc/cmn.h | 2 +- cpu/drc/emit_arm.c | 24 +- cpu/drc/emit_x86.c | 61 ++++- cpu/sh2/compiler.c | 488 +++++++++++++++++++++------------------- pico/32x/draw.c | 2 +- pico/cart.c | 2 + pico/draw.c | 2 +- platform/gp2x/version.h | 2 +- 9 files changed, 350 insertions(+), 264 deletions(-) diff --git a/cpu/debug_net.c b/cpu/debug_net.c index 0305a1a..5921354 100644 --- a/cpu/debug_net.c +++ b/cpu/debug_net.c @@ -23,8 +23,12 @@ int main(int argc, char *argv[]) struct sockaddr_in6 sa; packet_t packet1, packet2; int i, ret, cnt, cpuid; + int check_len_override = 0; socklen_t sal; + if (argv[1] != NULL) + check_len_override = atoi(argv[1]); + memset(&hints, 0, sizeof(hints)); hints.ai_flags = AI_CANONNAME; hints.ai_family = PF_UNSPEC; @@ -78,6 +82,7 @@ int main(int argc, char *argv[]) for (cnt = 0; ; cnt++) { + int len; #define tmp_size (4+4 + 24*4 + 2*4) ret = recv(sock1, &packet1, tmp_size, MSG_WAITALL); if (ret != tmp_size) { @@ -97,12 +102,18 @@ int main(int argc, char *argv[]) } cpuid = packet1.header.cpuid; - if (memcmp(&packet1, &packet2, sizeof(packet1.header) + packet1.header.len) == 0) { + len = sizeof(packet1.header) + packet1.header.len; + if (check_len_override > 0) + len = check_len_override; + + if (memcmp(&packet1, &packet2, len) == 0) { pc_trace[cpuid][pc_trace_p[cpuid]++ & 3] = packet1.regs[0]; continue; } - if (*(int *)&packet1.header != *(int *)&packet2.header) + if (packet1.header.cpuid != packet2.header.cpuid) + printf("%d: CPU %d %d\n", cnt, packet1.header.cpuid & 0xff, packet2.header.cpuid & 0xff); + else if (*(int *)&packet1.header != *(int *)&packet2.header) printf("%d: header\n", cnt); // check regs (and stuff) @@ -113,10 +124,18 @@ int
main(int argc, char *argv[]) break; } - printf("--\nCPU %d, trace:", cpuid); - for (i = 0; i < 4; i++) - printf(" %08x", pc_trace[cpuid][pc_trace_p[cpuid]++ & 3]); - printf(" %08x\n", packet1.regs[0]); + printf("--\nCPU %d\n", cpuid); + for (cpuid = 0; cpuid < 2; cpuid++) { + printf("trace%d: ", cpuid); + for (i = 0; i < 4; i++) + printf(" %08x", pc_trace[cpuid][pc_trace_p[cpuid]++ & 3]); + + if (packet1.header.cpuid == cpuid) + printf(" %08x", packet1.regs[0]); + else if (packet2.header.cpuid == cpuid) + printf(" %08x", packet2.regs[0]); + printf("\n"); + } for (i = 0; i < 24+1; i++) printf("%3s: %08x %08x\n", regnames[i], packet1.regs[i], packet2.regs[i]); diff --git a/cpu/drc/cmn.h b/cpu/drc/cmn.h index d5ed9ba..9ddc12b 100644 --- a/cpu/drc/cmn.h +++ b/cpu/drc/cmn.h @@ -2,7 +2,7 @@ typedef unsigned char u8; typedef unsigned short u16; typedef unsigned int u32; -#define DRC_TCACHE_SIZE (512*1024) +#define DRC_TCACHE_SIZE (2*1024*1024) extern u8 tcache[DRC_TCACHE_SIZE]; diff --git a/cpu/drc/emit_arm.c b/cpu/drc/emit_arm.c index d12db86..06483ea 100644 --- a/cpu/drc/emit_arm.c +++ b/cpu/drc/emit_arm.c @@ -332,8 +332,8 @@ static int emith_xbranch(int cond, void *target, int is_call) tcache_ptr += sizeof(u32) #define JMP_EMIT(cond, ptr) { \ - int val = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \ - EOP_C_B_PTR(ptr, cond, 0, val & 0xffffff); \ + u32 val_ = (u32 *)tcache_ptr - (u32 *)(ptr) - 2; \ + EOP_C_B_PTR(ptr, cond, 0, val_ & 0xffffff); \ } #define EMITH_JMP_START(cond) { \ @@ -630,9 +630,6 @@ static int emith_xbranch(int cond, void *target, int is_call) EOP_MOV_REG_ASR(d,d,32 - (bits)); \ } -#define host_arg2reg(rd, arg) \ - rd = arg - // upto 4 args #define emith_pass_arg_r(arg, reg) \ EOP_MOV_REG_SIMPLE(arg, reg) @@ -658,6 +655,11 @@ static int emith_xbranch(int cond, void *target, int is_call) *ptr_ = (*ptr_ & 0xff000000) | (val_ & 0x00ffffff); \ } while (0) +#define emith_jump_at(ptr, target) { \ + u32 val_ = (u32 *)(target) - (u32 *)(ptr) - 2; \ + 
EOP_C_B_PTR(ptr, A_COND_AL, 0, val_ & 0xffffff); \ +} + #define emith_jump_reg_c(cond, r) \ EOP_C_BX(cond, r) @@ -690,6 +692,18 @@ static int emith_xbranch(int cond, void *target, int is_call) #define emith_ret_to_ctx(offs) \ emith_ctx_write(14, offs) +#define emith_push_ret() \ + EOP_STMFD_SP(A_R14M) + +#define emith_pop_and_ret() \ + EOP_LDMFD_SP(A_R15M) + +#define host_instructions_updated(base, end) \ + cache_flush_d_inval_i(base, end) + +#define host_arg2reg(rd, arg) \ + rd = arg + /* SH2 drc specific */ #define emith_sh2_drc_entry() \ EOP_STMFD_SP(A_R4M|A_R5M|A_R6M|A_R7M|A_R8M|A_R9M|A_R10M|A_R11M|A_R14M) diff --git a/cpu/drc/emit_x86.c b/cpu/drc/emit_x86.c index 6c9a414..2257359 100644 --- a/cpu/drc/emit_x86.c +++ b/cpu/drc/emit_x86.c @@ -1,6 +1,6 @@ /* * note: - * temp registers must be eax-edx due to use of SETcc. + * temp registers must be eax-edx due to use of SETcc and r/w 8/16. * note about silly things like emith_eor_r_r_r: * these are here because the compiler was designed * for ARM as it's primary target. 
@@ -416,6 +416,30 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; } \ } while (0) +#define is_abcdx(r) (xAX <= (r) && (r) <= xDX) + +#define emith_read_op_8_16(op, r, rs, offs) do { \ + int r_ = r; \ + if (!is_abcdx(r)) \ + r_ = rcache_get_tmp(); \ + emith_deref_op(op, r_, rs, offs); \ + if ((r) != r_) { \ + emith_move_r_r(r, r_); \ + rcache_free_tmp(r_); \ + } \ +} while (0) + +#define emith_write_op_8_16(op, r, rs, offs) do { \ + int r_ = r; \ + if (!is_abcdx(r)) { \ + r_ = rcache_get_tmp(); \ + emith_move_r_r(r_, r); \ + } \ + emith_deref_op(op, r_, rs, offs); \ + if ((r) != r_) \ + rcache_free_tmp(r_); \ +} while (0) + #define emith_read_r_r_offs(r, rs, offs) \ emith_deref_op(0x8b, r, rs, offs) @@ -423,19 +447,19 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_deref_op(0x89, r, rs, offs) #define emith_read8_r_r_offs(r, rs, offs) \ - emith_deref_op(0x8a, r, rs, offs) + emith_read_op_8_16(0x8a, r, rs, offs) #define emith_write8_r_r_offs(r, rs, offs) \ - emith_deref_op(0x88, r, rs, offs) + emith_write_op_8_16(0x88, r, rs, offs) #define emith_read16_r_r_offs(r, rs, offs) { \ EMIT(0x66, u8); /* operand override */ \ - emith_read_r_r_offs(r, rs, offs); \ + emith_read_op_8_16(0x8b, r, rs, offs); \ } #define emith_write16_r_r_offs(r, rs, offs) { \ EMIT(0x66, u8); \ - emith_write16_r_r_offs(r, rs, offs) \ + emith_deref_op(0x89, r, rs, offs); /* store, not load; 16bit mov takes any reg and must directly follow the 0x66 prefix */ \ } #define emith_ctx_read(r, offs) \ @@ -487,6 +511,12 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT_PTR((u8 *)(ptr) + offs_, disp_ - offs_, u32); \ } while (0) +#define emith_jump_at(ptr, target) { \ + u32 disp_ = (u32)(target) - ((u32)(ptr) + 5); \ + EMIT_PTR(ptr, 0xe9, u8); \ + EMIT_PTR((u8 *)(ptr) + 1, disp_, u32); \ +} + #define emith_call(ptr) { \ u32 disp = (u32)(ptr) - ((u32)tcache_ptr + 5); \ EMIT_OP(0xe8); \ @@ -515,6 +545,11 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; EMIT(offs, u32); \ } +#define emith_push_ret() + +#define emith_pop_and_ret() \ + emith_ret() +
#define EMITH_JMP_START(cond) { \ u8 *cond_ptr; \ JMP8_POS(cond_ptr) @@ -544,13 +579,6 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; #define EMITH_SJMP3_MID EMITH_JMP3_MID #define EMITH_SJMP3_END EMITH_JMP3_END -#define host_arg2reg(rd, arg) \ - switch (arg) { \ - case 0: rd = xAX; break; \ - case 1: rd = xDX; break; \ - case 2: rd = xCX; break; \ - } - #define emith_pass_arg_r(arg, reg) { \ int rd = 7; \ host_arg2reg(rd, arg); \ @@ -563,6 +591,15 @@ enum { xAX = 0, xCX, xDX, xBX, xSP, xBP, xSI, xDI }; emith_move_r_imm(rd, imm); \ } +#define host_instructions_updated(base, end) + +#define host_arg2reg(rd, arg) \ + switch (arg) { \ + case 0: rd = xAX; break; \ + case 1: rd = xDX; break; \ + case 2: rd = xCX; break; \ + } + /* SH2 drc specific */ #define emith_sh2_drc_entry() { \ emith_push(xBX); \ diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index eea9a60..f10a70c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -104,7 +104,6 @@ static u8 *tcache_ptr; typedef struct block_desc_ { u32 addr; // SH2 PC address - u32 end_addr; // TODO rm? void *tcache_ptr; // translated block for above PC struct block_desc_ *next; // next block with the same PC hash #if (DRC_DEBUG & 1) @@ -114,7 +113,7 @@ typedef struct block_desc_ { typedef struct block_link_ { u32 target_pc; - void *jump; + void *jump; // insn address // struct block_link_ *next; } block_link; @@ -231,13 +230,108 @@ static int REGPARM(3) (*sh2_drc_write32)(u32 a, u32 d, SH2 *sh2); extern void REGPARM(2) sh2_do_op(SH2 *sh2, int opcode); -static void flush_tcache(int tcid) +// address space stuff +static void *dr_get_pc_base(u32 pc, int is_slave) +{ + void *ret = NULL; + u32 mask = 0; + + if ((pc & ~0x7ff) == 0) { + // BIOS + ret = is_slave ? 
Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; + mask = 0x7ff; + } + else if ((pc & 0xfffff000) == 0xc0000000) { + // data array + ret = Pico32xMem->data_array[is_slave]; + mask = 0xfff; + } + else if ((pc & 0xc6000000) == 0x06000000) { + // SDRAM + ret = Pico32xMem->sdram; + mask = 0x03ffff; + } + else if ((pc & 0xc6000000) == 0x02000000) { + // ROM + ret = Pico.rom; + mask = 0x3fffff; + } + + if (ret == NULL) + return (void *)-1; // NULL is valid value + + return (char *)ret - (pc & ~mask); +} + +static int dr_ctx_get_mem_ptr(u32 a, u32 *mask) +{ + int poffs = -1; + + if ((a & ~0x7ff) == 0) { + // BIOS + poffs = offsetof(SH2, p_bios); + *mask = 0x7ff; + } + else if ((a & 0xfffff000) == 0xc0000000) { + // data array + poffs = offsetof(SH2, p_da); + *mask = 0xfff; + } + else if ((a & 0xc6000000) == 0x06000000) { + // SDRAM + poffs = offsetof(SH2, p_sdram); + *mask = 0x03ffff; + } + else if ((a & 0xc6000000) == 0x02000000) { + // ROM + poffs = offsetof(SH2, p_rom); + *mask = 0x3fffff; + } + + return poffs; +} + +static block_desc *dr_get_bd(u32 pc, int is_slave, int *tcache_id) +{ + *tcache_id = 0; + + // we have full block id tables for data_array and RAM + // BIOS goes to data_array table too + if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) { + int blkid = Pico32xMem->drcblk_da[is_slave][(pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; + *tcache_id = 1 + is_slave; + if (blkid & 1) + return &block_tables[*tcache_id][blkid >> 1]; + } + // RAM + else if ((pc & 0xc6000000) == 0x06000000) { + int blkid = Pico32xMem->drcblk_ram[(pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; + if (blkid & 1) + return &block_tables[0][blkid >> 1]; + } + // ROM + else if ((pc & 0xc6000000) == 0x02000000) { + block_desc *bd = HASH_FUNC(hash_table, pc); + + for (; bd != NULL; bd = bd->next) + if (bd->addr == pc) + return bd; + } + + return NULL; +} + +// --------------------------------------------------------------- + +// block management +static void REGPARM(1) flush_tcache(int tcid) { dbg(1, 
"tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tcache_ptrs[tcid] - tcache_bases[tcid], tcache_sizes[tcid], block_counts[tcid], block_max_counts[tcid]); block_counts[tcid] = 0; + block_link_counts[tcid] = 0; tcache_ptrs[tcid] = tcache_bases[tcid]; if (tcid == 0) { // ROM, RAM memset(hash_table, 0, sizeof(hash_table[0]) * MAX_HASH_ENTRIES); @@ -270,28 +364,27 @@ static int dr_add_block_link(u32 target_pc, void *jump, int tcache_id) } #endif -static void *dr_find_block(block_desc *tab, u32 addr) -{ - for (tab = tab->next; tab != NULL; tab = tab->next) - if (tab->addr == addr) - break; - - if (tab != NULL) - return tab->tcache_ptr; - - printf("block miss for %08x\n", addr); - return NULL; -} - -static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) +static block_desc *dr_add_block(u32 addr, int is_slave, int *blk_id) { - int *bcount = &block_counts[tcache_id]; block_desc *bd; + int tcache_id; + int *bcount; + + bd = dr_get_bd(addr, is_slave, &tcache_id); + if (bd != NULL) { + dbg(1, "block override for %08x", addr); + bd->tcache_ptr = tcache_ptr; + *blk_id = bd - block_tables[tcache_id]; + return bd; + } + bcount = &block_counts[tcache_id]; if (*bcount >= block_max_counts[tcache_id]) { printf("bd overflow for tcache %d\n", tcache_id); return NULL; } + if (*bcount == 0) + (*bcount)++; // not using descriptor 0 bd = &block_tables[tcache_id][*bcount]; bd->addr = addr; @@ -313,6 +406,62 @@ static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) return bd; } +static void REGPARM(3) *dr_lookup_block(u32 pc, int is_slave, int *tcache_id) +{ + block_desc *bd = NULL; + void *block = NULL; + + bd = dr_get_bd(pc, is_slave, tcache_id); + if (bd != NULL) + block = bd->tcache_ptr; + +#if (DRC_DEBUG & 1) + if (bd != NULL) + bd->refcount++; +#endif + return block; +} + +static void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) +{ +#if LINK_BRANCHES + int target_tcache_id; + void *target; + int ret; + + target = dr_lookup_block(pc, sh2->is_slave, 
&target_tcache_id); + if (target_tcache_id == tcache_id) { + // allow linking blocks only from local cache + ret = dr_add_block_link(pc, tcache_ptr, tcache_id); + if (ret < 0) + return NULL; + } + if (target == NULL || target_tcache_id != tcache_id) + target = sh2_drc_dispatcher; + + return target; +#else + return sh2_drc_dispatcher; +#endif +} + +static void dr_link_blocks(void *target, u32 pc, int tcache_id) +{ +#if LINK_BRANCHES + block_link *bl = block_links[tcache_id]; + int cnt = block_link_counts[tcache_id]; + int i; + + for (i = 0; i < cnt; i++) { + if (bl[i].target_pc == pc) { + dbg(1, "- link from %p", bl[i].jump); + emith_jump_patch(bl[i].jump, target); + // XXX: sync ARM caches (old jump should be fine)? + } + } +#endif +} + #define ADD_TO_ARRAY(array, count, item, failcode) \ array[count++] = item; \ if (count >= ARRAY_SIZE(array)) { \ @@ -320,7 +469,7 @@ static block_desc *dr_add_block(u32 addr, int tcache_id, int *blk_id) failcode; \ } -int find_in_array(u32 *array, size_t size, u32 what) +static int find_in_array(u32 *array, size_t size, u32 what) { size_t i; for (i = 0; i < size; i++) @@ -332,6 +481,7 @@ int find_in_array(u32 *array, size_t size, u32 what) // --------------------------------------------------------------- +// register cache / constant propagation stuff typedef enum { RC_GR_READ, RC_GR_WRITE, @@ -346,9 +496,9 @@ static u32 dr_gcregs[24]; static u32 dr_gcregs_mask; static u32 dr_gcregs_dirty; +#if PROPAGATE_CONSTANTS static void gconst_new(sh2_reg_e r, u32 val) { -#if PROPAGATE_CONSTANTS int i; dr_gcregs_mask |= 1 << r; @@ -363,8 +513,8 @@ static void gconst_new(sh2_reg_e r, u32 val) reg_temp[i].flags = 0; } } -#endif } +#endif static int gconst_get(sh2_reg_e r, u32 *val) { @@ -423,7 +573,6 @@ static void gconst_invalidate(void) dr_gcregs_mask = dr_gcregs_dirty = 0; } -// register chache static u16 rcache_counter; static temp_reg_t *rcache_evict(void) @@ -701,70 +850,17 @@ static void rcache_flush(void) // 
--------------------------------------------------------------- -// address space stuff -static void *dr_get_pc_base(u32 pc, int is_slave) -{ - void *ret = NULL; - u32 mask = 0; - - if ((pc & ~0x7ff) == 0) { - // BIOS - ret = is_slave ? Pico32xMem->sh2_rom_s : Pico32xMem->sh2_rom_m; - mask = 0x7ff; - } - else if ((pc & 0xfffff000) == 0xc0000000) { - // data array - ret = Pico32xMem->data_array[is_slave]; - mask = 0xfff; - } - else if ((pc & 0xc6000000) == 0x06000000) { - // SDRAM - ret = Pico32xMem->sdram; - mask = 0x03ffff; - } - else if ((pc & 0xc6000000) == 0x02000000) { - // ROM - ret = Pico.rom; - mask = 0x3fffff; - } - - if (ret == NULL) - return (void *)-1; // NULL is valid value - - return (char *)ret - (pc & ~mask); -} - static int emit_get_rbase_and_offs(u32 a, u32 *offs) { - int poffs = -1; u32 mask = 0; + int poffs; int hr; - if ((a & ~0x7ff) == 0) { - // BIOS - poffs = offsetof(SH2, p_bios); - mask = 0x7ff; - } - else if ((a & 0xfffff000) == 0xc0000000) { - // data array - poffs = offsetof(SH2, p_da); - mask = 0xfff; - } - else if ((a & 0xc6000000) == 0x06000000) { - // SDRAM - poffs = offsetof(SH2, p_sdram); - mask = 0x03ffff; - } - else if ((a & 0xc6000000) == 0x02000000) { - // ROM - poffs = offsetof(SH2, p_rom); - mask = 0x3fffff; - } - + poffs = dr_ctx_get_mem_ptr(a, &mask); if (poffs == -1) return -1; - // XXX: could use related reg + // XXX: could use some related reg hr = rcache_get_tmp(); emith_ctx_read(hr, poffs); emith_add_r_imm(hr, a & mask & ~0xff); @@ -772,49 +868,6 @@ static int emit_get_rbase_and_offs(u32 a, u32 *offs) return hr; } -static void REGPARM(3) *lookup_block(u32 pc, int is_slave, int *tcache_id) -{ - block_desc *bd = NULL; - void *block = NULL; - *tcache_id = 0; - - // we have full block id tables for data_array and RAM - // BIOS goes to data_array table too - if ((pc & 0xe0000000) == 0xc0000000 || (pc & ~0xfff) == 0) { - int blkid = Pico32xMem->drcblk_da[is_slave][(pc & 0xfff) >> SH2_DRCBLK_DA_SHIFT]; - *tcache_id = 1 + 
is_slave; - if (blkid & 1) { - bd = &block_tables[*tcache_id][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // RAM - else if ((pc & 0xc6000000) == 0x06000000) { - int blkid = Pico32xMem->drcblk_ram[(pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT]; - if (blkid & 1) { - bd = &block_tables[0][blkid >> 1]; - block = bd->tcache_ptr; - } - } - // ROM - else if ((pc & 0xc6000000) == 0x02000000) { - bd = HASH_FUNC(hash_table, pc); - - if (bd != NULL) { - if (bd->addr == pc) - block = bd->tcache_ptr; - else - block = dr_find_block(bd, pc); - } - } - -#if (DRC_DEBUG & 1) - if (bd != NULL) - bd->refcount++; -#endif - return block; -} - static void emit_move_r_imm32(sh2_reg_e dst, u32 imm) { #if PROPAGATE_CONSTANTS @@ -1073,46 +1126,6 @@ static void emit_block_entry(void) EMITH_SJMP_END(DCOND_EQ); } -void dr_link_blocks(void *target, u32 pc, int tcache_id) -{ -#if LINK_BRANCHES - block_link *bl = block_links[tcache_id]; - int cnt = block_link_counts[tcache_id]; - int i; - - for (i = 0; i < cnt; i++) { - if (bl[i].target_pc == pc) { - dbg(1, "- link from %p", bl[i].jump); - emith_jump_patch(bl[i].jump, target); - // XXX: sync ARM caches (old jump should be fine)? - } - } -#endif -} - -void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) -{ -#if LINK_BRANCHES - int target_tcache_id; - void *target; - int ret; - - target = lookup_block(pc, sh2->is_slave, &target_tcache_id); - if (target_tcache_id == tcache_id) { - // allow linking blocks only from local cache - ret = dr_add_block_link(pc, tcache_ptr, tcache_id); - if (ret < 0) - return NULL; - } - if (target == NULL || target_tcache_id != tcache_id) - target = sh2_drc_dispatcher; - - return target; -#else - return sh2_drc_dispatcher; -#endif -} - #define DELAYED_OP \ drcf.delayed_op = 2 @@ -1163,8 +1176,9 @@ void *dr_prepare_ext_branch(u32 pc, SH2 *sh2, int tcache_id) static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) { // XXX: maybe use structs instead? 
- void *branch_target_ptr[MAX_LOCAL_BRANCHES]; u32 branch_target_pc[MAX_LOCAL_BRANCHES]; + void *branch_target_ptr[MAX_LOCAL_BRANCHES]; + int branch_target_blkid[MAX_LOCAL_BRANCHES]; int branch_target_count = 0; void *branch_patch_ptr[MAX_LOCAL_BRANCHES]; u32 branch_patch_pc[MAX_LOCAL_BRANCHES]; @@ -1202,7 +1216,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } tcache_ptr = tcache_ptrs[tcache_id]; - this_block = dr_add_block(base_pc, tcache_id, &blkid_main); + this_block = dr_add_block(base_pc, sh2->is_slave, &blkid_main); if (this_block == NULL) return NULL; @@ -1267,6 +1281,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) } branch_target_count = tmp; memset(branch_target_ptr, 0, sizeof(branch_target_ptr[0]) * branch_target_count); + memset(branch_target_blkid, 0, sizeof(branch_target_blkid[0]) * branch_target_count); // ------------------------------------------------- // 2nd pass: actual compilation @@ -1284,12 +1299,10 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) i = find_in_array(branch_target_pc, branch_target_count, pc); if (i >= 0) { - if (pc != sh2->pc) + if (pc != base_pc) { /* make "subblock" - just a mid-block entry */ block_desc *subblock; - u16 *drcblk; - int blkid; sr = rcache_get_reg(SHR_SR, RC_GR_RMW); FLUSH_CYCLES(sr); @@ -1300,23 +1313,12 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) rcache_flush(); do_host_disasm(tcache_id); - subblock = dr_add_block(pc, tcache_id, &blkid); + subblock = dr_add_block(pc, sh2->is_slave, &branch_target_blkid[i]); if (subblock == NULL) return NULL; - subblock->end_addr = pc; - - if (tcache_id != 0) { // data array, BIOS - drcblk = Pico32xMem->drcblk_da[sh2->is_slave]; - drcblk += (pc & 0x00fff) >> SH2_DRCBLK_DA_SHIFT; - *drcblk = (blkid << 1) | 1; - } else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM - drcblk = Pico32xMem->drcblk_ram; - drcblk += (pc & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - *drcblk = (blkid << 1) | 1; - } 
dbg(1, "-- %csh2 subblock #%d,%d %08x -> %p", sh2->is_slave ? 's' : 'm', - tcache_id, blkid, pc, tcache_ptr); + tcache_id, branch_target_blkid[i], pc, tcache_ptr); // since we made a block entry, link any other blocks that jump to current pc dr_link_blocks(tcache_ptr, pc, tcache_id); @@ -1848,7 +1850,7 @@ static void REGPARM(2) *sh2_translate(SH2 *sh2, int tcache_id) // XXX: limit burned cycles emit_move_r_imm32(GET_Rn(), 0); emith_or_r_imm(sr, T); - cycles += tmp * 4; + cycles += tmp * 4 + 1; // +1 syncs with noconst version, not sure why skip_op = 1; } else @@ -2572,39 +2574,44 @@ end_op: emith_jump_patch(branch_patch_ptr[i], target); } - this_block->end_addr = pc; - if (last_inlined_literal > pc) - this_block->end_addr = last_inlined_literal + 4; + end_pc = pc; + if (last_inlined_literal > end_pc) + end_pc = last_inlined_literal + 4; // mark memory blocks as containing compiled code - if (tcache_id != 0) { - // data array, BIOS - u16 *drcblk = Pico32xMem->drcblk_da[sh2->is_slave]; - tmp = (this_block->addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT; - tmp2 = (this_block->end_addr & 0xfff) >> SH2_DRCBLK_DA_SHIFT; - drcblk[tmp] = (blkid_main << 1) | 1; - for (++tmp; tmp < tmp2; tmp++) { - if (drcblk[tmp]) - continue; // dont overwrite overlay block(s) - drcblk[tmp] = blkid_main << 1; + // override any overlay blocks as they become unreachable anyway + if (tcache_id != 0 || (this_block->addr & 0xc7fc0000) == 0x06000000) + { + u16 *drc_ram_blk = NULL; + u32 mask = 0, shift = 0; + + if (tcache_id != 0) { + // data array, BIOS + drc_ram_blk = Pico32xMem->drcblk_da[sh2->is_slave]; + shift = SH2_DRCBLK_DA_SHIFT; + mask = 0xfff; } - } - else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { // DRAM - tmp = (this_block->addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - tmp2 = (this_block->end_addr & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT; - Pico32xMem->drcblk_ram[tmp] = (blkid_main << 1) | 1; - for (++tmp; tmp < tmp2; tmp++) { - if (Pico32xMem->drcblk_ram[tmp]) - continue; - 
Pico32xMem->drcblk_ram[tmp] = blkid_main << 1; + else if ((this_block->addr & 0xc7fc0000) == 0x06000000) { + // SDRAM + drc_ram_blk = Pico32xMem->drcblk_ram; + shift = SH2_DRCBLK_RAM_SHIFT; + mask = 0x3ffff; } + + drc_ram_blk[(base_pc & mask) >> shift] = (blkid_main << 1) | 1; + for (pc = base_pc + 2; pc < end_pc; pc += 2) + drc_ram_blk[(pc & mask) >> shift] = blkid_main << 1; + + // mark subblocks too + for (i = 0; i < branch_target_count; i++) + if (branch_target_blkid[i] != 0) + drc_ram_blk[(branch_target_pc[i] & mask) >> shift] = + branch_target_blkid[i] << 1; } tcache_ptrs[tcache_id] = tcache_ptr; -#ifdef ARM - cache_flush_d_inval_i(block_entry, tcache_ptr); -#endif + host_instructions_updated(block_entry, tcache_ptr); do_host_disasm(tcache_id); dbg(1, " block #%d,%d tcache %d/%d, insns %d -> %d %.3f", @@ -2656,7 +2663,7 @@ static void sh2_generate_utils(void) emith_ctx_read(arg0, SHR_PC * 4); emith_ctx_read(arg1, offsetof(SH2, is_slave)); emith_add_r_r_imm(arg2, CONTEXT_REG, offsetof(SH2, drc_tmp)); - emith_call(lookup_block); + emith_call(dr_lookup_block); emit_block_entry(); // lookup failed, call sh2_translate() emith_move_r_r(arg0, CONTEXT_REG); @@ -2781,7 +2788,7 @@ static void sh2_generate_utils(void) // debug #define MAKE_READ_WRAPPER(func) { \ void *tmp = (void *)tcache_ptr; \ - emith_ret_to_ctx(offsetof(SH2, drc_tmp)); \ + emith_push_ret(); \ emith_call(func); \ emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[0])); \ emith_addf_r_r(arg2, arg0); \ @@ -2789,7 +2796,7 @@ static void sh2_generate_utils(void) emith_ctx_read(arg2, offsetof(SH2, pdb_io_csum[1])); \ emith_adc_r_imm(arg2, 0x01000000); \ emith_ctx_write(arg2, offsetof(SH2, pdb_io_csum[1])); \ - emith_jump_ctx(offsetof(SH2, drc_tmp)); \ + emith_pop_and_ret(); \ func = tmp; \ } #define MAKE_WRITE_WRAPPER(func) { \ @@ -2836,48 +2843,57 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block(u16 *drcblk, u16 *p, block_desc *btab, u32 a) +static void
*sh2_smc_rm_block_entry(block_desc *bd, int tcache_id) { - u16 id = *p >> 1; - block_desc *bd = btab + id; - - // FIXME: skip subblocks; do both directions - // FIXME: collect all branches - dbg(1, " killing block %08x", bd->addr); - bd->addr = bd->end_addr = 0; + // XXX: kill links somehow? + dbg(1, " killing entry %08x, blkid %d", bd->addr, bd - block_tables[tcache_id]); + bd->addr = 0; + // since we never reuse space of dead blocks, + // insert jump to dispatcher for blocks that are linked to this point + emith_jump_at(bd->tcache_ptr, sh2_drc_dispatcher); + return bd->tcache_ptr; +} - while (p > drcblk && (p[-1] >> 1) == id) +static void sh2_smc_rm_block(u32 a, u16 *drc_ram_blk, int tcache_id, u32 shift, u32 mask) +{ + //block_link *bl = block_links[tcache_id]; + //int bl_count = block_link_counts[tcache_id]; + block_desc *btab = block_tables[tcache_id]; + u16 *p = drc_ram_blk + ((a & mask) >> shift); + u16 *pe = drc_ram_blk + (mask >> shift); + void *tcache_min, *tcache_max; + int main_id, prev_id = 0; + + while (p > drc_ram_blk && (*p & 1) == 0) p--; - // check for possible overlay block - if (p > 0 && p[-1] != 0) { - bd = btab + (p[-1] >> 1); - if (bd->addr <= a && a < bd->end_addr) - sh2_smc_rm_block(drcblk, p - 1, btab, a); + if (!(*p & 1)) + printf("smc rm: missing block start for %08x?\n", a); + main_id = *p >> 1; + tcache_min = tcache_max = sh2_smc_rm_block_entry(&btab[main_id], tcache_id); + + for (*p++ = 0; p <= pe && *p != 0 && !(*p & 1); p++) { + int id = *p >> 1; + if (id != main_id && id != prev_id) + tcache_max = sh2_smc_rm_block_entry(&btab[*p >> 1], tcache_id); + *p = 0; + prev_id = id; } - do { - *p++ = 0; - } - while ((*p >> 1) == id); + host_instructions_updated(tcache_min, (void *)((char *)tcache_max + 4)); } void sh2_drc_wcheck_ram(unsigned int a, int val, int cpuid) { - u16 *drcblk = Pico32xMem->drcblk_ram; - u16 *p = drcblk + ((a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT); - dbg(1, "%csh2 smc check @%08x", cpuid ? 
's' : 'm', a); - sh2_smc_rm_block(drcblk, p, block_tables[0], a); + sh2_smc_rm_block(a, Pico32xMem->drcblk_ram, 0, SH2_DRCBLK_RAM_SHIFT, 0x3ffff); } void sh2_drc_wcheck_da(unsigned int a, int val, int cpuid) { - u16 *drcblk = Pico32xMem->drcblk_da[cpuid]; - u16 *p = drcblk + ((a & 0xfff) >> SH2_DRCBLK_DA_SHIFT); - dbg(1, "%csh2 smc check @%08x", cpuid ? 's' : 'm', a); - sh2_smc_rm_block(drcblk, p, block_tables[1 + cpuid], a); + sh2_smc_rm_block(a, Pico32xMem->drcblk_da[cpuid], + 1 + cpuid, SH2_DRCBLK_DA_SHIFT, 0xfff); } void sh2_execute(SH2 *sh2c, int cycles) @@ -2979,9 +2995,7 @@ int sh2_drc_init(SH2 *sh2) drc_cmn_init(); tcache_ptr = tcache; sh2_generate_utils(); -#ifdef ARM - cache_flush_d_inval_i(tcache, tcache_ptr); -#endif + host_instructions_updated(tcache, tcache_ptr); tcache_bases[0] = tcache_ptrs[0] = tcache_ptr; for (i = 1; i < ARRAY_SIZE(tcache_bases); i++) diff --git a/pico/32x/draw.c b/pico/32x/draw.c index 48908d8..7b431d2 100644 --- a/pico/32x/draw.c +++ b/pico/32x/draw.c @@ -308,7 +308,7 @@ void PicoDraw32xSetFrameMode(int is_on, int only_32x) if (is_on) { // use the same layout as alt renderer - PicoDrawSetInternalBuf(PicoDraw2FB + 328*8, 328); + PicoDrawSetInternalBuf(PicoDraw2FB, 328); Pico32xDrawMode = only_32x ? 
PDM32X_32X_ONLY : PDM32X_BOTH; } else { PicoDrawSetInternalBuf(NULL, 0); diff --git a/pico/cart.c b/pico/cart.c index 8d30a93..42d0817 100644 --- a/pico/cart.c +++ b/pico/cart.c @@ -9,6 +9,7 @@ #include "pico_int.h" #include "../zlib/zlib.h" +#include "../cpu/debug.h" #include "../unzip/unzip.h" #include "../unzip/unzip_stream.h" @@ -555,6 +556,7 @@ int PicoCartInsert(unsigned char *rom, unsigned int romsize, const char *carthw_ PicoCartUnloadHook(); PicoCartUnloadHook = NULL; } + pdb_cleanup(); PicoAHW &= PAHW_MCD|PAHW_SMS; diff --git a/pico/draw.c b/pico/draw.c index 1781b9e..5094e5c 100644 --- a/pico/draw.c +++ b/pico/draw.c @@ -1421,7 +1421,7 @@ PICO_INTERNAL void PicoFrameStart(void) lines = 240; } - HighCol = HighColBase; + HighCol = HighColBase + offs * HighColIncrement; DrawLineDest = (char *)DrawLineDestBase + offs * DrawLineDestIncrement; DrawScanline = 0; skip_next_line = 0; diff --git a/platform/gp2x/version.h b/platform/gp2x/version.h index 69cf800..1fddda5 100644 --- a/platform/gp2x/version.h +++ b/platform/gp2x/version.h @@ -1,2 +1,2 @@ -#define VERSION "1.70" +#define VERSION "1.70b2" -- 2.39.2