From 7e940f142e4f9840e76a86f9c0c30ad90bb2684f Mon Sep 17 00:00:00 2001 From: kub Date: Sat, 19 Oct 2019 08:53:28 +0200 Subject: [PATCH] 32x, finetuning --- cpu/sh2/compiler.c | 31 ++++++++++++++----------------- cpu/sh2/compiler.h | 4 ++-- pico/32x/memory.c | 41 ++++++++++++++++++++--------------------- pico/32x/memory_arm.S | 8 +++++--- 4 files changed, 41 insertions(+), 43 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index b2306cf2..e9173c4c 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -272,9 +272,9 @@ static void REGPARM(3) *sh2_drc_log_entry(void *block, SH2 *sh2, u32 sr) // and can be discarded early // XXX: need to tune sizes static const int tcache_sizes[TCACHE_BUFFERS] = { - DRC_TCACHE_SIZE * 14 / 16, // ROM (rarely used), DRAM - DRC_TCACHE_SIZE / 16, // BIOS, data array in master sh2 - DRC_TCACHE_SIZE / 16, // ... slave + DRC_TCACHE_SIZE * 30 / 32, // ROM (rarely used), DRAM + DRC_TCACHE_SIZE / 32, // BIOS, data array in master sh2 + DRC_TCACHE_SIZE / 32, // ... slave }; static u8 *tcache_bases[TCACHE_BUFFERS]; @@ -332,13 +332,13 @@ struct block_desc { struct block_entry entryp[MAX_BLOCK_ENTRIES]; }; -#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 16*256) +#define BLOCK_MAX_COUNT(tcid) ((tcid) ? 256 : 32*256) static struct block_desc *block_tables[TCACHE_BUFFERS]; static int block_counts[TCACHE_BUFFERS]; static int block_limit[TCACHE_BUFFERS]; // we have block_link_pool to avoid using mallocs -#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 1024 : 16*1024) +#define BLOCK_LINK_MAX_COUNT(tcid) ((tcid) ? 512 : 32*512) static struct block_link *block_link_pool[TCACHE_BUFFERS]; static int block_link_pool_counts[TCACHE_BUFFERS]; static struct block_link **unresolved_links[TCACHE_BUFFERS]; @@ -363,7 +363,7 @@ static struct block_list *inactive_blocks[TCACHE_BUFFERS]; // each array has len: sizeof(mem) / INVAL_PAGE_SIZE static struct block_list **inval_lookup[TCACHE_BUFFERS]; -#define HASH_TABLE_SIZE(tcid) ((tcid) ? 256 : 64*256) +#define HASH_TABLE_SIZE(tcid) ((tcid) ? 512 : 64*512) static struct block_entry **hash_tables[TCACHE_BUFFERS]; #define HASH_FUNC(hash_tab, addr, mask) \ @@ -5188,20 +5188,14 @@ static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) +void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2) { - int off = ((u16) t ? 0 : 2); - int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); - - sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT); + sh2_smc_rm_blocks(a, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) +void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2) { - int off = ((u16) t ? 0 : 2); - int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); - - sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); + sh2_smc_rm_blocks(a, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -6403,6 +6397,9 @@ end: last_btarget = 0; op = 0; // delay/poll insns counter for (i = 0, pc = base_pc; i < i_end; i++, pc += 2) { + int null; + if ((op_flags[i] & OF_BTARGET) && dr_get_entry(pc, is_slave, &null)) + break; // branch target already compiled opd = &ops[i]; crc += FETCH_OP(pc); @@ -6483,7 +6480,7 @@ end: op ++; // condition 2 #endif } - end_pc = base_pc + i_end * 2; + end_pc = pc; // end_literals is used to decide to inline a literal or not // XXX: need better detection if this actually is used in write diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 94dff8c5..5f374c8c 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); +void sh2_drc_wcheck_ram(unsigned int a, unsigned len, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, unsigned len, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 39504416..44bc72d7 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -162,15 +162,13 @@ void NOINLINE p32x_sh2_poll_event(SH2 *sh2, u32 flags, u32 m68k_cycles) sh2->poll_addr = sh2->poll_cycles = sh2->poll_cnt = 0; } -static void sh2s_sync_on_read(SH2 *sh2) +static void sh2s_sync_on_read(SH2 *sh2, unsigned cycles) { - int cycles; if (sh2->poll_cnt != 0) return; - cycles = sh2_cycles_done(sh2); - if (cycles > 600) - p32x_sync_other_sh2(sh2, sh2->m68krcycles_done + C_SH2_TO_M68K(sh2, cycles)); + if (p32x_sh2_ready(sh2->other_sh2, cycles-250)) + p32x_sync_other_sh2(sh2, cycles); } // poll fifo, stores writes to potential addresses used for polling. @@ -271,8 +269,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory16(unsigned int a, u32 d, SH2 *sh2) DRC_SAVE_SR(sh2); // is this a synchronisation address? if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { - sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); // check poll fifo and sign-extend the result correctly d = (s16)sh2_poll_read(a, d, cycles, sh2); } @@ -291,8 +289,8 @@ u32 REGPARM(3) p32x_sh2_poll_memory32(unsigned int a, u32 d, SH2 *sh2) DRC_SAVE_SR(sh2); // is this a synchronisation address? if(p[(a & 0x3ffff) >> SH2_DRCBLK_RAM_SHIFT] & 0x80) { - sh2s_sync_on_read(sh2); cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); // check poll fifo and sign-extend the result correctly d = (sh2_poll_read(a, d >> 16, cycles, sh2) << 16) | ((u16)sh2_poll_read(a+2, d, cycles, sh2)); @@ -729,6 +727,7 @@ static void p32x_vdp_write16(u32 a, u32 d, SH2 *sh2) static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) { u16 *r = Pico32x.regs; + unsigned cycles; a &= 0x3e; switch (a/2) { @@ -737,8 +736,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) | Pico32x.sh2irq_mask[sh2->is_slave]; case 0x04/2: // H count (often as comm too) sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], sh2_cycles_done_m68k(sh2), sh2); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, Pico32x.sh2_regs[4 / 2], cycles, sh2); case 0x06/2: return (r[a / 2] & ~P32XS_FULL) | 0x4000; case 0x08/2: // DREQ src @@ -770,8 +770,9 @@ static u32 p32x_sh2reg_read16(u32 a, SH2 *sh2) case 0x2c/2: case 0x2e/2: sh2_poll_detect(a, sh2, SH2_STATE_CPOLL, 9); - sh2s_sync_on_read(sh2); - return sh2_poll_read(a, r[a / 2], sh2_cycles_done_m68k(sh2), sh2); + cycles = sh2_cycles_done_m68k(sh2); + sh2s_sync_on_read(sh2, cycles); + return sh2_poll_read(a, r[a / 2], cycles, sh2); case 0x30/2: // PWM case 0x32/2: case 0x34/2: @@ -825,7 +826,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } @@ -852,7 +853,7 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) REG8IN16(r, a) = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } @@ -945,7 +946,7 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) Pico32x.regs[a / 2] = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } @@ -1580,7 +1581,7 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); - if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + if (p32x_sh2_ready(sh2->other_sh2, cycles+8)) sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } @@ -1588,27 +1589,25 @@ static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) { if (t & 0x80) sh2_sdram_poll(a, d, sh2); - if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, 2, sh2); } void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) { - u32 m = 0x80 | 0x800000; - if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); - if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2); + if (t & ~0x800080) sh2_drc_wcheck_ram(a, 4, sh2); } #ifndef _ASM_32X_MEMORY_C static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) { - sh2_drc_wcheck_da(a, t, sh2); + sh2_drc_wcheck_da(a, 2, sh2); } static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) { - sh2_drc_wcheck_da(a, t, sh2); + sh2_drc_wcheck_da(a, 4, sh2); } #endif #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index b3a94b62..40707fe7 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -139,12 +139,11 @@ sh2_write8_sdram: mov r3, r3, lsl #SH2_RAM_SHIFT strb r1, [ip, r3, lsr #SH2_RAM_SHIFT] #ifdef DRC_SH2 - ldr ip, [r2, #OFS_SH2_p_drcblk_ram] - ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] + ldr r1, [r2, #OFS_SH2_p_drcblk_ram] + ldrb r3, [r1, r3, lsr #SH2_RAM_SHIFT+1] cmp r3, #0 bxeq lr @ need to load aligned 16 bit data for check - ldr ip, [r2, #OFS_SH2_p_sdram] bic r0, r0, #1 mov r1, r0, lsl #SH2_RAM_SHIFT mov r1, r1, lsr #SH2_RAM_SHIFT @@ -166,6 +165,7 @@ sh2_write8_da: bic r0, r0, #1 cmp r1, #0 bxeq lr + mov r1, #2 b sh2_drc_wcheck_da #else bx lr @@ -206,6 +206,7 @@ sh2_write16_da: ldrb r1, [ip, r3, lsr #1] cmp r1, #0 bxeq lr + mov r1, #2 b sh2_drc_wcheck_da #else bx lr @@ -256,6 +257,7 @@ sh2_write32_da: ldrb ip, [ip, #1] orrs r1, r1, ip, lsl #16 bxeq lr + mov r1, #4 b sh2_drc_wcheck_da #else bx lr -- 2.39.2