From 52055c13b253cce969a24fa2b95eb9c39ac7ea79 Mon Sep 17 00:00:00 2001 From: kub Date: Thu, 17 Oct 2019 21:54:37 +0200 Subject: [PATCH] sh2 drc: reorganised block mgmt code, plus some small scale optimisations --- cpu/sh2/compiler.c | 691 ++++++++++++++++++------------------ cpu/sh2/compiler.h | 4 +- cpu/sh2/sh2.h | 2 +- pico/32x/memory.c | 56 +-- pico/32x/memory_arm.S | 15 +- pico/pico_int.h | 4 + platform/gp2x/PicoDrive.gpe | 2 + 7 files changed, 395 insertions(+), 379 deletions(-) diff --git a/cpu/sh2/compiler.c b/cpu/sh2/compiler.c index 86d4b85a..1acc7215 100644 --- a/cpu/sh2/compiler.c +++ b/cpu/sh2/compiler.c @@ -764,58 +764,16 @@ static void rm_from_block_lists(struct block_desc *block) block->list = NULL; } -static void rm_block_list(struct block_list **blist) +static void discard_block_list(struct block_list **blist) { - while (*blist != NULL) - rm_from_block_lists((*blist)->block); -} - -static void REGPARM(1) flush_tcache(int tcid) -{ - int i; -#if (DRC_DEBUG & 1) - int tc_used, bl_used; - - tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); - bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); - elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, - tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); -#endif - - block_counts[tcid] = 0; - block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; - block_link_pool_counts[tcid] = 0; - blink_free[tcid] = NULL; - memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); - memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); - tcache_ptrs[tcid] = tcache_bases[tcid]; - tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; - if (Pico32xMem->sdram != NULL) { - if (tcid == 0) { // ROM, RAM - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); - memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } else { - memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); - memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); - memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); - memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); - memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); - memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); - sh2s[tcid - 1].rts_cache_idx = 0; - } + struct block_list *next, *current = *blist; + while (current != NULL) { + next = current->next; + current->next = blist_free; + blist_free = current; + current = next; } -#if (DRC_DEBUG & 4) - tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; -#endif - - for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) - rm_block_list(&inval_lookup[tcid][i]); - rm_block_list(&inactive_blocks[tcid]); + *blist = NULL; } static void add_to_hashlist(struct block_entry *be, int tcache_id) @@ -902,68 +860,127 @@ static void rm_from_hashlist_unresolved(struct block_link *bl, int tcache_id) bl->next->prev = bl->prev; } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free); -static void dr_free_oldest_block(int tcache_id) +#if LINK_BRANCHES +static void dr_block_link(struct block_entry 
*be, struct block_link *bl, int emit_jump) { - struct block_desc *bd; + dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? "":"early ", + bl->jump, bl->target_pc, be->tcache_ptr); - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { - // block desc wrap around - block_limit[tcache_id] = 0; + if (emit_jump) { + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // patch: jump @entry + // inlined: @jump far jump to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else if (bl->type == BL_LDJMP) { // write: jump @entry + // inlined: @jump far jump to target + emith_jump_at(jump, be->tcache_ptr); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry + if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) { + // inlined: @jump near jumpcc to target + emith_jump_patch(jump, be->tcache_ptr, &jump); + } else { // dispatcher cond immediate + // via blx: @jump near jumpcc to blx; @blx far jump + emith_jump_patch(jump, bl->blx, &jump); + emith_jump_at(bl->blx, be->tcache_ptr); + if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf) + host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); + } + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } + // only needs sync if patch is possibly crossing cacheline (assume 16 byte) + if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf) + host_instructions_updated(jump, jump + jsz-1); } - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { - // cache wrap around - tcache_ptrs[tcache_id] = bd->tcache_ptr; - } + // move bl to block_entry + bl->target = be; + bl->prev = NULL; + if (be->links) + be->links->prev = bl; + bl->next = be->links; + be->links = bl; +} - if (bd->addr && bd->entry_count) - sh2_smc_rm_block_entry(bd, tcache_id, 0, 1); +static void dr_block_unlink(struct block_link *bl, int emit_jump) +{ + dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc); - block_limit[tcache_id]++; - if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) - block_limit[tcache_id] = 0; - bd = &block_tables[tcache_id][block_limit[tcache_id]]; - if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) - tcache_limit[tcache_id] = bd->tcache_ptr; - else - tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; + if (bl->target) { + if (emit_jump) { + u8 *jump = bl->jump; + int jsz = emith_jump_patch_size(); + if (bl->type == BL_JMP) { // jump_patch @dispatcher + // inlined: @jump far jump to dispatcher + emith_jump_patch(jump, sh2_drc_dispatcher, &jump); + } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher + // inlined: @jump load target_pc, far jump to dispatcher + memcpy(jump, bl->jdisp, emith_jump_at_size()); + jsz = emith_jump_at_size(); + } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump + // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump + emith_jump_patch(bl->jump, bl->blx, &jump); + memcpy(bl->blx, bl->jdisp, emith_jump_at_size()); + host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); + } else { + printf("unknown BL type %d\n", bl->type); + exit(1); + } + // update cpu caches since the previous jump target doesn't exist anymore + host_instructions_updated(jump, jump + jsz-1); + } + + if (bl->prev) + bl->prev->next = bl->next; + else + bl->target->links = bl->next; + if (bl->next) + bl->next->prev = bl->prev; + bl->target = NULL; + } } +#endif -static u8 
*dr_prepare_cache(int tcache_id, int insn_count) +static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) { - u8 *limit = tcache_limit[tcache_id]; +#if LINK_BRANCHES + struct block_link *bl = block_link_pool[tcache_id]; + int cnt = block_link_pool_counts[tcache_id]; + int target_tcache_id; - // if no block desc available - if (block_counts[tcache_id] == block_limit[tcache_id]) - dr_free_oldest_block(tcache_id); + // get the target block entry + target_tcache_id = dr_get_tcache_id(pc, is_slave); + if (target_tcache_id && target_tcache_id != tcache_id) + return NULL; - // while not enough cache space left (limit - tcache_ptr < max space needed) - while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) - dr_free_oldest_block(tcache_id); + // get a block link + if (blink_free[tcache_id] != NULL) { + bl = blink_free[tcache_id]; + blink_free[tcache_id] = bl->next; + } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { + dbg(1, "bl overflow for tcache %d", tcache_id); + return NULL; + } else { + bl += cnt; + block_link_pool_counts[tcache_id] = cnt+1; + } - if (limit != tcache_limit[tcache_id]) { -#if BRANCH_CACHE - if (tcache_id) - memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - else { - memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); - memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); - } -#endif -#if CALL_STACK - if (tcache_id) { - memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - sh2s[tcache_id-1].rts_cache_idx = 0; - } else { - memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); - memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); - sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; - } + // prepare link and add to outgoing list of owner + bl->tcache_id = tcache_id; + bl->target_pc = pc; + bl->jump = tcache_ptr; + bl->blx = NULL; + bl->o_next = owner->o_links; + owner->o_links = bl; + + add_to_hashlist_unresolved(bl, tcache_id); + return bl; +#else + return NULL; #endif - } - return (u8 *)tcache_ptrs[tcache_id]; } static void dr_mark_memory(int mark, struct block_desc *block, int tcache_id, u32 nolit) @@ -1059,207 +1076,117 @@ static u32 dr_check_nolit(u32 start, u32 end, int tcache_id) return end; } -static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, - u32 addr, int size, u32 addr_lit, int size_lit) +static void dr_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) { - struct block_list **head = &inactive_blocks[tcache_id]; - struct block_list *current; + struct block_link *bl; + u32 i; - for (current = *head; current != NULL; current = current->next) { - struct block_desc *block = current->block; - if (block->crc == crc && block->addr == addr && block->size == size && - block->addr_lit == addr_lit && block->size_lit == size_lit) - { - rm_from_block_lists(block); - return block; - } + free = free || nolit; // block is invalid if literals are overwritten + dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", + bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, + tcache_id, bd - block_tables[tcache_id]); + if (bd->addr == 0 || bd->entry_count == 0) { + dbg(1, " killing dead block!? 
%08x", bd->addr); + return; } - return NULL; -} -static struct block_desc *dr_add_block(u32 addr, int size, - u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) -{ - struct block_entry *be; - struct block_desc *bd; - int tcache_id; - int *bcount; +#if LINK_BRANCHES + // remove from hash table, make incoming links unresolved + if (bd->active) { + for (i = 0; i < bd->entry_count; i++) { + rm_from_hashlist(&bd->entryp[i], tcache_id); - // do a lookup to get tcache_id and override check - be = dr_get_entry(addr, is_slave, &tcache_id); - if (be != NULL) - dbg(1, "block override for %08x", addr); + while ((bl = bd->entryp[i].links) != NULL) { + dr_block_unlink(bl, 1); + add_to_hashlist_unresolved(bl, tcache_id); + } + } - bcount = &block_counts[tcache_id]; - if (*bcount == block_limit[tcache_id]) { - dbg(1, "bd overflow for tcache %d", tcache_id); - return NULL; + dr_mark_memory(-1, bd, tcache_id, nolit); + add_to_block_list(&inactive_blocks[tcache_id], bd); } - - bd = &block_tables[tcache_id][*bcount]; - bd->addr = addr; - bd->size = size; - bd->addr_lit = addr_lit; - bd->size_lit = size_lit; - bd->tcache_ptr = tcache_ptr; - bd->crc = crc; bd->active = 0; - bd->entry_count = 0; -#if (DRC_DEBUG & 2) - bd->refcount = 0; #endif - *blk_id = *bcount; - (*bcount)++; - if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) - *bcount = 0; - - return bd; -} - -static void REGPARM(3) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) -{ - struct block_entry *be = NULL; - void *block = NULL; - - be = dr_get_entry(pc, sh2->is_slave, tcache_id); - if (be != NULL) - block = be->tcache_ptr; - -#if (DRC_DEBUG & 2) - if (be != NULL) - be->block->refcount++; -#endif - return block; -} - -static void *dr_failure(void) -{ - lprintf("recompilation failed\n"); - exit(1); -} - + if (free) { #if LINK_BRANCHES -static void dr_block_link(struct block_entry *be, struct block_link *bl, int emit_jump) -{ - dbg(2, "- %slink from %p to pc %08x entry %p", emit_jump ? 
"":"early ", - bl->jump, bl->target_pc, be->tcache_ptr); - - if (emit_jump) { - u8 *jump = bl->jump; - int jsz = emith_jump_patch_size(); - if (bl->type == BL_JMP) { // patch: jump @entry - // inlined: @jump far jump to target - emith_jump_patch(jump, be->tcache_ptr, &jump); - } else if (bl->type == BL_LDJMP) { // write: jump @entry - // inlined: @jump far jump to target - emith_jump_at(jump, be->tcache_ptr); - jsz = emith_jump_at_size(); - } else if (bl->type == BL_JCCBLX) { // patch: jump cond -> jump @entry - if (emith_jump_patch_inrange(bl->jump, be->tcache_ptr)) { - // inlined: @jump near jumpcc to target - emith_jump_patch(jump, be->tcache_ptr, &jump); - } else { // dispatcher cond immediate - // via blx: @jump near jumpcc to blx; @blx far jump - emith_jump_patch(jump, bl->blx, &jump); - emith_jump_at(bl->blx, be->tcache_ptr); - if ((((uintptr_t)bl->blx & 0xf) + emith_jump_at_size()-1) > 0xf) - host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); - } - } else { - printf("unknown BL type %d\n", bl->type); - exit(1); + // revoke outgoing links + for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { + if (bl->target) + dr_block_unlink(bl, 0); + else + rm_from_hashlist_unresolved(bl, tcache_id); + bl->jump = NULL; + bl->next = blink_free[bl->tcache_id]; + blink_free[bl->tcache_id] = bl; } - // only needs sync if patch is possibly crossing cacheline (assume 16 byte) - if ((((uintptr_t)jump & 0xf) + jsz-1) > 0xf) - host_instructions_updated(jump, jump + jsz-1); + bd->entryp[0].o_links = NULL; +#endif + // invalidate block + rm_from_block_lists(bd); + bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; + bd->entry_count = 0; } - - // move bl to block_entry - bl->target = be; - bl->prev = NULL; - if (be->links) - be->links->prev = bl; - bl->next = be->links; - be->links = bl; + emith_update_cache(); } -static void dr_block_unlink(struct block_link *bl, int emit_jump) +static struct block_desc *dr_find_inactive_block(int tcache_id, u16 crc, + u32 addr, int size, u32 addr_lit, int size_lit) { - dbg(2,"- unlink from %p to pc %08x", bl->jump, bl->target_pc); + struct block_list **head = &inactive_blocks[tcache_id]; + struct block_list *current; - if (bl->target) { - if (emit_jump) { - u8 *jump = bl->jump; - int jsz = emith_jump_patch_size(); - if (bl->type == BL_JMP) { // jump_patch @dispatcher - // inlined: @jump far jump to dispatcher - emith_jump_patch(jump, sh2_drc_dispatcher, &jump); - } else if (bl->type == BL_LDJMP) { // restore: load pc, jump @dispatcher - // inlined: @jump load target_pc, far jump to dispatcher - memcpy(jump, bl->jdisp, emith_jump_at_size()); - jsz = emith_jump_at_size(); - } else if (bl->type == BL_JCCBLX) { // jump cond @blx; @blx: load pc, jump - // via blx: @jump near jumpcc to blx; @blx load target_pc, far jump - emith_jump_patch(bl->jump, bl->blx, &jump); - memcpy(bl->blx, bl->jdisp, emith_jump_at_size()); - host_instructions_updated(bl->blx, bl->blx + emith_jump_at_size()-1); - } else { - printf("unknown BL type %d\n", bl->type); - exit(1); - } - // update cpu caches since the previous jump target doesn't exist anymore - host_instructions_updated(jump, jump + jsz-1); + for (current = *head; current != NULL; current = current->next) { + struct block_desc *block = current->block; + if (block->crc == crc && block->addr == addr && block->size == size && + block->addr_lit == addr_lit && block->size_lit == size_lit) + { + rm_from_block_lists(block); + return block; } - - if (bl->prev) - bl->prev->next = bl->next; - else - bl->target->links 
= bl->next; - if (bl->next) - bl->next->prev = bl->prev; - bl->target = NULL; } + return NULL; } -#endif -static struct block_link *dr_prepare_ext_branch(struct block_entry *owner, u32 pc, int is_slave, int tcache_id) +static struct block_desc *dr_add_block(u32 addr, int size, + u32 addr_lit, int size_lit, u16 crc, int is_slave, int *blk_id) { -#if LINK_BRANCHES - struct block_link *bl = block_link_pool[tcache_id]; - int cnt = block_link_pool_counts[tcache_id]; - int target_tcache_id; + struct block_entry *be; + struct block_desc *bd; + int tcache_id; + int *bcount; - // get the target block entry - target_tcache_id = dr_get_tcache_id(pc, is_slave); - if (target_tcache_id && target_tcache_id != tcache_id) - return NULL; + // do a lookup to get tcache_id and override check + be = dr_get_entry(addr, is_slave, &tcache_id); + if (be != NULL) + dbg(1, "block override for %08x", addr); - // get a block link - if (blink_free[tcache_id] != NULL) { - bl = blink_free[tcache_id]; - blink_free[tcache_id] = bl->next; - } else if (cnt >= BLOCK_LINK_MAX_COUNT(tcache_id)) { - dbg(1, "bl overflow for tcache %d", tcache_id); + bcount = &block_counts[tcache_id]; + if (*bcount == block_limit[tcache_id]) { + dbg(1, "bd overflow for tcache %d", tcache_id); return NULL; - } else { - bl += cnt; - block_link_pool_counts[tcache_id] = cnt+1; } - // prepare link and add to outgoing list of owner - bl->tcache_id = tcache_id; - bl->target_pc = pc; - bl->jump = tcache_ptr; - bl->blx = NULL; - bl->o_next = owner->o_links; - owner->o_links = bl; - - add_to_hashlist_unresolved(bl, tcache_id); - return bl; -#else - return NULL; + bd = &block_tables[tcache_id][*bcount]; + bd->addr = addr; + bd->size = size; + bd->addr_lit = addr_lit; + bd->size_lit = size_lit; + bd->tcache_ptr = tcache_ptr; + bd->crc = crc; + bd->active = 0; + bd->list = NULL; + bd->entry_count = 0; +#if (DRC_DEBUG & 2) + bd->refcount = 0; #endif + + *blk_id = *bcount; + (*bcount)++; + if (*bcount >= BLOCK_MAX_COUNT(tcache_id)) + *bcount = 0; + + return bd; } static void dr_link_blocks(struct block_entry *be, int tcache_id) @@ -1321,6 +1248,139 @@ static void dr_activate_block(struct block_desc *bd, int tcache_id, int is_slave bd->active = 1; } +static void REGPARM(3) ALIGNED(32) *dr_lookup_block(u32 pc, SH2 *sh2, int *tcache_id) +{ + struct block_entry *be = NULL; + void *block = NULL; + + be = dr_get_entry(pc, sh2->is_slave, tcache_id); + if (be != NULL) + block = be->tcache_ptr; + +#if (DRC_DEBUG & 2) + if (be != NULL) + be->block->refcount++; +#endif + return block; +} + +static void dr_free_oldest_block(int tcache_id) +{ + struct block_desc *bd; + + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) { + // block desc wrap around + block_limit[tcache_id] = 0; + } + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + + if (bd->tcache_ptr && bd->tcache_ptr < tcache_ptrs[tcache_id]) { + // cache wrap around + tcache_ptrs[tcache_id] = bd->tcache_ptr; + } + + if (bd->addr && bd->entry_count) + dr_rm_block_entry(bd, tcache_id, 0, 1); + + block_limit[tcache_id]++; + if (block_limit[tcache_id] >= BLOCK_MAX_COUNT(tcache_id)) + block_limit[tcache_id] = 0; + bd = &block_tables[tcache_id][block_limit[tcache_id]]; + if (bd->tcache_ptr >= tcache_ptrs[tcache_id]) + tcache_limit[tcache_id] = bd->tcache_ptr; + else + tcache_limit[tcache_id] = tcache_bases[tcache_id] + tcache_sizes[tcache_id]; +} + +static u8 *dr_prepare_cache(int tcache_id, int insn_count) +{ + u8 *limit = tcache_limit[tcache_id]; + + // if no block desc available + if 
(block_counts[tcache_id] == block_limit[tcache_id]) + dr_free_oldest_block(tcache_id); + + // while not enough cache space left (limit - tcache_ptr < max space needed) + while (tcache_limit[tcache_id] - tcache_ptrs[tcache_id] < insn_count * 128) + dr_free_oldest_block(tcache_id); + + if (limit != tcache_limit[tcache_id]) { +#if BRANCH_CACHE + if (tcache_id) + memset32(sh2s[tcache_id-1].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + else { + memset32(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)/4); + memset32(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)/4); + } +#endif +#if CALL_STACK + if (tcache_id) { + memset32(sh2s[tcache_id-1].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + sh2s[tcache_id-1].rts_cache_idx = 0; + } else { + memset32(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)/4); + memset32(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)/4); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } +#endif + } + return (u8 *)tcache_ptrs[tcache_id]; +} + +static void dr_flush_tcache(int tcid) +{ + int i; +#if (DRC_DEBUG & 1) + int tc_used, bl_used; + + tc_used = tcache_sizes[tcid] - (tcache_limit[tcid] - tcache_ptrs[tcid]); + bl_used = BLOCK_MAX_COUNT(tcid) - (block_limit[tcid] - block_counts[tcid]); + elprintf(EL_STATUS, "tcache #%d flush! (%d/%d, bds %d/%d)", tcid, tc_used, + tcache_sizes[tcid], bl_used, BLOCK_MAX_COUNT(tcid)); +#endif + + block_counts[tcid] = 0; + block_limit[tcid] = BLOCK_MAX_COUNT(tcid) - 1; + block_link_pool_counts[tcid] = 0; + blink_free[tcid] = NULL; + memset(unresolved_links[tcid], 0, sizeof(*unresolved_links[0]) * HASH_TABLE_SIZE(tcid)); + memset(hash_tables[tcid], 0, sizeof(*hash_tables[0]) * HASH_TABLE_SIZE(tcid)); + tcache_ptrs[tcid] = tcache_bases[tcid]; + tcache_limit[tcid] = tcache_bases[tcid] + tcache_sizes[tcid]; + if (Pico32xMem->sdram != NULL) { + if (tcid == 0) { // ROM, RAM + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(sh2s[0].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[1].branch_cache, -1, sizeof(sh2s[1].branch_cache)); + memset(sh2s[0].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + memset(sh2s[1].rts_cache, -1, sizeof(sh2s[1].rts_cache)); + sh2s[0].rts_cache_idx = sh2s[1].rts_cache_idx = 0; + } else { + memset(Pico32xMem->drcblk_ram, 0, sizeof(Pico32xMem->drcblk_ram)); + memset(Pico32xMem->drclit_ram, 0, sizeof(Pico32xMem->drclit_ram)); + memset(Pico32xMem->drcblk_da[tcid - 1], 0, sizeof(Pico32xMem->drcblk_da[tcid - 1])); + memset(Pico32xMem->drclit_da[tcid - 1], 0, sizeof(Pico32xMem->drclit_da[tcid - 1])); + memset(sh2s[tcid - 1].branch_cache, -1, sizeof(sh2s[0].branch_cache)); + memset(sh2s[tcid - 1].rts_cache, -1, sizeof(sh2s[0].rts_cache)); + sh2s[tcid - 1].rts_cache_idx = 0; + } + } +#if (DRC_DEBUG & 4) + tcache_dsm_ptrs[tcid] = tcache_bases[tcid]; +#endif + + for (i = 0; i < RAM_SIZE(tcid) / INVAL_PAGE_SIZE; i++) + discard_block_list(&inval_lookup[tcid][i]); + discard_block_list(&inactive_blocks[tcid]); +} + +static void *dr_failure(void) +{ + lprintf("recompilation failed\n"); + exit(1); +} + #define ADD_TO_ARRAY(array, count, item, failcode) { \ if (count >= ARRAY_SIZE(array)) { \ dbg(1, "warning: " #array " overflow"); \ @@ -5066,61 +5126,7 @@ static void sh2_generate_utils(void) #endif } -static void sh2_smc_rm_block_entry(struct block_desc *bd, int tcache_id, u32 nolit, int free) -{ - struct block_link *bl; - u32 i; - - free = free || nolit; // block is invalid if literals are overwritten - 
dbg(2," %sing block %08x-%08x,%08x-%08x, blkid %d,%d", free?"delet":"disabl", - bd->addr, bd->addr + bd->size, bd->addr_lit, bd->addr_lit + bd->size_lit, - tcache_id, bd - block_tables[tcache_id]); - if (bd->addr == 0 || bd->entry_count == 0) { - dbg(1, " killing dead block!? %08x", bd->addr); - return; - } - -#if LINK_BRANCHES - // remove from hash table, make incoming links unresolved - if (bd->active) { - for (i = 0; i < bd->entry_count; i++) { - rm_from_hashlist(&bd->entryp[i], tcache_id); - - while ((bl = bd->entryp[i].links) != NULL) { - dr_block_unlink(bl, 1); - add_to_hashlist_unresolved(bl, tcache_id); - } - } - - dr_mark_memory(-1, bd, tcache_id, nolit); - add_to_block_list(&inactive_blocks[tcache_id], bd); - } - bd->active = 0; -#endif - - if (free) { -#if LINK_BRANCHES - // revoke outgoing links - for (bl = bd->entryp[0].o_links; bl != NULL; bl = bl->o_next) { - if (bl->target) - dr_block_unlink(bl, 0); - else - rm_from_hashlist_unresolved(bl, tcache_id); - bl->jump = NULL; - bl->next = blink_free[bl->tcache_id]; - blink_free[bl->tcache_id] = bl; - } - bd->entryp[0].o_links = NULL; -#endif - // invalidate block - rm_from_block_lists(bd); - bd->addr = bd->size = bd->addr_lit = bd->size_lit = 0; - bd->entry_count = 0; - } - emith_update_cache(); -} - -static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) +static void sh2_smc_rm_blocks(u32 a, int len, int tcache_id, u32 shift) { struct block_list **blist, *entry, *next; u32 mask = RAM_SIZE(tcache_id) - 1; @@ -5146,12 +5152,12 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) start_lit = block->addr_lit & wtmask; end_lit = start_lit + block->size_lit; // disable/delete block if it covers the modified address - if ((start_addr <= a && a < end_addr) || - (start_lit <= a && a < end_lit)) + if ((start_addr <= a+len && a < end_addr) || + (start_lit <= a+len && a < end_lit)) { dbg(2, "smc remove @%08x", a); - end_addr = (start_lit <= a && block->size_lit ? a : 0); - sh2_smc_rm_block_entry(block, tcache_id, end_addr, 0); + end_addr = (start_lit <= a+len && block->size_lit ? a : 0); + dr_rm_block_entry(block, tcache_id, end_addr, 0); #if (DRC_DEBUG & 2) removed = 1; #endif @@ -5182,17 +5188,20 @@ static void sh2_smc_rm_blocks(u32 a, int tcache_id, u32 shift) #endif } -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_ram(unsigned int a, unsigned t, SH2 *sh2) { - dbg(2, "%csh2 smc check @%08x v=%d", sh2->is_slave ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 0, SH2_DRCBLK_RAM_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 2 : 0); + + sh2_smc_rm_blocks(a + off, len, 0, SH2_DRCBLK_RAM_SHIFT); } -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2) +void sh2_drc_wcheck_da(unsigned int a, unsigned t, SH2 *sh2) { - int cpuid = sh2->is_slave; - dbg(2, "%csh2 smc check @%08x v=%d", cpuid ? 's' : 'm', a, val); - sh2_smc_rm_blocks(a, 1 + cpuid, SH2_DRCBLK_DA_SHIFT); + int off = ((u16) t ? 0 : 2); + int len = ((u16) t ? 2 : 0) + (t >> 16 ? 
2 : 0); + + sh2_smc_rm_blocks(a + off, len, 1 + sh2->is_slave, SH2_DRCBLK_DA_SHIFT); } int sh2_execute_drc(SH2 *sh2c, int cycles) @@ -5408,9 +5417,9 @@ void sh2_drc_flush_all(void) block_stats(); entry_stats(); bcache_stats(); - flush_tcache(0); - flush_tcache(1); - flush_tcache(2); + dr_flush_tcache(0); + dr_flush_tcache(1); + dr_flush_tcache(2); Pico32x.emu_flags &= ~P32XF_DRC_ROM_C; } diff --git a/cpu/sh2/compiler.h b/cpu/sh2/compiler.h index 3565940d..94dff8c5 100644 --- a/cpu/sh2/compiler.h +++ b/cpu/sh2/compiler.h @@ -1,7 +1,7 @@ int sh2_drc_init(SH2 *sh2); void sh2_drc_finish(SH2 *sh2); -void sh2_drc_wcheck_ram(unsigned int a, int val, SH2 *sh2); -void sh2_drc_wcheck_da(unsigned int a, int val, SH2 *sh2); +void sh2_drc_wcheck_ram(unsigned int a, unsigned val, SH2 *sh2); +void sh2_drc_wcheck_da(unsigned int a, unsigned val, SH2 *sh2); #ifdef DRC_SH2 void sh2_drc_mem_setup(SH2 *sh2); diff --git a/cpu/sh2/sh2.h b/cpu/sh2/sh2.h index cf830dfc..57693ac1 100644 --- a/cpu/sh2/sh2.h +++ b/cpu/sh2/sh2.h @@ -80,7 +80,7 @@ typedef struct SH2_ unsigned char data_array[0x1000]; // cache (can be used as RAM) unsigned int peri_regs[0x200/4]; // periphereal regs -} SH2; +} SH2 ALIGNED(32); #define CYCLE_MULT_SHIFT 10 #define C_M68K_TO_SH2(xsh2, c) \ diff --git a/pico/32x/memory.c b/pico/32x/memory.c index 06215a7c..39504416 100644 --- a/pico/32x/memory.c +++ b/pico/32x/memory.c @@ -231,7 +231,7 @@ static NOINLINE void sh2_poll_write(u32 a, u32 d, unsigned int cycles, SH2 *sh2) for (idx = nrd = wr; idx != rd; ) { idx = (idx-1) % PFIFO_SZ; q = &fifo[idx]; - if (q->cpu != cpu && q->a == a) { q->a = -1; } + if (q->a == a && q->cpu != cpu) { q->a = -1; } if (q->a != -1) { nrd = idx; } } rd = nrd; @@ -825,7 +825,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) unsigned int cycles = sh2_cycles_done_m68k(sh2); Pico32x.sh2_regs[4 / 2] = d; p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 4); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 4); sh2_poll_write(a & ~1, d, cycles, sh2); } return; @@ -851,7 +852,8 @@ static void p32x_sh2reg_write8(u32 a, u32 d, SH2 *sh2) REG8IN16(r, a) = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a & ~1, r[a / 2], cycles, sh2); } return; @@ -943,7 +945,8 @@ static void p32x_sh2reg_write16(u32 a, u32 d, SH2 *sh2) Pico32x.regs[a / 2] = d; p32x_m68k_poll_event(P32XF_68KCPOLL); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_CPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); sh2_poll_write(a, d, cycles, sh2); } return; @@ -1569,7 +1572,7 @@ static u32 REGPARM(2) sh2_read32_rom(u32 a, SH2 *sh2) // writes #ifdef DRC_SH2 -static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) +static void sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) { unsigned cycles; @@ -1577,34 +1580,35 @@ static void NOINLINE sh2_sdram_poll(u32 a, u32 d, SH2 *sh2) cycles = sh2_cycles_done_m68k(sh2); sh2_poll_write(a, d, cycles, sh2); p32x_sh2_poll_event(sh2->other_sh2, SH2_STATE_RPOLL, cycles); - sh2_end_run(sh2, 1); + if (p32x_sh2_ready(sh2->other_sh2, cycles+16)) + sh2_end_run(sh2, 1); DRC_RESTORE_SR(sh2); } -void NOINLINE sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks(u32 a, u32 d, SH2 *sh2, u32 t) { - if (t & 0x80) - sh2_sdram_poll(a, d, sh2); - if (t & 0x7f) - sh2_drc_wcheck_ram(a, t & 0x7f, sh2); + if 
(t & 0x80) sh2_sdram_poll(a, d, sh2); + if (t & 0x7f) sh2_drc_wcheck_ram(a, t & 0x7f, sh2); } -void NOINLINE sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, int t) +void sh2_sdram_checks_l(u32 a, u32 d, SH2 *sh2, u32 t) { - sh2_sdram_checks(a, d>>16, sh2, t); - sh2_sdram_checks(a+2, d, sh2, t>>16); + u32 m = 0x80 | 0x800000; + + if (t & 0x000080) sh2_sdram_poll(a, d>>16, sh2); + if (t & 0x800000) sh2_sdram_poll(a+2, d, sh2); + if (t & ~m) sh2_drc_wcheck_ram(a, t & ~m, sh2); } #ifndef _ASM_32X_MEMORY_C -static void sh2_da_checks(u32 a, int t, SH2 *sh2) +static void sh2_da_checks(u32 a, u32 t, SH2 *sh2) { sh2_drc_wcheck_da(a, t, sh2); } -static void NOINLINE sh2_da_checks_l(u32 a, int t, SH2 *sh2) +static void sh2_da_checks_l(u32 a, u32 t, SH2 *sh2) { - sh2_da_checks(a, t, sh2); - sh2_da_checks(a+2, t>>16, sh2); + sh2_drc_wcheck_da(a, t, sh2); } #endif #endif @@ -1667,7 +1671,7 @@ static void REGPARM(3) sh2_write8_sdram(u32 a, u32 d, SH2 *sh2) ((u8 *)sh2->p_sdram)[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a & ~1, ((u16 *)sh2->p_sdram)[a1 / 2], sh2, t); #endif @@ -1679,7 +1683,7 @@ static void REGPARM(3) sh2_write8_da(u32 a, u32 d, SH2 *sh2) sh2->data_array[a1] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1741,7 +1745,7 @@ static void REGPARM(3) sh2_write16_sdram(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->p_sdram)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; if (t) sh2_sdram_checks(a, d, sh2, t); #endif @@ -1753,7 +1757,7 @@ static void REGPARM(3) sh2_write16_da(u32 a, u32 d, SH2 *sh2) ((u16 *)sh2->data_array)[a1 / 2] = d; #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; if (t) sh2_da_checks(a, t, sh2); #endif @@ -1816,8 +1820,8 @@ static void REGPARM(3) sh2_write32_sdram(u32 a, u32 d, SH2 *sh2) *(u32 *)(sh2->p_sdram + a1) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_ram; - int t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_RAM_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_RAM_SHIFT]; if (t|(u<<16)) sh2_sdram_checks_l(a, d, sh2, t|(u<<16)); #endif @@ -1829,8 +1833,8 @@ static void REGPARM(3) sh2_write32_da(u32 a, u32 d, SH2 *sh2) *((u32 *)sh2->data_array + a1/4) = (d << 16) | (d >> 16); #ifdef DRC_SH2 u8 *p = sh2->p_drcblk_da; - int t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; - int u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; + u32 t = p[a1 >> SH2_DRCBLK_DA_SHIFT]; + u32 u = p[(a1+2) >> SH2_DRCBLK_DA_SHIFT]; if (t|(u<<16)) sh2_da_checks_l(a, t|(u<<16), sh2); #endif diff --git a/pico/32x/memory_arm.S b/pico/32x/memory_arm.S index ba83a6bf..b3a94b62 100644 --- a/pico/32x/memory_arm.S +++ b/pico/32x/memory_arm.S @@ -17,6 +17,7 @@ .equ SH2_DRAM_OW, 1<<(32-SH2_DRAM_SHIFT) @ DRAM overwrite mode bit .text +.align 5 #if 0 @ u32 a, SH2 *sh2 @@ -142,11 +143,12 @@ sh2_write8_sdram: ldrb r3, [ip, r3, lsr #SH2_RAM_SHIFT+1] cmp r3, #0 bxeq lr + @ need to load aligned 16 bit data for check ldr ip, [r2, #OFS_SH2_p_sdram] bic r0, r0, #1 - mov r3, r0, lsl #SH2_RAM_SHIFT - mov r3, r3, lsr #SH2_RAM_SHIFT - ldrh r1, [ip, r3] + mov r1, r0, lsl #SH2_RAM_SHIFT + mov r1, r1, lsr #SH2_RAM_SHIFT + ldrh r1, [ip, r1] b sh2_sdram_checks #else bx lr @@ -252,13 +254,8 @@ 
sh2_write32_da: ldr ip, [r2, #OFS_SH2_p_drcblk_da] ldrb r1, [ip, r3, lsr #SH2_DA_SHIFT+1]! ldrb ip, [ip, #1] - orrs r3, r1, ip, lsl #16 + orrs r1, r1, ip, lsl #16 bxeq lr - stmfd sp!, {r0, r2, ip, lr} - bl sh2_drc_wcheck_da - ldmfd sp!, {r0, r2, ip, lr} - add r0, r0, #2 - mov r1, ip b sh2_drc_wcheck_da #else bx lr diff --git a/pico/pico_int.h b/pico/pico_int.h index 89acc4fb..0fc458ef 100644 --- a/pico/pico_int.h +++ b/pico/pico_int.h @@ -921,6 +921,10 @@ void p32x_event_schedule(unsigned int now, enum p32x_event event, int after); void p32x_event_schedule_sh2(SH2 *sh2, enum p32x_event event, int after); void p32x_schedule_hint(SH2 *sh2, unsigned int m68k_cycles); +#define p32x_sh2_ready(sh2, cycles) \ + (CYCLES_GT(cycles,sh2->m68krcycles_done) && \ + !(sh2->state&(SH2_STATE_CPOLL|SH2_STATE_VPOLL|SH2_STATE_RPOLL))) + // 32x/memory.c extern struct Pico32xMem *Pico32xMem; unsigned int PicoRead8_32x(unsigned int a); diff --git a/platform/gp2x/PicoDrive.gpe b/platform/gp2x/PicoDrive.gpe index 1c065185..59416d93 100644 --- a/platform/gp2x/PicoDrive.gpe +++ b/platform/gp2x/PicoDrive.gpe @@ -7,6 +7,8 @@ if ! [ -e /dev/accel ]; then export POLLUX_RAM_TIMINGS='ram_timings=2,9,4,1,1,1,1' export POLLUX_LCD_TIMINGS_NTSC='lcd_timings=397,1,37,277,341,0,17,337;clkdiv0=9' export POLLUX_LCD_TIMINGS_PAL='lcd_timings=428,1,37,277,341,0,17,337;clkdiv0=10' +else + export POLLUX_RAM_TIMINGS='ram_timings=3,9,4,1,1,1,1' fi ./PicoDrive "$@" -- 2.39.5
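
For illustration only (this block is not part of the patch): a minimal standalone sketch of how the reworked sh2_drc_wcheck_ram()/sh2_drc_wcheck_da() interface interprets the tag word built by the 32-bit SDRAM/data-array write handlers. The low 16 bits of `t` flag the halfword at `a`, the high 16 bits flag the halfword at `a+2`, and the (offset, length) expressions copied from the patch narrow the range handed to sh2_smc_rm_blocks(). The helper name `wcheck_window()` and the sample addresses are hypothetical.

```c
/* Illustrative sketch, not part of the patch. Shows how the per-halfword
 * drcblk tag from sh2_write32_sdram()/sh2_write32_da() maps to the
 * (address, length) window that sh2_smc_rm_blocks() checks blocks against. */
#include <stdio.h>
#include <stdint.h>

typedef uint16_t u16;
typedef uint32_t u32;

/* t: low 16 bits nonzero if the halfword at a hit DRC-tracked memory,
 *    high 16 bits nonzero if the halfword at a+2 did. */
static void wcheck_window(u32 a, u32 t, u32 *start, u32 *len)
{
  int off = ((u16)t ? 0 : 2);                   /* skip first halfword if untouched */
  *len = ((u16)t ? 2 : 0) + (t >> 16 ? 2 : 0);  /* 2 bytes per tagged halfword */
  *start = a + off;
}

int main(void)
{
  u32 cases[][2] = {
    { 0x06000100, 0x00000001 },  /* only low halfword tagged  -> a   .. a+2 */
    { 0x06000100, 0x00010000 },  /* only high halfword tagged -> a+2 .. a+4 */
    { 0x06000100, 0x00010001 },  /* both halfwords tagged     -> a   .. a+4 */
  };
  unsigned i;

  for (i = 0; i < sizeof(cases)/sizeof(cases[0]); i++) {
    u32 start, len;
    wcheck_window(cases[i][0], cases[i][1], &start, &len);
    printf("a=%08x t=%08x -> check blocks against %08x..%08x\n",
           cases[i][0], cases[i][1], start, start + len);
  }
  return 0;
}
```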