From: notaz Date: Mon, 16 Feb 2026 01:32:32 +0000 (+0200) Subject: drc: simplify block expiration X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=commitdiff_plain;h=7240777bbc155a41a0085dc0c93ec043fb8a3d82;p=pcsx_rearmed.git drc: simplify block expiration just do it at will instead of the complex phasing thing that was there, to allow to store things in tcache easier in the next commit --- diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index aabf1c25..e8092c3a 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -146,41 +146,6 @@ static void set_jump_target(void *addr, void *target_) } } -// This optionally copies the instruction from the target of the branch into -// the space before the branch. Works, but the difference in speed is -// usually insignificant. -#if 0 -static void set_jump_target_fillslot(int addr,u_int target,int copy) -{ - u_char *ptr=(u_char *)addr; - u_int *ptr2=(u_int *)ptr; - assert(!copy||ptr2[-1]==0xe28dd000); - if(ptr[3]==0xe2) { - assert(!copy); - assert((target-(u_int)ptr2-8)<4096); - *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8); - } - else { - assert((ptr[3]&0x0e)==0xa); - u_int target_insn=*(u_int *)target; - if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags - copy=0; - } - if((target_insn&0x0c100000)==0x04100000) { // Load - copy=0; - } - if(target_insn&0x08000000) { - copy=0; - } - if(copy) { - ptr2[-1]=target_insn; - target+=4; - } - *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8); - } -} -#endif - /* Literal pool */ static void add_literal(int addr,int val) { @@ -190,17 +155,6 @@ static void add_literal(int addr,int val) literalcount++; } -// from a pointer to external jump stub (which was produced by emit_extjump2) -// find where the jumping insn is -static void *find_extjump_insn(void *stub) -{ - int *ptr=(int *)(stub+4); - assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs] - u_int offset=*ptr&0xfff; - void 
**l_ptr=(void *)ptr+offset+8; - return *l_ptr; -} - // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) { @@ -1584,7 +1538,7 @@ static void literal_pool_jumpover(int n) } // parsed by find_extjump_insn, check_extjump2 -static void emit_extjump(u_char *addr, u_int target) +static void emit_extjump_stub(u_char *addr, u_int target) { u_char *ptr=(u_char *)addr; assert((ptr[3]&0x0e)==0xa); @@ -1597,6 +1551,17 @@ static void emit_extjump(u_char *addr, u_int target) emit_far_jump(dyna_linker); } +// from a pointer to external jump stub (which was produced by emit_extjump_stub) +// find where the jumping insn is +static void *find_extjump_insn(void *stub) +{ + int *ptr=(int *)(stub+4); + assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs] + u_int offset=*ptr&0xfff; + void **l_ptr=(void *)ptr+offset+8; + return *l_ptr; +} + static void check_extjump2(void *src) { u_int *ptr = src; @@ -2373,7 +2338,7 @@ static void arch_init(void) start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val] - end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops), 1); } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index dabacae5..0b11a73a 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -61,16 +61,6 @@ static void set_jump_target(void *addr, void *target) abort(); // should not happen } -// from a pointer to external jump stub (which was produced by emit_extjump2) -// find where the jumping insn is -static void *find_extjump_insn(void *stub) -{ - int *ptr = (int *)stub + 2; - assert((*ptr&0x9f000000) == 0x10000000); // adr - int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3); - return ptr + 
offset / 4; -} - // Allocate a specific ARM register. static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr) { @@ -1358,7 +1348,7 @@ static void literal_pool_jumpover(int n) } // parsed by find_extjump_insn, check_extjump2 -static void emit_extjump(u_char *addr, u_int target) +static void emit_extjump_stub(u_char *addr, u_int target) { assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond @@ -1371,6 +1361,16 @@ static void emit_extjump(u_char *addr, u_int target) emit_far_jump(dyna_linker); } +// from a pointer to external jump stub (which was produced by emit_extjump_stub) +// find where the jumping insn is +static void *find_extjump_insn(void *stub) +{ + int *ptr = (int *)stub + 2; + assert((*ptr&0x9f000000) == 0x10000000); // adr + int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3); + return ptr + offset / 4; +} + static void check_extjump2(void *src) { u_int *ptr = src; @@ -2128,7 +2128,7 @@ static void arch_init(void) ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 } - end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops), 1); } // vim:shiftwidth=2:expandtab diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index e31d2291..801754ca 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -106,8 +106,8 @@ extern uintptr_t mini_ht[32][2]; #endif #define RAM_SIZE 0x200000 -#define MAX_OUTPUT_BLOCK_SIZE 262144 -#define EXPIRITY_OFFSET (MAX_OUTPUT_BLOCK_SIZE * 2) +#define MAX_OUTPUT_BLOCK_SIZE_SHIFT 18 +#define MAX_OUTPUT_BLOCK_SIZE (1 << MAX_OUTPUT_BLOCK_SIZE_SHIFT) // 262144 #define PAGE_COUNT 1024 #if defined(HAVE_CONDITIONAL_CALL) && !defined(DESTRUCTIVE_SHIFT) @@ -219,14 +219,17 @@ struct link_entry struct block_info { - struct block_info *next; + struct block_info *next_by_vaddr; 
+ struct block_info *next_in_tc; const void *source; const void *copy; u_int start; // vaddr of the block start u_int len; // of the whole block source u_int tc_offs; - //u_int tc_len; + u_int tc_len; u_int reg_sv_flags; + u_short jump_out_cnt; + u_short unused; u_char is_dirty; u_char inv_near_misses; u_short jump_in_cnt; @@ -287,7 +290,8 @@ static struct compile_info static char invalid_code[0x100000]; static struct ht_entry hash_table[65536]; static struct block_info *blocks[PAGE_COUNT]; - static struct jump_info *jumps[PAGE_COUNT]; + static struct block_info *block_oldest, *block_last_compiled; + static struct jump_info *jumps[PAGE_COUNT]; // [] static u_int start; static u_int *source; static uint64_t gte_rs[MAXBLOCK]; // gte: 32 data and 32 ctl regs @@ -319,7 +323,6 @@ static struct compile_info static int is_delayslot; static char shadow[1048576] __attribute__((aligned(16))); static void *copy; - static u_int expirep; static u_int stop_after_jal; static u_int ni_count; static u_int err_print_count; @@ -527,7 +530,7 @@ static void start_tcache_write(void *start, void *end) mprotect_w_x(start, end, 0); } -static void end_tcache_write(void *start, void *end) +static void end_tcache_write(void *start, void *end, int clear_cache) { #ifdef NDRC_THREAD if (!ndrc_g.thread.dirty_start || (size_t)ndrc_g.thread.dirty_start > (size_t)start) @@ -535,23 +538,48 @@ static void end_tcache_write(void *start, void *end) if ((size_t)ndrc_g.thread.dirty_end < (size_t)end) ndrc_g.thread.dirty_end = end; #endif - new_dyna_clear_cache(start, end); + if (clear_cache) + new_dyna_clear_cache(start, end); mprotect_w_x(start, end, 1); } -static void *start_block(void) +static noinline void clear_tcache_space(uintptr_t tc_base, u_int max_space); + +static void *start_tcache_write_reserve(u_int max_space) { - u_char *end = out + MAX_OUTPUT_BLOCK_SIZE; - if (end > ndrc->translation_cache + sizeof(ndrc->translation_cache)) - end = ndrc->translation_cache + sizeof(ndrc->translation_cache); 
+ u_char *tc_base = ndrc->translation_cache; + u_char *end = out + max_space; + assert(sizeof(ndrc->translation_cache) > max_space); + if (end > tc_base + sizeof(ndrc->translation_cache)) { + clear_tcache_space(out - tc_base, end - out); + out = ndrc->translation_cache; + end = out + max_space; + } + // we do a bit more than requested to avoid large jump and lots of + // expirations the moment we wrap + assert(TARGET_SIZE_2 >= MAX_OUTPUT_BLOCK_SIZE_SHIFT); + max_space += (end - tc_base) >> (TARGET_SIZE_2 - MAX_OUTPUT_BLOCK_SIZE_SHIFT); + clear_tcache_space(out - tc_base, max_space); + if (out - tc_base + max_space > sizeof(ndrc->translation_cache)) { + u_int wrapped = out - tc_base + max_space - sizeof(ndrc->translation_cache); + clear_tcache_space(0, wrapped); + } + // align + if (((uintptr_t)out) & 7) + out += 8 - (((uintptr_t)out) & 7); start_tcache_write(NDRC_WRITE_OFFSET(out), NDRC_WRITE_OFFSET(end)); return out; } +static void *start_block(int max_space) +{ + return start_tcache_write_reserve(max_space); +} + static void end_block(void *start) { - end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(out)); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(out), 1); } #ifdef NDRC_CACHE_FLUSH_ALL @@ -569,7 +597,7 @@ static void mark_clear_cache(void *target) static void do_clear_cache(void) { if (needs_clear_cache) { - end_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1)); + end_tcache_write(NDRC_WRITE_OFFSET(ndrc), NDRC_WRITE_OFFSET(ndrc + 1), 1); needs_clear_cache = 0; } } @@ -613,7 +641,7 @@ static void do_clear_cache(void) break; end += 4096; } - end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(end)); + end_tcache_write(NDRC_WRITE_OFFSET(start), NDRC_WRITE_OFFSET(end), 1); } needs_clear_cache[i] = 0; } @@ -750,12 +778,6 @@ static void mark_invalid_code(u_int vaddr, u_int len, char invalid) inv_code_start = inv_code_end = ~0; } -static int doesnt_expire_soon(u_char *tcaddr) -{ - u_int diff = (u_int)(tcaddr - 
out) & ((1u << TARGET_SIZE_2) - 1u); - return diff > EXPIRITY_OFFSET + MAX_OUTPUT_BLOCK_SIZE; -} - static attr_unused void check_for_block_changes(u_int start, u_int end) { u_int start_page = get_page_prev(start); @@ -763,8 +785,8 @@ static attr_unused void check_for_block_changes(u_int start, u_int end) u_int page; for (page = start_page; page <= end_page; page++) { - struct block_info *block; - for (block = blocks[page]; block != NULL; block = block->next) { + const struct block_info *block; + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr) { if (block->is_dirty) continue; if (memcmp(block->source, block->copy, block->len)) { @@ -786,7 +808,7 @@ static void *try_restore_block(u_int vaddr, u_int start_page, u_int end_page) stat_inc(stat_restore_tries); for (page = start_page; page <= end_page; page++) { struct block_info *block; - for (block = blocks[page]; block != NULL; block = block->next) { + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr) { if (vaddr < block->start) break; if (!block->is_dirty || vaddr >= block->start + block->len) @@ -845,7 +867,7 @@ static void noinline *get_addr(struct ht_entry *ht, const u_int vaddr, stat_inc(stat_jump_in_lookups); for (page = start_page; page <= end_page; page++) { const struct block_info *block; - for (block = blocks[page]; block != NULL; block = block->next) { + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr) { if (vaddr < block->start) break; if (block->is_dirty || vaddr >= block->start + block->len) @@ -1396,7 +1418,7 @@ static void *get_trampoline(const void *f) if (tramp->f[i] == NULL) { start_tcache_write(&tramp->f[i], &tramp->f[i + 1]); tramp->f[i] = f; - end_tcache_write(&tramp->f[i], &tramp->f[i + 1]); + end_tcache_write(&tramp->f[i], &tramp->f[i + 1], 1); #ifdef HAVE_LIBNX // invalidate the RX mirror (unsure if necessary, but just in case...) 
armDCacheFlush(&ndrc->tramp.f[i], sizeof(ndrc->tramp.f[i])); @@ -1428,15 +1450,13 @@ static void emit_far_call(const void *f) } // Check if an address is already compiled -// but don't return addresses which are about to expire from the cache static void *check_addr(u_int vaddr) { struct ht_entry *ht_bin = hash_table_get(vaddr); size_t i; for (i = 0; i < ARRAY_SIZE(ht_bin->vaddr); i++) { if (ht_bin->vaddr[i] == vaddr) - if (doesnt_expire_soon(ht_bin->tcaddr[i])) - return ht_bin->tcaddr[i]; + return ht_bin->tcaddr[i]; } // refactor to get_addr_nocompile? @@ -1446,13 +1466,11 @@ static void *check_addr(u_int vaddr) stat_inc(stat_jump_in_lookups); for (page = start_page; page <= end_page; page++) { const struct block_info *block; - for (block = blocks[page]; block != NULL; block = block->next) { + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr) { if (vaddr < block->start) break; if (block->is_dirty || vaddr >= block->start + block->len) continue; - if (!doesnt_expire_soon(ndrc->translation_cache + block->tc_offs)) - continue; for (i = 0; i < block->jump_in_cnt; i++) if (block->jump_in[i].vaddr == vaddr) break; @@ -1486,6 +1504,7 @@ static void *check_addr(u_int vaddr) return NULL; } +// assumes blocks are to be destroyed separately static void blocks_clear(struct block_info **head) { struct block_info *cur, *next; @@ -1493,36 +1512,13 @@ static void blocks_clear(struct block_info **head) if ((cur = *head)) { *head = NULL; while (cur) { - next = cur->next; + next = cur->next_by_vaddr; free(cur); cur = next; } } } -static int blocks_remove_matching_addrs(struct block_info **head, - u_int base_offs, int shift) -{ - struct block_info *next; - int hit = 0; - while (*head) { - if ((((*head)->tc_offs ^ base_offs) >> shift) == 0) { - inv_debug("EXP: rm block %08x (tc_offs %x)\n", (*head)->start, (*head)->tc_offs); - invalidate_block(*head); - next = (*head)->next; - free(*head); - *head = next; - stat_dec(stat_blocks); - hit = 1; - } - else - { - head = 
&((*head)->next); - } - } - return hit; -} - // This is called when we write to a compiled block (see do_invstub) static void unlink_jumps_vaddr_range(u_int start, u_int end) { @@ -1539,9 +1535,10 @@ static void unlink_jumps_vaddr_range(u_int start, u_int end) continue; } - inv_debug("INV: rm link to %08x (tc_offs %zx)\n", ji->e[i].target_vaddr, - (u_char *)ji->e[i].stub - ndrc->translation_cache); void *host_addr = find_extjump_insn(ji->e[i].stub); + inv_debug("INV: rm link to %08x (tc_offs %06zx->%06zx)\n", ji->e[i].target_vaddr, + (u_char *)host_addr - ndrc->translation_cache, + (u_char *)ji->e[i].stub - ndrc->translation_cache); mark_clear_cache(host_addr); set_jump_target(host_addr, ji->e[i].stub); // point back to dyna_linker stub @@ -1556,29 +1553,6 @@ static void unlink_jumps_vaddr_range(u_int start, u_int end) } } -static void unlink_jumps_tc_range(struct jump_info *ji, u_int base_offs, int shift) -{ - int i; - if (ji == NULL) - return; - for (i = 0; i < ji->count; ) { - u_int tc_offs = (u_char *)ji->e[i].stub - ndrc->translation_cache; - if (((tc_offs ^ base_offs) >> shift) != 0) { - i++; - continue; - } - - inv_debug("EXP: rm link to %08x (tc_offs %x)\n", ji->e[i].target_vaddr, tc_offs); - stat_dec(stat_links); - ji->count--; - if (i < ji->count) { - ji->e[i] = ji->e[ji->count]; - continue; - } - i++; - } -} - static void invalidate_block(struct block_info *block) { u_int i; @@ -1609,7 +1583,7 @@ static int invalidate_range(u_int start, u_int end, for (page = start_page; page <= end_page; page++) { struct block_info *block; - for (block = blocks[page]; block != NULL; block = block->next) { + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr) { if (block->is_dirty) continue; last_block = block; @@ -1709,7 +1683,7 @@ void new_dynarec_invalidate_all_pages(void) struct block_info *block; u_int page; for (page = 0; page < ARRAY_SIZE(blocks); page++) { - for (block = blocks[page]; block != NULL; block = block->next) { + for (block = 
blocks[page]; block != NULL; block = block->next_by_vaddr) { if (block->is_dirty) continue; if (!block->source) // hack block? @@ -1723,7 +1697,7 @@ void new_dynarec_invalidate_all_pages(void) } // Add an entry to jump_out after making a link -// stub should point to stub code by emit_extjump() +// stub should point to stub code by emit_extjump_stub() static void ndrc_add_jump_out(u_int vaddr, void *stub) { inv_debug("ndrc_add_jump_out: %p -> %x\n", stub, vaddr); @@ -1735,17 +1709,25 @@ static void ndrc_add_jump_out(u_int vaddr, void *stub) ji = jumps[page]; if (ji == NULL) { ji = malloc(sizeof(*ji) + sizeof(ji->e[0]) * 16); + if (!ji) + goto oom; ji->alloc = 16; ji->count = 0; } else if (ji->count >= ji->alloc) { ji->alloc += 16; ji = realloc(ji, sizeof(*ji) + sizeof(ji->e[0]) * ji->alloc); + if (!ji) + goto oom; } jumps[page] = ji; ji->e[ji->count].target_vaddr = vaddr; ji->e[ji->count].stub = stub; ji->count++; + return; +oom: + SysPrintf("ndrc jump OOM\n"); + abort(); } void ndrc_patch_link(u_int vaddr, void *insn, void *stub, void *target) @@ -1764,7 +1746,7 @@ void ndrc_patch_link(u_int vaddr, void *insn, void *stub, void *target) // w^x: have to do costly permission switching anyway new_dyna_clear_cache(NDRC_WRITE_OFFSET(insn), NDRC_WRITE_OFFSET(insn_end)); #endif - //end_tcache_write(insn, insn_end); + //end_tcache_write(insn, insn_end, 1); mprotect_w_x(insn, insn_end, 1); } @@ -6318,7 +6300,7 @@ static noinline void new_dynarec_test(void) for (i = 0; i < ARRAY_SIZE(ret); i++) { out = ndrc->translation_cache; - beginning = start_block(); + beginning = start_block(16*4); ((volatile u_int *)NDRC_WRITE_OFFSET(out))[0]++; // make the cache dirty emit_movimm(DRC_TEST_VAL + i, 0); // test emit_ret(); @@ -6352,7 +6334,6 @@ void new_dynarec_clear_full(void) hash_table_clear(); mini_ht_clear(); copy=shadow; - expirep = EXPIRITY_OFFSET; literalcount=0; stop_after_jal=0; ni_count=0; @@ -6362,6 +6343,7 @@ void new_dynarec_clear_full(void) f1_hack=0; for (n = 0; n < 
ARRAY_SIZE(blocks); n++) blocks_clear(&blocks[n]); + block_oldest = block_last_compiled = NULL; for (n = 0; n < ARRAY_SIZE(jumps); n++) { free(jumps[n]); jumps[n] = NULL; @@ -6497,6 +6479,7 @@ void new_dynarec_cleanup(void) #endif for (n = 0; n < ARRAY_SIZE(blocks); n++) blocks_clear(&blocks[n]); + block_oldest = block_last_compiled = NULL; for (n = 0; n < ARRAY_SIZE(jumps); n++) { free(jumps[n]); jumps[n] = NULL; @@ -6573,7 +6556,7 @@ int new_dynarec_save_blocks(void *save, int size) o = 0; for (p = 0; p < ARRAY_SIZE(blocks); p++) { bcnt = 0; - for (block = blocks[p]; block != NULL; block = block->next) { + for (block = blocks[p]; block != NULL; block = block->next_by_vaddr) { if (block->is_dirty) continue; tmp_blocks[bcnt].addr = block->start; @@ -6614,7 +6597,7 @@ void new_dynarec_load_blocks(const void *save, int size) // restore clean blocks, if any for (page = 0, b = i = 0; page < ARRAY_SIZE(blocks); page++) { - for (block = blocks[page]; block != NULL; block = block->next, b++) { + for (block = blocks[page]; block != NULL; block = block->next_by_vaddr, b++) { if (!block->is_dirty) continue; assert(block->source && block->copy); @@ -9109,68 +9092,142 @@ static noinline void pass6_clean_registers(int istart, int iend, int wr) } } -static noinline void pass10_expire_blocks(void) +static u_int *get_jump_outs(struct block_info *block) { - u_int step = MAX_OUTPUT_BLOCK_SIZE / PAGE_COUNT / 2; - // not sizeof(ndrc->translation_cache) due to vita hack - u_int step_mask = ((1u << TARGET_SIZE_2) - 1u) & ~(step - 1u); - u_int end = (out - ndrc->translation_cache + EXPIRITY_OFFSET) & step_mask; - u_int base_shift = __builtin_ctz(MAX_OUTPUT_BLOCK_SIZE); - int hit; + return (u_int *)((u_char *)block + sizeof(*block) + + block->jump_in_cnt * sizeof(block->jump_in[0])); +} - for (; expirep != end; expirep = ((expirep + step) & step_mask)) - { - u_int base_offs = expirep & ~(MAX_OUTPUT_BLOCK_SIZE - 1); - u_int block_i = expirep / step & (PAGE_COUNT - 1); - u_int phase = 
(expirep >> (base_shift - 1)) & 1u; - if (!(expirep & (MAX_OUTPUT_BLOCK_SIZE / 2 - 1))) { - inv_debug("EXP: base_offs %x/%lx phase %u\n", base_offs, - (long)(out - ndrc->translation_cache), phase); +static void block_destroy(struct block_info *block) +{ + u_int page = get_page(block->start); + struct block_info **b_pptr; + u_int *jump_outs; + int i, j; + + if (block == block_last_compiled) + block_last_compiled = NULL; + invalidate_block(block); + + jump_outs = get_jump_outs(block); + for (i = 0; i < block->jump_out_cnt; i++) { + u_int t_vaddr = jump_outs[i]; + u_int t_page = get_page(t_vaddr); + struct jump_info *ji = jumps[t_page]; + if (ji) + for (j = 0; j < ji->count; ) { + uintptr_t j_tc_offs; + if (t_vaddr != ji->e[j].target_vaddr) { + j++; continue; + } + j_tc_offs = (u_char *)ji->e[j].stub - ndrc->translation_cache; + if (j_tc_offs < block->tc_offs || j_tc_offs >= block->tc_offs + block->tc_len) { + j++; continue; + } + // remove the entry + stat_dec(stat_links); + ji->count--; + if (j < ji->count) + ji->e[j] = ji->e[ji->count]; } + } - if (!phase) { - hit = blocks_remove_matching_addrs(&blocks[block_i], base_offs, base_shift); - if (hit) { - do_clear_cache(); - mini_ht_clear(); - } + // rm from the blocks list + for (b_pptr = &blocks[page]; *b_pptr; b_pptr = &((*b_pptr)->next_by_vaddr)) { + if (*b_pptr == block) { + *b_pptr = block->next_by_vaddr; + free(block); + block = NULL; + stat_dec(stat_blocks); + break; } - else - unlink_jumps_tc_range(jumps[block_i], base_offs, base_shift); } + assert(block == NULL); } -static struct block_info *new_block_info(u_int start, u_int len, - const void *source, const void *copy, u_char *beginning, u_short jump_in_count) +static noinline void clear_tcache_space(uintptr_t tc_base, u_int max_space) +{ + struct block_info *block; + + assert(tc_base < sizeof(ndrc->translation_cache)); + for (block = block_oldest; block; ) + { + u_int end_ofs = block->tc_offs + block->tc_len; + struct block_info *block_to_rm = block; + + 
assert(end_ofs <= sizeof(ndrc->translation_cache)); + if (end_ofs <= tc_base) + break; + if (tc_base + max_space <= block->tc_offs) + break; + + block = block->next_in_tc; + inv_debug("EXP: tc_offs %06x tc_len %u vaddr %08x len %u\n", + block_to_rm->tc_offs, block_to_rm->tc_len, block_to_rm->start, block_to_rm->len); + unlink_jumps_vaddr_range(block_to_rm->start, block_to_rm->start + block_to_rm->len); + block_destroy(block_to_rm); + } + if (block_oldest != block) { + block_oldest = block; + do_clear_cache(); + mini_ht_clear(); + inv_debug("EXP: cleared tc_offs %06zx-%06zx\n", tc_base, tc_base + max_space); + } +} + +static struct block_info *block_info_new(u_int start, u_int len, + const void *source, const void *copy, u_short jump_in_count, u_int jump_out_count) { - struct block_info **b_pptr; struct block_info *block; - u_int page = get_page(start); - block = malloc(sizeof(*block) + jump_in_count * sizeof(block->jump_in[0])); - assert(block); + block = calloc(sizeof(*block) + jump_in_count * sizeof(block->jump_in[0]) + + jump_out_count * sizeof(u_int), 1); + if (!block) { + SysPrintf("ndrc block OOM\n"); + abort(); + } assert(jump_in_count > 0); + assert(jump_out_count < 0x10000u); block->source = source; block->copy = copy; block->start = start; block->len = len; - block->reg_sv_flags = 0; - block->tc_offs = beginning - ndrc->translation_cache; - //block->tc_len = out - beginning; - block->is_dirty = 0; - block->inv_near_misses = 0; + block->jump_out_cnt = jump_out_count; block->jump_in_cnt = jump_in_count; + return block; +} + +static void block_info_finish(struct block_info *block, u_char *beginning) +{ + u_int page = get_page(block->start); + struct block_info **b_pptr; + u_int *jump_outs; + int i, j; + + block->tc_offs = beginning - ndrc->translation_cache; + block->tc_len = out - beginning; + + jump_outs = get_jump_outs(block); + for (i = j = 0; i < linkcount; i++) + if (!link_addr[i].internal) + jump_outs[j++] = link_addr[i].target; + assert(j == 
block->jump_out_cnt); + // insert sorted by start mirror-unmasked vaddr - for (b_pptr = &blocks[page]; ; b_pptr = &((*b_pptr)->next)) { - if (*b_pptr == NULL || (*b_pptr)->start >= start) { - block->next = *b_pptr; + for (b_pptr = &blocks[page]; ; b_pptr = &((*b_pptr)->next_by_vaddr)) { + if (*b_pptr == NULL || (*b_pptr)->start >= block->start) { + block->next_by_vaddr = *b_pptr; *b_pptr = block; break; } } + if (block_oldest == NULL) + block_oldest = block; + if (block_last_compiled) + block_last_compiled->next_in_tc = block; + block_last_compiled = block; stat_inc(stat_blocks); - return block; } static int noinline new_recompile_block(u_int addr) @@ -9199,20 +9256,21 @@ static int noinline new_recompile_block(u_int addr) ndrc_g.did_compile++; if (Config.HLE && start == 0x80001000) // hlecall { - void *beginning = start_block(); + void *beginning = start_block(16*4); emit_movimm(start,0); emit_writeword(0,&psxRegs.pc); emit_far_jump(new_dyna_leave); literal_pool(0); end_block(beginning); - struct block_info *block = new_block_info(start, 4, NULL, NULL, beginning, 1); + struct block_info *block = block_info_new(start, 4, NULL, NULL, 1, 0); block->jump_in[0].vaddr = start; block->jump_in[0].addr = beginning; + block_info_finish(block, beginning); return 0; } else if (f1_hack && hack_addr == 0) { - void *beginning = start_block(); + void *beginning = start_block(64*4); emit_movimm(start, 0); emit_writeword(0, &hack_addr); emit_readword(&psxRegs.GPR.n.sp, 0); @@ -9229,9 +9287,10 @@ static int noinline new_recompile_block(u_int addr) literal_pool(0); end_block(beginning); - struct block_info *block = new_block_info(start, 4, NULL, NULL, beginning, 1); + struct block_info *block = block_info_new(start, 4, NULL, NULL, 1, 0); block->jump_in[0].vaddr = start; block->jump_in[0].addr = beginning; + block_info_finish(block, beginning); SysPrintf("F1 hack to %08x\n", start); return 0; } @@ -9258,7 +9317,6 @@ static int noinline new_recompile_block(u_int addr) /* Pass 7: flag 
32-bit registers */ /* Pass 8: assembly */ /* Pass 9: linker */ - /* Pass 10: garbage collection / free memory */ /* Pass 1 disassembly */ @@ -9306,7 +9364,7 @@ static int noinline new_recompile_block(u_int addr) linkcount=0;stubcount=0; is_delayslot=0; u_int dirty_pre=0; - void *beginning=start_block(); + void *beginning = start_block(MAX_OUTPUT_BLOCK_SIZE); void *instr_addr0_override = NULL; int ds = 0; @@ -9502,7 +9560,8 @@ static int noinline new_recompile_block(u_int addr) #endif /* Pass 9 - Linker */ - for(i=0;i %08x\n", log_addr(link_addr[i].addr), link_addr[i].target); @@ -9511,10 +9570,11 @@ static int noinline new_recompile_block(u_int addr) { void *stub = out; void *addr = check_addr(link_addr[i].target); - emit_extjump(link_addr[i].addr, link_addr[i].target); + emit_extjump_stub(link_addr[i].addr, link_addr[i].target); + jump_out_count++; if (addr) { set_jump_target(link_addr[i].addr, addr); - ndrc_add_jump_out(link_addr[i].target,stub); + ndrc_add_jump_out(link_addr[i].target, stub); } else set_jump_target(link_addr[i].addr, stub); @@ -9525,11 +9585,7 @@ static int noinline new_recompile_block(u_int addr) int target=(link_addr[i].target-start)>>2; assert(target>=0&&target>1); - //#else set_jump_target(link_addr[i].addr, instr_addr[target]); - //#endif } } @@ -9552,7 +9608,7 @@ static int noinline new_recompile_block(u_int addr) } struct block_info *block = - new_block_info(start, slen * 4, source, copy, beginning, jump_in_count); + block_info_new(start, slen * 4, source, copy, jump_in_count, jump_out_count); block->reg_sv_flags = state_rflags; int jump_in_i = 0; @@ -9580,29 +9636,17 @@ static int noinline new_recompile_block(u_int addr) hash_table_add(block->jump_in[0].vaddr, block->jump_in[0].addr); // Write out the literal pool if necessary literal_pool(0); - #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK - // Align code - if(((u_int)out)&7) emit_addnop(13); - #endif assert(out - (u_char *)beginning < MAX_OUTPUT_BLOCK_SIZE); //printf("shadow buffer: 
%p-%p\n",copy,(u_char *)copy+slen*4); memcpy(copy, source, source_len); copy += source_len; end_block(beginning); - - // If we're within 256K of the end of the buffer, - // start over from the beginning. (Is 256K enough?) - if (out > ndrc->translation_cache + sizeof(ndrc->translation_cache) - MAX_OUTPUT_BLOCK_SIZE) - out = ndrc->translation_cache; + block_info_finish(block, beginning); // Trap writes to any of the pages we compiled mark_invalid_code(start, slen*4, 0); - /* Pass 10 - Free memory by expiring oldest blocks */ - - pass10_expire_blocks(); - #ifdef ASSEM_PRINT fflush(stdout); #endif