From: notaz Date: Wed, 21 Sep 2011 23:11:04 +0000 (+0300) Subject: drc: do modification check in smaller than page granularity X-Git-Tag: r10~37 X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=commitdiff_plain;h=9be4ba6483d8d4bbd87fd12ade5c5cc7f9e3f116 drc: do modification check in smaller than page granularity There are some games that keep writing in the same 4k page where the code resides (Alien Ressurection is one such example). I've noticed those accesses are usually to a small region, so keep range of addresses that has no code and has been recently accessed and check it when the writes come. --- diff --git a/libpcsxcore/new_dynarec/emu_if.h b/libpcsxcore/new_dynarec/emu_if.h index a5612556..128d4317 100644 --- a/libpcsxcore/new_dynarec/emu_if.h +++ b/libpcsxcore/new_dynarec/emu_if.h @@ -47,6 +47,9 @@ extern unsigned char byte; extern void *psxH_ptr; +// same as invalid_code, just a region for ram write checks (inclusive) +extern u32 inv_code_start, inv_code_end; + /* cycles/irqs */ extern unsigned int next_interupt; extern int pending_exception; diff --git a/libpcsxcore/new_dynarec/linkage_arm.s b/libpcsxcore/new_dynarec/linkage_arm.s index ac4929f4..c545ed35 100644 --- a/libpcsxcore/new_dynarec/linkage_arm.s +++ b/libpcsxcore/new_dynarec/linkage_arm.s @@ -62,6 +62,8 @@ rdram = 0x80000000 .global psxRegs .global nd_pcsx_io .global psxH_ptr + .global inv_code_start + .global inv_code_end .bss .align 4 @@ -186,10 +188,16 @@ nd_pcsx_io_end = spu_writef + 4 psxH_ptr = nd_pcsx_io_end .type psxH_ptr, %object .size psxH_ptr, 4 -align0 = psxH_ptr + 4 /* just for alignment */ +inv_code_start = psxH_ptr + 4 + .type inv_code_start, %object + .size inv_code_start, 4 +inv_code_end = inv_code_start + 4 + .type inv_code_end, %object + .size inv_code_end, 4 +align0 = inv_code_end + 4 /* just for alignment */ .type align0, %object - .size align0, 4 -branch_target = align0 + 4 + .size align0, 12 +branch_target = align0 + 12 .type branch_target, %object .size branch_target, 4 mini_ht = branch_target + 4 @@ -705,7 +713,6 @@ indirect_jump: .type invalidate_addr_r0, %function invalidate_addr_r0: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r0, #12 b invalidate_addr_call .size invalidate_addr_r0, .-invalidate_addr_r0 .align 2 @@ -713,7 +720,7 @@ invalidate_addr_r0: .type invalidate_addr_r1, %function invalidate_addr_r1: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r1, #12 + mov r0, r1 b invalidate_addr_call .size invalidate_addr_r1, .-invalidate_addr_r1 .align 2 @@ -721,7 +728,7 @@ invalidate_addr_r1: .type invalidate_addr_r2, %function invalidate_addr_r2: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r2, #12 + mov r0, r2 b invalidate_addr_call .size invalidate_addr_r2, .-invalidate_addr_r2 .align 2 @@ -729,7 +736,7 @@ invalidate_addr_r2: .type invalidate_addr_r3, %function invalidate_addr_r3: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r3, #12 + mov r0, r3 b invalidate_addr_call .size invalidate_addr_r3, .-invalidate_addr_r3 .align 2 @@ -737,7 +744,7 @@ invalidate_addr_r3: .type invalidate_addr_r4, %function invalidate_addr_r4: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r4, #12 + mov r0, r4 b invalidate_addr_call .size invalidate_addr_r4, .-invalidate_addr_r4 .align 2 @@ -745,7 +752,7 @@ invalidate_addr_r4: .type invalidate_addr_r5, %function invalidate_addr_r5: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r5, #12 + mov r0, r5 b invalidate_addr_call .size invalidate_addr_r5, .-invalidate_addr_r5 .align 2 @@ -753,7 +760,7 @@ invalidate_addr_r5: .type invalidate_addr_r6, %function invalidate_addr_r6: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r6, #12 + mov r0, r6 b invalidate_addr_call .size invalidate_addr_r6, .-invalidate_addr_r6 .align 2 @@ -761,7 +768,7 @@ invalidate_addr_r6: .type invalidate_addr_r7, %function invalidate_addr_r7: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r7, #12 + mov r0, r7 b invalidate_addr_call .size invalidate_addr_r7, .-invalidate_addr_r7 .align 2 @@ -769,7 +776,7 @@ invalidate_addr_r7: .type invalidate_addr_r8, %function invalidate_addr_r8: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r8, #12 + mov r0, r8 b invalidate_addr_call .size invalidate_addr_r8, .-invalidate_addr_r8 .align 2 @@ -777,7 +784,7 @@ invalidate_addr_r8: .type invalidate_addr_r9, %function invalidate_addr_r9: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r9, #12 + mov r0, r9 b invalidate_addr_call .size invalidate_addr_r9, .-invalidate_addr_r9 .align 2 @@ -785,7 +792,7 @@ invalidate_addr_r9: .type invalidate_addr_r10, %function invalidate_addr_r10: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r10, #12 + mov r0, r10 b invalidate_addr_call .size invalidate_addr_r10, .-invalidate_addr_r10 .align 2 @@ -793,13 +800,17 @@ invalidate_addr_r10: .type invalidate_addr_r12, %function invalidate_addr_r12: stmia fp, {r0, r1, r2, r3, r12, lr} - lsr r0, r12, #12 + mov r0, r12 .size invalidate_addr_r12, .-invalidate_addr_r12 .align 2 .global invalidate_addr_call .type invalidate_addr_call, %function invalidate_addr_call: - bl invalidate_block + ldr r12, [fp, #inv_code_start-dynarec_local] + ldr lr, [fp, #inv_code_end-dynarec_local] + cmp r0, r12 + cmpcs lr, r0 + blcc invalidate_addr ldmia fp, {r0, r1, r2, r3, r12, pc} .size invalidate_addr_call, .-invalidate_addr_call @@ -914,8 +925,13 @@ ari_write_ram32: str\pf r1, [r0] tst r2, r2 movne pc, lr - lsr r0, r0, #12 - b invalidate_block + ldr r1, [fp, #inv_code_start-dynarec_local] + ldr r2, [fp, #inv_code_end-dynarec_local] + cmp r0, r1 + cmpcs r2, r0 + movcs pc, lr + nop + b invalidate_addr .endm ari_write_ram_mirror8: diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index 74451c7d..8ab31282 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -371,6 +371,7 @@ void *get_addr(u_int vaddr) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; memory_map[vaddr>>12]|=0x40000000; if(vpage<2048) { #ifndef DISABLE_TLB @@ -463,6 +464,7 @@ void *get_addr_32(u_int vaddr,u_int flags) if(verify_dirty(head->addr)) { //printf("restore candidate: %x (%d) d=%d\n",vaddr,page,invalid_code[vaddr>>12]); invalid_code[vaddr>>12]=0; + inv_code_start=inv_code_end=~0; memory_map[vaddr>>12]|=0x40000000; if(vpage<2048) { #ifndef DISABLE_TLB @@ -1127,39 +1129,10 @@ void invalidate_page(u_int page) head=next; } } -void invalidate_block(u_int block) + +static void invalidate_block_range(u_int block, u_int first, u_int last) { u_int page=get_page(block<<12); - u_int vpage=get_vpage(block<<12); - inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); - //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); - u_int first,last; - first=last=page; - struct ll_entry *head; - head=jump_dirty[vpage]; - //printf("page=%d vpage=%d\n",page,vpage); - while(head!=NULL) { - u_int start,end; - if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision - get_bounds((int)head->addr,&start,&end); - //printf("start: %x end: %x\n",start,end); - if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { - if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { - if((((start-(u_int)rdram)>>12)&2047)>12)&2047; - if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; - } - } -#ifndef DISABLE_TLB - if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { - if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { - if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; - if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; - } - } -#endif - } - head=head->next; - } //printf("first=%d last=%d\n",first,last); invalidate_page(page); assert(first+5>page); // NB: this assumes MAXBLOCK<=4096 (4 pages) @@ -1178,9 +1151,6 @@ void invalidate_block(u_int block) // Don't trap writes invalid_code[block]=1; -#ifdef PCSX - invalid_code[((u_int)0x80000000>>12)|page]=1; -#endif #ifndef DISABLE_TLB // If there is a valid TLB entry for this page, remove write protect if(tlb_LUT_w[block]) { @@ -1198,10 +1168,98 @@ void invalidate_block(u_int block) memset(mini_ht,-1,sizeof(mini_ht)); #endif } + +void invalidate_block(u_int block) +{ + u_int page=get_page(block<<12); + u_int vpage=get_vpage(block<<12); + inv_debug("INVALIDATE: %x (%d)\n",block<<12,page); + //inv_debug("invalid_code[block]=%d\n",invalid_code[block]); + u_int first,last; + first=last=page; + struct ll_entry *head; + head=jump_dirty[vpage]; + //printf("page=%d vpage=%d\n",page,vpage); + while(head!=NULL) { + u_int start,end; + if(vpage>2047||(head->vaddr>>12)==block) { // Ignore vaddr hash collision + get_bounds((int)head->addr,&start,&end); + //printf("start: %x end: %x\n",start,end); + if(page<2048&&start>=0x80000000&&end<0x80000000+RAM_SIZE) { + if(((start-(u_int)rdram)>>12)<=page&&((end-1-(u_int)rdram)>>12)>=page) { + if((((start-(u_int)rdram)>>12)&2047)>12)&2047; + if((((end-1-(u_int)rdram)>>12)&2047)>last) last=((end-1-(u_int)rdram)>>12)&2047; + } + } +#ifndef DISABLE_TLB + if(page<2048&&(signed int)start>=(signed int)0xC0000000&&(signed int)end>=(signed int)0xC0000000) { + if(((start+memory_map[start>>12]-(u_int)rdram)>>12)<=page&&((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)>=page) { + if((((start+memory_map[start>>12]-(u_int)rdram)>>12)&2047)>12]-(u_int)rdram)>>12)&2047; + if((((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047)>last) last=((end-1+memory_map[(end-1)>>12]-(u_int)rdram)>>12)&2047; + } + } +#endif + } + head=head->next; + } + invalidate_block_range(block,first,last); +} + void invalidate_addr(u_int addr) { +#ifdef PCSX + //static int rhits; + // this check is done by the caller + //if (inv_code_start<=addr&&addr<=inv_code_end) { rhits++; return; } + u_int page=get_page(addr); + if(page<2048) { // RAM + struct ll_entry *head; + u_int addr_min=~0, addr_max=0; + int mask=RAM_SIZE-1; + int pg1; + inv_code_start=addr&~0xfff; + inv_code_end=addr|0xfff; + pg1=page; + if (pg1>0) { + // must check previous page too because of spans.. + pg1--; + inv_code_start-=0x1000; + } + for(;pg1<=page;pg1++) { + for(head=jump_dirty[pg1];head!=NULL;head=head->next) { + u_int start,end; + get_bounds((int)head->addr,&start,&end); + if((start&mask)<=(addr&mask)&&(addr&mask)<(end&mask)) { + if(startaddr_max) addr_max=end; + } + else if(addrinv_code_start) + inv_code_start=end; + } + } + } + if (addr_min!=~0) { + inv_debug("INV ADDR: %08x hit %08x-%08x\n", addr, addr_min, addr_max); + inv_code_start=inv_code_end=~0; + invalidate_block_range(addr>>12,(addr_min&mask)>>12,(addr_max&mask)>>12); + return; + } + else { + inv_debug("INV ADDR: %08x miss, inv %08x-%08x, sk %d\n", addr, inv_code_start, inv_code_end, 0);//rhits); + } + //rhits=0; + if(page!=0) // FIXME: don't know what's up with page 0 (Klonoa) + return; + } +#endif invalidate_block(addr>>12); } + // This is called when loading a save state. // Anything could have changed, so invalidate everything. void invalidate_all_pages() @@ -7793,6 +7851,7 @@ void new_dynarec_clear_full() pending_exception=0; literalcount=0; stop_after_jal=0; + inv_code_start=inv_code_end=~0; // TLB #ifndef DISABLE_TLB using_tlb=0; @@ -11325,6 +11384,7 @@ int new_recompile_block(int addr) } #endif } + inv_code_start=inv_code_end=~0; #ifdef PCSX // PCSX maps all RAM mirror invalid_code tests to 0x80000000..0x80000000+RAM_SIZE if(get_page(start)<(RAM_SIZE>>12))