X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?p=pcsx_rearmed.git;a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fnew_dynarec.c;h=4e09592d0741dc04e46fdb0fdedb0295d4637a04;hp=cfeddc297cc4934ff1d0995573215af29506ee05;hb=a3203cf4f2f21b0f2f74c5e494e3f5ba58225eae;hpb=2a014d73faf4cec54f8bf51134828173f0debfaa diff --git a/libpcsxcore/new_dynarec/new_dynarec.c b/libpcsxcore/new_dynarec/new_dynarec.c index cfeddc29..4e09592d 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.c +++ b/libpcsxcore/new_dynarec/new_dynarec.c @@ -185,8 +185,10 @@ struct link_entry static uint64_t unneeded_reg[MAXBLOCK]; static uint64_t branch_unneeded_reg[MAXBLOCK]; static signed char regmap_pre[MAXBLOCK][HOST_REGS]; // pre-instruction i? - static uint64_t current_constmap[HOST_REGS]; - static uint64_t constmap[MAXBLOCK][HOST_REGS]; + // contains 'real' consts at [i] insn, but may differ from what's actually + // loaded in host reg as 'final' value is always loaded, see get_final_value() + static uint32_t current_constmap[HOST_REGS]; + static uint32_t constmap[MAXBLOCK][HOST_REGS]; static struct regstat regs[MAXBLOCK]; static struct regstat branch_regs[MAXBLOCK]; static signed char minimum_free_regs[MAXBLOCK]; @@ -355,7 +357,7 @@ static void start_tcache_write(void *start, void *end) static void end_tcache_write(void *start, void *end) { -#ifdef __arm__ +#if defined(__arm__) || defined(__aarch64__) size_t len = (char *)end - (char *)start; #if defined(__BLACKBERRY_QNX__) msync(start, len, MS_SYNC | MS_CACHE_ONLY | MS_INVALIDATE_ICACHE); @@ -365,12 +367,14 @@ static void end_tcache_write(void *start, void *end) sceKernelSyncVMDomain(sceBlock, start, len); #elif defined(_3DS) ctr_flush_invalidate_cache(); + #elif defined(__aarch64__) + // as of 2021, __clear_cache() is still broken on arm64 + // so here is a custom one :( + clear_cache_arm64(start, end); #else __clear_cache(start, end); #endif (void)len; -#else - __clear_cache(start, end); #endif mprotect_w_x(start, end, 1); @@ -390,16 +394,62 @@ static void end_block(void *start) end_tcache_write(start, out); } +// also takes care of w^x mappings when patching code +static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)]; + +static void mark_clear_cache(void *target) +{ + uintptr_t offset = (u_char *)target - ndrc->translation_cache; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((uintptr_t)target & ~4095l); + start_tcache_write(start, start + 4095); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. +static void do_clear_cache(void) +{ + int i, j; + for (i = 0; i < (1<<(TARGET_SIZE_2-17)); i++) + { + u_int bitmap = needs_clear_cache[i]; + if (!bitmap) + continue; + for (j = 0; j < 32; j++) + { + u_char *start, *end; + if (!(bitmap & (1<translation_cache + i*131072 + j*4096; + end = start + 4095; + for (j++; j < 32; j++) { + if (!(bitmap & (1<>31)|1; - return (x * cycle_multiplier + s * 50) / 100; + return (x * m + s * 50) / 100; } static u_int get_page(u_int vaddr) @@ -547,7 +597,7 @@ void dirty_reg(struct regstat *cur,signed char reg) } } -void set_const(struct regstat *cur,signed char reg,uint64_t value) +static void set_const(struct regstat *cur, signed char reg, uint32_t value) { int hr; if(!reg) return; @@ -559,7 +609,7 @@ void set_const(struct regstat *cur,signed char reg,uint64_t value) } } -void clear_const(struct regstat *cur,signed char reg) +static void clear_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return; @@ -570,7 +620,7 @@ void clear_const(struct regstat *cur,signed char reg) } } -int is_const(struct regstat *cur,signed char reg) +static int is_const(struct regstat *cur, signed char reg) { int hr; if(reg<0) return 0; @@ -582,7 +632,8 @@ int is_const(struct regstat *cur,signed char reg) } return 0; } -uint64_t get_const(struct regstat *cur,signed char reg) + +static uint32_t get_const(struct regstat *cur, signed char reg) { int hr; if(!reg) return 0; @@ -1054,9 +1105,7 @@ static void ll_kill_pointers(struct ll_entry *head,uintptr_t addr,int shift) { inv_debug("EXP: Kill pointer at %p (%x)\n",head->addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); } head=head->next; @@ -1082,9 +1131,7 @@ static void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); next=head->next; free(head); @@ -1107,9 +1154,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) for(first=page+1;firstregmap,rs2[i]); if(rs2[i]==0) // rx=0); - if(opcode2[i]==0x2a) // SLT + if(opcode2[i]==0x2a&&rs1[i]!=0) { // SLT + assert(s1l>=0); emit_shrimm(s1l,31,t); - else // SLTU (unsigned can not be less than zero) + } + else // SLTU (unsigned can not be less than zero, 0<0) emit_zeroreg(t); } else if(rs1[i]==0) // r0=0) reglist|=1< 0 && !bt[i]) { + for (hr = 0; hr < HOST_REGS; hr++) { + int reg = regs[i-1].regmap[hr]; + if (hr == EXCLUDE_REG || reg < 0) + continue; + if (!((regs[i-1].isconst >> hr) & 1)) + continue; + if (i > 1 && reg == regs[i-2].regmap[hr] && constmap[i-1][hr] == constmap[i-2][hr]) + continue; + emit_movimm(constmap[i-1][hr],0); + emit_storereg(reg, 0); + } + } emit_movimm(start+i*4,0); emit_writeword(0,&pcaddr); emit_far_call(do_insn_cmp); @@ -4292,6 +4354,7 @@ static void drc_dbg_emit_do_cmp(int i) //emit_writeword(0,&cycle); (void)get_reg2; restore_regs(reglist); + assem_debug("\\\\do_insn_cmp\n"); } #else #define drc_dbg_emit_do_cmp(x) @@ -4418,11 +4481,13 @@ void do_cc(int i,signed char i_regmap[],int *adj,int addr,int taken,int invert) else if(*adj==0||invert) { int cycles=CLOCK_ADJUST(count+2); // faster loop HACK +#if 0 if (t&&*adj) { int rel=t-i; if(-NO_CYCLE_PENALTY_THRtranslation_cache; @@ -6497,7 +6562,7 @@ void new_dynarec_clear_full() for(n=0;n<4096;n++) ll_clear(jump_dirty+n); } -void new_dynarec_init() +void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); @@ -6547,7 +6612,7 @@ void new_dynarec_init() SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); } -void new_dynarec_cleanup() +void new_dynarec_cleanup(void) { int n; #ifdef BASE_ADDR_DYNAMIC @@ -6569,16 +6634,25 @@ void new_dynarec_cleanup() static u_int *get_source_start(u_int addr, u_int *limit) { + if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + cycle_multiplier_override = 0; + if (addr < 0x00200000 || - (0xa0000000 <= addr && addr < 0xa0200000)) { + (0xa0000000 <= addr && addr < 0xa0200000)) + { // used for BIOS calls mostly? *limit = (addr&0xa0000000)|0x00200000; return (u_int *)(rdram + (addr&0x1fffff)); } else if (!Config.HLE && ( /* (0x9fc00000 <= addr && addr < 0x9fc80000) ||*/ - (0xbfc00000 <= addr && addr < 0xbfc80000))) { - // BIOS + (0xbfc00000 <= addr && addr < 0xbfc80000))) + { + // BIOS. The multiplier should be much higher as it's uncached 8bit mem, + // but timings in PCSX are too tied to the interpreter's BIAS + if (!(new_dynarec_hacks & NDHACK_OVERRIDE_CYCLE_M)) + cycle_multiplier_override = 200; + *limit = (addr & 0xfff00000) | 0x80000; return (u_int *)((u_char *)psxR + (addr&0x7ffff)); } @@ -7656,7 +7730,7 @@ int new_recompile_block(u_int addr) dirty_reg(&branch_regs[i-1],31); } memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case RJUMP: memcpy(&branch_regs[i-1],¤t,sizeof(current)); @@ -7677,7 +7751,7 @@ int new_recompile_block(u_int addr) } #endif memcpy(&branch_regs[i-1].regmap_entry,&branch_regs[i-1].regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); break; case CJUMP: if((opcode[i-1]&0x3E)==4) // BEQ/BNE @@ -7704,7 +7778,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else if((opcode[i-1]&0x3E)==6) // BLEZ/BGTZ @@ -7729,7 +7803,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7783,7 +7857,7 @@ int new_recompile_block(u_int addr) branch_regs[i-1].isconst=0; branch_regs[i-1].wasconst=0; memcpy(&branch_regs[i-1].regmap_entry,¤t.regmap,sizeof(current.regmap)); - memcpy(constmap[i],constmap[i-1],sizeof(current_constmap)); + memcpy(constmap[i],constmap[i-1],sizeof(constmap[i])); } else // Alloc the delay slot in case the branch is taken @@ -7880,7 +7954,7 @@ int new_recompile_block(u_int addr) if(!is_ds[i]) { regs[i].dirty=current.dirty; regs[i].isconst=current.isconst; - memcpy(constmap[i],current_constmap,sizeof(current_constmap)); + memcpy(constmap[i],current_constmap,sizeof(constmap[i])); } for(hr=0;hr=0) { @@ -9082,10 +9156,8 @@ int new_recompile_block(u_int addr) break; case 3: // Clear jump_out - #if defined(__arm__) || defined(__aarch64__) if((expirep&2047)==0) do_clear_cache(); - #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift); break;