From 919981d0bca7a0898133362a91405395678612e3 Mon Sep 17 00:00:00 2001 From: notaz Date: Sun, 14 Nov 2021 01:20:38 +0200 Subject: [PATCH] drc: update cache flushing as of now the arm64 __clear_cache workaround is still needed --- libpcsxcore/new_dynarec/assem_arm.c | 43 ---------------- libpcsxcore/new_dynarec/assem_arm64.c | 72 +++++++++++++------------- libpcsxcore/new_dynarec/new_dynarec.c | 74 ++++++++++++++++++++------- libpcsxcore/new_dynarec/new_dynarec.h | 8 +-- 4 files changed, 95 insertions(+), 102 deletions(-) diff --git a/libpcsxcore/new_dynarec/assem_arm.c b/libpcsxcore/new_dynarec/assem_arm.c index 62038a20..f9333f2d 100644 --- a/libpcsxcore/new_dynarec/assem_arm.c +++ b/libpcsxcore/new_dynarec/assem_arm.c @@ -2348,49 +2348,6 @@ static void do_miniht_insert(u_int return_address,int rt,int temp) { #endif } -static void mark_clear_cache(void *target) -{ - u_long offset = (u_char *)target - translation_cache; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; - } -} - -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas once. -static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_char *start, *end; - for(j=0;j<32;j++) - { - if(bitmap&(1<>3][0]); } -static void mark_clear_cache(void *target) +static void clear_cache_arm64(char *start, char *end) { - u_long offset = (u_char *)target - translation_cache; - u_int mask = 1u << ((offset >> 12) & 31); - if (!(needs_clear_cache[offset >> 17] & mask)) { - char *start = (char *)((u_long)target & ~4095ul); - start_tcache_write(start, start + 4096); - needs_clear_cache[offset >> 17] |= mask; + // Don't rely on GCC's __clear_cache implementation, as it caches + // icache/dcache cache line sizes, that can vary between cores on + // big.LITTLE architectures. + uint64_t addr, ctr_el0; + static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff; + size_t isize, dsize; + + __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0)); + isize = 4 << ((ctr_el0 >> 0) & 0xf); + dsize = 4 << ((ctr_el0 >> 16) & 0xf); + + // use the global minimum cache line size + icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize; + dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize; + + /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is + not required for instruction to data coherence. */ + if ((ctr_el0 & (1 << 28)) == 0x0) { + addr = (uint64_t)start & ~(uint64_t)(dsize - 1); + for (; addr < (uint64_t)end; addr += dsize) + // use "civac" instead of "cvau", as this is the suggested workaround for + // Cortex-A53 errata 819472, 826319, 827319 and 824069. + __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory"); } -} + __asm__ volatile("dsb ish" : : : "memory"); -// Clearing the cache is rather slow on ARM Linux, so mark the areas -// that need to be cleared, and then only clear these areas once. -static void do_clear_cache() -{ - int i,j; - for (i=0;i<(1<<(TARGET_SIZE_2-17));i++) - { - u_int bitmap=needs_clear_cache[i]; - if(bitmap) { - u_char *start, *end; - for(j=0;j<32;j++) - { - if(bitmap&(1<translation_cache; + u_int mask = 1u << ((offset >> 12) & 31); + if (!(needs_clear_cache[offset >> 17] & mask)) { + char *start = (char *)((uintptr_t)target & ~4095l); + start_tcache_write(start, start + 4095); + needs_clear_cache[offset >> 17] |= mask; + } +} + +// Clearing the cache is rather slow on ARM Linux, so mark the areas +// that need to be cleared, and then only clear these areas once. +static void do_clear_cache(void) +{ + int i, j; + for (i = 0; i < (1<<(TARGET_SIZE_2-17)); i++) + { + u_int bitmap = needs_clear_cache[i]; + if (!bitmap) + continue; + for (j = 0; j < 32; j++) + { + u_char *start, *end; + if (!(bitmap & (1<translation_cache + i*131072 + j*4096; + end = start + 4095; + for (j++; j < 32; j++) { + if (!(bitmap & (1<addr,head->vaddr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); } head=head->next; @@ -1082,9 +1125,7 @@ static void invalidate_page(u_int page) while(head!=NULL) { inv_debug("INVALIDATE: kill pointer to %x (%p)\n",head->vaddr,head->addr); void *host_addr=find_extjump_insn(head->addr); - #if defined(__arm__) || defined(__aarch64__) - mark_clear_cache(host_addr); - #endif + mark_clear_cache(host_addr); set_jump_target(host_addr, head->addr); next=head->next; free(head); @@ -1107,9 +1148,7 @@ static void invalidate_block_range(u_int block, u_int first, u_int last) for(first=page+1;firsttranslation_cache; @@ -6497,7 +6537,7 @@ void new_dynarec_clear_full() for(n=0;n<4096;n++) ll_clear(jump_dirty+n); } -void new_dynarec_init() +void new_dynarec_init(void) { SysPrintf("Init new dynarec\n"); @@ -6547,7 +6587,7 @@ void new_dynarec_init() SysPrintf("warning: RAM is not directly mapped, performance will suffer\n"); } -void new_dynarec_cleanup() +void new_dynarec_cleanup(void) { int n; #ifdef BASE_ADDR_DYNAMIC @@ -9082,10 +9122,8 @@ int new_recompile_block(u_int addr) break; case 3: // Clear jump_out - #if defined(__arm__) || defined(__aarch64__) if((expirep&2047)==0) do_clear_cache(); - #endif ll_remove_matching_addrs(jump_out+(expirep&2047),base,shift); ll_remove_matching_addrs(jump_out+2048+(expirep&2047),base,shift); break; diff --git a/libpcsxcore/new_dynarec/new_dynarec.h b/libpcsxcore/new_dynarec/new_dynarec.h index 1bec5e1d..a19bff0b 100644 --- a/libpcsxcore/new_dynarec/new_dynarec.h +++ b/libpcsxcore/new_dynarec/new_dynarec.h @@ -11,12 +11,12 @@ extern int cycle_multiplier; // 100 for 1.0 #define NDHACK_GTE_NO_FLAGS (1<<2) extern int new_dynarec_hacks; -void new_dynarec_init(); -void new_dynarec_cleanup(); -void new_dynarec_clear_full(); +void new_dynarec_init(void); +void new_dynarec_cleanup(void); +void new_dynarec_clear_full(void); void new_dyna_start(void *context); int new_dynarec_save_blocks(void *save, int size); void new_dynarec_load_blocks(const void *save, int size); -void invalidate_all_pages(); +void invalidate_all_pages(void); void invalidate_block(unsigned int block); -- 2.39.2