X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm64.c;h=271bee5807e69d7663b20503ad0126c800fa5e99;hb=9b495f6ec3f28cf5ed1d41f6af16a9967fcf3e64;hp=3a88f9efc78594b45aed93866d5da9840fc20d95;hpb=104df9d3b15f92d5c73d2d6beb6f01f0cc158e03;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index 3a88f9ef..271bee58 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,15 +23,13 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - void do_memhandler_pre(); void do_memhandler_post(); /* Linker */ static void set_jump_target(void *addr, void *target) { - u_int *ptr = addr; + u_int *ptr = NDRC_WRITE_OFFSET(addr); intptr_t offset = (u_char *)target - (u_char *)addr; if ((*ptr&0xFC000000) == 0x14000000) { // b @@ -45,7 +43,7 @@ static void set_jump_target(void *addr, void *target) // should only happen when jumping to an already compiled block (see add_jump_out) // a workaround would be to do a trampoline jump via a stub at the end of the block assert(-1048576 <= offset && offset < 1048576); - *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); + *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5); } else if((*ptr&0x9f000000)==0x10000000) { // adr // generated by do_miniht_insert @@ -142,7 +140,7 @@ static unused const char *condname[16] = { static void output_w32(u_int word) { - *((u_int *)out) = word; + *((u_int *)NDRC_WRITE_OFFSET(out)) = word; out += 4; } @@ -399,6 +397,27 @@ static void emit_movimm(u_int imm, u_int rt) } } +static void emit_movimm64(uint64_t imm, u_int rt) +{ + u_int shift, op, imm16, insns = 0; + for (shift = 0; shift < 4; shift++) { + imm16 = (imm >> shift * 16) & 0xffff; + if (!imm16) + continue; + op = insns ? 0xf2800000 : 0xd2800000; + assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16); + if (shift) + assem_debug(",lsl #%u", shift * 16); + assem_debug("\n"); + output_w32(op | (shift << 21) | imm16_rd(imm16, rt)); + insns++; + } + if (!insns) { + assem_debug("movz %s,#0\n", regname64[rt]); + output_w32(0xd2800000 | imm16_rd(0, rt)); + } +} + static void emit_readword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; @@ -598,6 +617,10 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) { + if (imm == 0) { + emit_mov(rs, rt); + return; + } emit_addimm_s(0, 0, rs, imm, rt); } @@ -838,6 +861,12 @@ static void emit_cmp(u_int rs,u_int rt) output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR)); } +static void emit_cmpcs(u_int rs,u_int rt) +{ + assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]); + output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0)); +} + static void emit_set_gz32(u_int rs, u_int rt) { //assem_debug("set_gz32\n"); @@ -961,9 +990,11 @@ static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); } -static unused void emit_cbz(const void *a, u_int r) +static void *emit_cbz(u_int r, const void *a) { + void *ret = out; emit_cb(0, 0, a, r); + return ret; } static void emit_jmpreg(u_int r) @@ -1171,14 +1202,11 @@ static void emit_clz(u_int rs, u_int rt) } // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) +static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt) { - host_tempreg_acquire(); - emit_shrimm(r, 12, HOST_TEMPREG); - assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]); - output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); - emit_cmpimm(HOST_TEMPREG, imm); - host_tempreg_release(); + emit_shrimm(r, 12, rt); + assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]); + output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt)); } // special for loadlr_assemble, rs2 is destroyed @@ -1323,16 +1351,7 @@ static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int r } // just move the whole thing. At least on Linux all addresses // seem to be 48bit, so 3 insns - not great not terrible - assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff); - output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff); - output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff); - output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt)); - if (rt_val >> 48) { - assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff); - output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt)); - } + emit_movimm64(rt_val, rt); } // trashes x2 @@ -1507,8 +1526,13 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_addimm(cc<0?2:cc,adj,2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; - emit_adrp((void *)l1, 1); - emit_addimm64(1, l1 & 0xfff, 1); + intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); + if (-4294967296l <= offset && offset < 4294967296l) { + emit_adrp((void *)l1, 1); + emit_addimm64(1, l1 & 0xfff, 1); + } + else + emit_movimm64(l1, 1); } else emit_far_call(do_memhandler_pre); @@ -1892,7 +1916,7 @@ static void do_miniht_insert(u_int return_address,u_int rt,int temp) { emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } -static void clear_cache_arm64(char *start, char *end) +static unused void clear_cache_arm64(char *start, char *end) { // Don't rely on GCC's __clear_cache implementation, as it caches // icache/dcache cache line sizes, that can vary between cores on @@ -1937,7 +1961,7 @@ static void clear_cache_arm64(char *start, char *end) static void arch_init(void) { uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops; - struct tramp_insns *ops = ndrc->tramp.ops; + struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops); size_t i; assert(!(diff & 3)); start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));