X-Git-Url: https://notaz.gp2x.de/cgi-bin/gitweb.cgi?a=blobdiff_plain;f=libpcsxcore%2Fnew_dynarec%2Fassem_arm64.c;h=d35ad451eb11860d574f0209851af016eea51b7c;hb=a5cd72d0e598f037fd9d9f23948af5b2fb06e2eb;hp=ff0d1a6c293b0841953a44708eee201cc6863441;hpb=3039c914c0ac33ee89127db3bd4c53eb3c25cafd;p=pcsx_rearmed.git diff --git a/libpcsxcore/new_dynarec/assem_arm64.c b/libpcsxcore/new_dynarec/assem_arm64.c index ff0d1a6c..d35ad451 100644 --- a/libpcsxcore/new_dynarec/assem_arm64.c +++ b/libpcsxcore/new_dynarec/assem_arm64.c @@ -23,19 +23,15 @@ #include "pcnt.h" #include "arm_features.h" -#define unused __attribute__((unused)) - void do_memhandler_pre(); void do_memhandler_post(); /* Linker */ static void set_jump_target(void *addr, void *target) { - u_int *ptr = addr; + u_int *ptr = NDRC_WRITE_OFFSET(addr); intptr_t offset = (u_char *)target - (u_char *)addr; - ptr += ndrc_write_ofs / sizeof(ptr[0]); - if ((*ptr&0xFC000000) == 0x14000000) { // b assert(offset>=-134217728LL&&offset<134217728LL); *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff); @@ -47,7 +43,7 @@ static void set_jump_target(void *addr, void *target) // should only happen when jumping to an already compiled block (see add_jump_out) // a workaround would be to do a trampoline jump via a stub at the end of the block assert(-1048576 <= offset && offset < 1048576); - *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5); + *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5); } else if((*ptr&0x9f000000)==0x10000000) { // adr // generated by do_miniht_insert @@ -144,7 +140,7 @@ static unused const char *condname[16] = { static void output_w32(u_int word) { - *((u_int *)(out + ndrc_write_ofs)) = word; + *((u_int *)NDRC_WRITE_OFFSET(out)) = word; out += 4; } @@ -306,6 +302,12 @@ static void emit_add(u_int rs1, u_int rs2, u_int rt) output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt)); } +static void emit_adds(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_add64(u_int rs1, u_int rs2, u_int rt) { assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]); @@ -319,19 +321,37 @@ static void emit_adds64(u_int rs1, u_int rs2, u_int rt) } #define emit_adds_ptr emit_adds64 +static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_neg(u_int rs, u_int rt) { assem_debug("neg %s,%s\n",regname[rt],regname[rs]); output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt)); } +static void emit_negs(u_int rs, u_int rt) +{ + assem_debug("negs %s,%s\n",regname[rt],regname[rs]); + output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt)); +} + static void emit_sub(u_int rs1, u_int rs2, u_int rt) { assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); } -static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +static void emit_subs(u_int rs1, u_int rs2, u_int rt) +{ + assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]); + output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt)); +} + +static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) { assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); @@ -401,11 +421,32 @@ static void emit_movimm(u_int imm, u_int rt) } } +static void emit_movimm64(uint64_t imm, u_int rt) +{ + u_int shift, op, imm16, insns = 0; + for (shift = 0; shift < 4; shift++) { + imm16 = (imm >> shift * 16) & 0xffff; + if (!imm16) + continue; + op = insns ? 0xf2800000 : 0xd2800000; + assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16); + if (shift) + assem_debug(",lsl #%u", shift * 16); + assem_debug("\n"); + output_w32(op | (shift << 21) | imm16_rd(imm16, rt)); + insns++; + } + if (!insns) { + assem_debug("movz %s,#0\n", regname64[rt]); + output_w32(0xd2800000 | imm16_rd(0, rt)); + } +} + static void emit_readword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -416,7 +457,7 @@ static void emit_readdword(void *addr, u_int rt) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -446,7 +487,7 @@ static void emit_loadreg(u_int r, u_int hr) //case HIREG: addr = &hi; break; //case LOREG: addr = &lo; break; case CCREG: addr = &cycle_count; break; - case CSREG: addr = &Status; break; + case CSREG: addr = &psxRegs.CP0.n.SR; break; case INVCP: addr = &invc_ptr; is64 = 1; break; case ROREG: addr = &ram_offset; is64 = 1; break; default: @@ -465,7 +506,7 @@ static void emit_writeword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 3) && offset <= 16380) { - assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset)); output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt)); } else @@ -476,7 +517,7 @@ static void emit_writedword(u_int rt, void *addr) { uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local; if (!(offset & 7) && offset <= 32760) { - assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset); + assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset)); output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt)); } else @@ -547,6 +588,12 @@ static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt) output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt)); } +static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt) +{ + assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift); + output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt)); +} + static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt) { assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm); @@ -578,28 +625,43 @@ static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm); output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt)); } - else if (imm < 16777216) { - assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000); - output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff); - output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt)); + else if (imm < 16777216 && (!(imm & 0xfff) || !s)) { + assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000); + output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt)); + if (imm & 0xfff) { + assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff); + output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt)); } } - else if (-imm < 16777216) { - assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000); - output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt)); - if ((imm & 0xfff) || s) { - assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff); - output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt)); + else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) { + assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000); + output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt)); + if (-imm & 0xfff) { + assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff); + output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt)); } } - else - abort(); + else { + u_int tmp = rt; + assert(!is64); + if (rs == rt) { + host_tempreg_acquire(); + tmp = HOST_TEMPREG; + } + emit_movimm(imm, tmp); + assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]); + output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt)); + if (tmp == HOST_TEMPREG) + host_tempreg_release(); + } } static void emit_addimm(u_int rs, uintptr_t imm, u_int rt) { + if (imm == 0) { + emit_mov(rs, rt); + return; + } emit_addimm_s(0, 0, rs, imm, rt); } @@ -608,11 +670,21 @@ static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt) emit_addimm_s(0, 1, rs, imm, rt); } +static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt) +{ + emit_addimm64(rs, imm, rt); +} + static void emit_addimm_and_set_flags(int imm, u_int rt) { emit_addimm_s(1, 0, rt, imm, rt); } +static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt) +{ + emit_addimm_s(1, 0, rs, imm, rt); +} + static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt) { const char *names[] = { "and", "orr", "eor", "ands" }; @@ -818,6 +890,12 @@ static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt) output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt)); } +static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt) +{ + assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]); + output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt)); +} + static void emit_slti32(u_int rs,int imm,u_int rt) { if(rs!=rt) emit_zeroreg(rt); @@ -946,6 +1024,13 @@ static void emit_jge(const void *a) output_w32(0x54000000 | (offset << 5) | COND_GE); } +static void emit_jo(const void *a) +{ + assem_debug("bvs %p\n", a); + u_int offset = genjmpcc(a); + output_w32(0x54000000 | (offset << 5) | COND_VS); +} + static void emit_jno(const void *a) { assem_debug("bvc %p\n", a); @@ -969,9 +1054,11 @@ static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r) output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r)); } -static unused void emit_cbz(const void *a, u_int r) +static void *emit_cbz(u_int r, const void *a) { + void *ret = out; emit_cb(0, 0, a, r); + return ret; } static void emit_jmpreg(u_int r) @@ -1179,14 +1266,11 @@ static void emit_clz(u_int rs, u_int rt) } // special case for checking invalid_code -static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm) +static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt) { - host_tempreg_acquire(); - emit_shrimm(r, 12, HOST_TEMPREG); - assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]); - output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG)); - emit_cmpimm(HOST_TEMPREG, imm); - host_tempreg_release(); + emit_shrimm(r, 12, rt); + assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]); + output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt)); } // special for loadlr_assemble, rs2 is destroyed @@ -1331,16 +1415,7 @@ static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int r } // just move the whole thing. At least on Linux all addresses // seem to be 48bit, so 3 insns - not great not terrible - assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff); - output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff); - output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt)); - assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff); - output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt)); - if (rt_val >> 48) { - assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff); - output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt)); - } + emit_movimm64(rt_val, rt); } // trashes x2 @@ -1389,7 +1464,7 @@ static void do_readstub(int n) u_int reglist = stubs[n].e; const signed char *i_regmap = i_regs->regmap; int rt; - if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) { + if(dops[i].itype==C2LS||dops[i].itype==LOADLR) { rt=get_reg(i_regmap,FTEMP); }else{ rt=get_reg(i_regmap,dops[i].rt1); @@ -1419,7 +1494,7 @@ static void do_readstub(int n) emit_adds64(temp2,temp2,temp2); handler_jump=out; emit_jc(0); - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { switch(type) { case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break; case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break; @@ -1454,7 +1529,7 @@ static void do_readstub(int n) emit_addimm(cc<0?2:cc,(int)stubs[n].d,2); emit_far_call(handler); // (no cycle reload after read) - if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { + if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) { loadstore_extend(type,0,rt); } if(restore_jump) @@ -1515,8 +1590,13 @@ static void inline_readstub(enum stub_type type, int i, u_int addr, emit_addimm(cc<0?2:cc,adj,2); if(is_dynamic) { uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1; - emit_adrp((void *)l1, 1); - emit_addimm64(1, l1 & 0xfff, 1); + intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl); + if (-4294967296l <= offset && offset < 4294967296l) { + emit_adrp((void *)l1, 1); + emit_addimm64(1, l1 & 0xfff, 1); + } + else + emit_movimm64(l1, 1); } else emit_far_call(do_memhandler_pre); @@ -1540,7 +1620,7 @@ static void do_writestub(int n) u_int reglist=stubs[n].e; signed char *i_regmap=i_regs->regmap; int rt,r; - if(dops[i].itype==C1LS||dops[i].itype==C2LS) { + if(dops[i].itype==C2LS) { rt=get_reg(i_regmap,r=FTEMP); }else{ rt=get_reg(i_regmap,r=dops[i].rs2); @@ -1806,8 +1886,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) // div 0 quotient (remainder is already correct) host_tempreg_acquire(); - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,HOST_TEMPREG); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG); + emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG); + } else emit_movimm(~0,HOST_TEMPREG); emit_test(denominator,denominator); @@ -1831,8 +1913,10 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (hr >= 0) emit_mov(numerator,hr); if (lr >= 0) { - if (dops[i].opcode2 == 0x1A) // DIV - emit_sub_asrimm(0,numerator,31,lr); + if (dops[i].opcode2 == 0x1A) { // DIV + emit_add_lsrimm(WZR,numerator,31,lr); + emit_orn_asrimm(lr,numerator,31,lr); + } else emit_movimm(~0,lr); } @@ -1842,6 +1926,17 @@ static void multdiv_assemble_arm64(int i, const struct regstat *i_regs) if (lr >= 0) emit_movimm(~0,lr); } } + else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0) + { + signed char denominator = get_reg(i_regs->regmap, dops[i].rs2); + assert(denominator >= 0); + if (hr >= 0) emit_zeroreg(hr); + if (lr >= 0) { + emit_zeroreg(lr); + emit_test(denominator, denominator); + emit_csinvne_reg(lr, lr, lr); + } + } else { // Multiply by zero is zero. @@ -1900,7 +1995,7 @@ static void do_miniht_insert(u_int return_address,u_int rt,int temp) { emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]); } -static void clear_cache_arm64(char *start, char *end) +static unused void clear_cache_arm64(char *start, char *end) { // Don't rely on GCC's __clear_cache implementation, as it caches // icache/dcache cache line sizes, that can vary between cores on @@ -1945,13 +2040,13 @@ static void clear_cache_arm64(char *start, char *end) static void arch_init(void) { uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops; - struct tramp_insns *ops = ndrc->tramp.ops, *opsw; + struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops); size_t i; assert(!(diff & 3)); - opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); + start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) { - opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] - opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 + ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val] + ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17 } end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops)); }