1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
// Bitmask with bits 0-18 set; save_regs() ANDs its register list with it so
// that only these host registers are saved.
26 #define CALLER_SAVE_REGS 0x0007ffff
// Shorthand attribute for definitions that may be unreferenced in some builds.
28 #define unused __attribute__((unused))
// Memory-handler entry/exit hooks; their definitions are not in the visible
// part of this file.
// NOTE(review): empty parentheses give no prototype in C - presumably these take no arguments.
30 void do_memhandler_pre();
31 void do_memhandler_post();
// Patch the PC-relative instruction located at addr so that it refers to target.
//   addr   - address of an already emitted b, b.cond, cbz/cbnz or adr instruction
//   target - address the instruction should branch to / compute
// The byte distance target - addr is range-checked with assert() for the
// instruction's immediate width; any other instruction kind aborts.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Only the imm19 field (bits 23:5) may be replaced. Bits 4:0 hold the
    // condition (b.cond) or the full 5-bit Rt (cbz/cbnz) and must survive,
    // otherwise cbz/cbnz on registers >= 16 would be redirected to reg-16.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    // adr splits the byte offset: low 2 bits go to bits 30:29, the rest to bits 23:5
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
61 // from a pointer to external jump stub (which was produced by emit_extjump2)
62 // find where the jumping insn is
// The third instruction word of the stub (stub + 2) is required to be an adr.
// Its immediate is decoded back into a signed byte offset (immhi from bits
// 23:5 scaled by 4, immlo from bits 30:29) and applied relative to that adr.
63 static void *find_extjump_insn(void *stub)
65 int *ptr = (int *)stub + 2;
66 assert((*ptr&0x9f000000) == 0x10000000); // adr
67 int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3);
// ptr is an int pointer, so the byte offset is converted to words here
68 return ptr + offset / 4;
71 // find where external branch is linked to using addr of its stub:
72 // get address that the stub loads (dyna_linker arg1),
73 // treat it as a pointer to branch insn,
74 // return addr where that branch jumps to
// The branch immediate is sign-extended by shifting it to the top of the word
// and back down; adding it to the int pointer scales it by the 4-byte
// instruction size. Handling of other instruction kinds is not visible in this listing.
75 static void *get_pointer(void *stub)
77 int *i_ptr = find_extjump_insn(stub);
78 if ((*i_ptr&0xfc000000) == 0x14000000) // b
// imm26 occupies bits 25:0
79 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
80 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
81 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
// imm19 occupies bits 23:5
82 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
87 // Allocate a specific ARM register.
// Scans cur->regmap for an existing mapping of reg (skipping EXCLUDE_REG),
// remembers that mapping's dirty bit, then transfers it to host register hr
// and clears hr's constant flag. Several lines of this routine are not
// visible in this listing (e.g. the regmap updates themselves).
88 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
// replace hr's dirty bit with the one carried over from the old mapping
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
108 // Alloc cycle count into dedicated register
// Thin wrapper: asks alloc_arm_reg() to place CCREG in HOST_CCREG.
109 static void alloc_cc(struct regstat *cur,int i)
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
// Printable names of the 32-bit register views, indexed by register number;
// used by the assem_debug() traces. Entries 16/17 use the ip0/ip1 aliases and
// 29-31 are shown as wfp/wlr/wsp.
119 static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
// Printable names of the 64-bit register views, indexed by register number;
// used by the assem_debug() traces for pointer-sized operands.
126 static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition-code enumerators, listed in the same order as condname[] below.
// The emitters place these values directly into instruction words (OR-ed into
// b.cond, shifted to bit 12 for csel/csinc/csinv).
// NOTE(review): the enum opener is not visible here; presumably COND_EQ == 0.
134 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
135 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
// Printable condition suffixes for debug traces, indexed by condition code
// (see emit_cmov_imm, which indexes it with its cond arguments).
138 static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Store one 32-bit word (normally an instruction) at the current output
// position `out`. Advancing `out` is not visible in this listing.
143 static void output_w32(u_int word)
145 *((u_int *)out) = word;
// Store one 64-bit value at the current output position `out`.
// Advancing `out` is not visible in this listing.
149 static void output_w64(uint64_t dword)
151 *((uint64_t *)out) = dword;
// Encoding helper: rm goes to the field starting at bit 16, rd to bit 0.
// Returns the combined bits for OR-ing into an opcode.
156 static u_int rm_rd(u_int rm, u_int rd)
160 return (rm << 16) | rd;
// Encoding helper: rn goes to the field starting at bit 5, rd to bit 0.
164 static u_int rn_rd(u_int rn, u_int rd)
168 return (rn << 5) | rd;
// Encoding helper for three-register forms: rm at bit 16, rn at bit 5, rd at bit 0.
171 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
176 return (rm << 16) | (rn << 5) | rd;
// Encoding helper for four-register forms (used by the multiply emitters):
// rm_rn_rd() plus ra at bit 10.
179 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Encoding helper for register-pair load/store (see emit_ldstp):
// imm7 at bit 15, rt2 at bit 10, rn at bit 5, rt at bit 0.
185 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
191 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Encoding helper for shifted-register forms: rm_rn_rd() plus a shift
// amount imm6 at bit 10.
194 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Encoding helper for the movz/movn/movk family: a 16-bit immediate at bit 5
// and rd at bit 0. Asserts that imm16 fits in 16 bits.
200 static u_int imm16_rd(u_int imm16, u_int rd)
202 assert(imm16 < 0x10000);
204 return (imm16 << 5) | rd;
// Encoding helper: a 12-bit unsigned immediate at bit 10, rn at bit 5, rd at
// bit 0 (add/sub immediate and unsigned-offset load/store forms use it).
// Asserts that imm12 fits in 12 bits.
207 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
209 assert(imm12 < 0x1000);
212 return (imm12 << 10) | (rn << 5) | rd;
// Encoding helper: a 9-bit immediate at bit 12, rn at bit 5 and the
// transfer register (parameter rd) at bit 0. Callers pass signed offsets
// already masked with 0x1ff; larger values trip the assert.
215 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
217 assert(imm9 < 0x200);
220 return (imm9 << 12) | (rn << 5) | rd;
// Encoding helper: a 19-bit immediate at bit 5 and rt at bit 0
// (used by emit_cb and emit_loadlp_ofs). Asserts that imm19 fits in 19 bits.
223 static u_int imm19_rt(u_int imm19, u_int rt)
225 assert(imm19 < 0x80000);
227 return (imm19 << 5) | rt;
// Encoding helper for bitfield / logical-immediate forms:
// n at bit 22, immr at bit 16, imms at bit 10, rn at bit 5, rd at bit 0.
230 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
237 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Compute the 26-bit word-offset field of a b/bl from the current output
// position `out` to addr. Addresses 0..2 are placeholders and yield 0.
// An offset outside +/-128MB is reported through SysPrintf(); what follows
// the report is not visible in this listing.
240 static u_int genjmp(const u_char *addr)
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
244 if (offset < -134217728 || offset > 134217727) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
249 return ((u_int)offset >> 2) & 0x03ffffff;
// Compute the 19-bit word-offset field used by conditional branches and
// cbz/cbnz, from `out` to addr. Addresses 0..2 are placeholders (patched
// later) and yield 0. An offset outside +/-1MB is reported through
// SysPrintf(); what follows the report is not visible in this listing.
252 static u_int genjmpcc(const u_char *addr)
254 intptr_t offset = addr - out;
255 if ((uintptr_t)addr < 3) return 0;
256 if (offset < -1048576 || offset > 1048572) {
257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
261 return ((u_int)offset >> 2) & 0x7ffff;
// Returns nonzero when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two, or wraps to 0 for all-ones).
264 static uint32_t is_mask(u_int value)
266 return value && ((value + 1) & value) == 0;
269 // This function returns true if the argument contains a
270 // non-empty sequence of ones (possibly rotated) with the remainder zero.
// All-zeros and all-ones are special-cased first. (value - 1) | value fills
// the trailing zeros, so a single contiguous run becomes a low mask; the same
// test on ~value catches runs that wrap around bit 31. The results returned
// by the first two tests are not visible in this listing.
271 static uint32_t is_rotated_mask(u_int value)
273 if (value == 0 || value == ~0)
275 if (is_mask((value - 1) | value))
277 return is_mask((~value - 1) | ~value);
// Derive the immr/imms fields of a 32-bit logical immediate for a value that
// satisfies is_rotated_mask(). The run length is computed from the leading
// and trailing zero counts. The second, textually identical test presumably
// runs after value has been changed (e.g. inverted) on a line that is not
// visible in this listing, as are the remaining *immr / *imms stores.
280 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
282 int lzeros, tzeros, ones;
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
// rotate-right amount that moves a low-aligned run up to bit position tzeros
288 *immr = (32 - tzeros) & 31;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
// Emit a 32-bit register move rt = rs, encoded with the orr opcode
// (same 0x2a000000 as emit_or) and WZR as the first operand.
304 static void emit_mov(u_int rs, u_int rt)
306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
// Emit a 64-bit register move rt = rs (opcode of emit_mov with bit 31 set).
310 static void emit_mov64(u_int rs, u_int rt)
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
// Emit 32-bit "add rt, rs1, rs2".
316 static void emit_add(u_int rs1, u_int rs2, u_int rt)
318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 64-bit "add rt, rs1, rs2".
322 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 64-bit "adds rt, rs1, rs2" (the flag-setting variant of emit_add64).
328 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "neg rt, rs": the sub opcode (same as emit_sub) with WZR as
// the minuend.
334 static void emit_neg(u_int rs, u_int rt)
336 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
337 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
// Emit 32-bit "sub rt, rs1, rs2" (shift amount field left at zero).
340 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
342 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
343 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
// Emit 32-bit "sub rt, rs1, rs2, asr #shift": rs2 is arithmetically shifted
// right by the immediate before being subtracted.
346 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
348 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
349 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
// Emit "movz rt, #imm" (32-bit). imm must fit in 16 bits (checked by imm16_rd).
352 static void emit_movz(u_int imm, u_int rt)
354 assem_debug("movz %s,#%#x\n", regname[rt], imm);
355 output_w32(0x52800000 | imm16_rd(imm, rt));
// Emit "movz rt, #imm, lsl #16" (32-bit): imm is the upper halfword value
// and must fit in 16 bits.
358 static void emit_movz_lsl16(u_int imm, u_int rt)
360 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x52a00000 | imm16_rd(imm, rt));
// Emit "movn rt, #imm" (32-bit). emit_movimm passes the bitwise inverse of
// the wanted constant here.
364 static void emit_movn(u_int imm, u_int rt)
366 assem_debug("movn %s,#%#x\n", regname[rt], imm);
367 output_w32(0x12800000 | imm16_rd(imm, rt));
// Emit "movn rt, #imm, lsl #16" (32-bit); imm must fit in 16 bits.
370 static void emit_movn_lsl16(u_int imm,u_int rt)
372 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
373 output_w32(0x12a00000 | imm16_rd(imm, rt));
// Emit "movk rt, #imm" (32-bit); imm must fit in 16 bits.
376 static void emit_movk(u_int imm,u_int rt)
378 assem_debug("movk %s,#%#x\n", regname[rt], imm);
379 output_w32(0x72800000 | imm16_rd(imm, rt));
// Emit "movk rt, #imm, lsl #16" (32-bit); emit_movimm uses it to fill in the
// upper halfword after a movz. One line of this function is not visible in
// this listing.
382 static void emit_movk_lsl16(u_int imm,u_int rt)
385 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
386 output_w32(0x72a00000 | imm16_rd(imm, rt));
// NOTE(review): the body is not visible in this listing. Callers such as
// emit_slti32 use it to clear rt ahead of a conditional set, so it presumably
// loads zero into rt.
389 static void emit_zeroreg(u_int rt)
// Load a 32-bit constant into rt using the shortest visible form:
//   - movn when the inverted value fits in 16 bits,
//   - movz/movn with lsl #16 when only the upper halfword matters,
//   - a single orr-immediate from WZR when imm is a rotated run of ones,
//   - otherwise movz (low half) followed by movk lsl #16 (high half).
// The first test of the chain is not visible in this listing.
394 static void emit_movimm(u_int imm, u_int rt)
398 else if ((~imm) < 65536)
400 else if ((imm&0xffff) == 0)
401 emit_movz_lsl16(imm >> 16, rt);
402 else if (((~imm)&0xffff) == 0)
403 emit_movn_lsl16(~imm >> 16, rt);
404 else if (is_rotated_mask(imm)) {
406 gen_logical_imm(imm, &immr, &imms);
407 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
408 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
411 emit_movz(imm & 0xffff, rt);
412 emit_movk_lsl16(imm >> 16, rt);
// Emit a 32-bit load of the variable at addr into rt, addressed as
// [FP + offset] where offset is addr's distance from dynarec_local.
// Only 4-byte-aligned offsets up to 16380 can be encoded (scaled imm12);
// the handling of other addresses is not visible in this listing.
416 static void emit_readword(void *addr, u_int rt)
418 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
419 if (!(offset & 3) && offset <= 16380) {
420 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
421 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emit a 64-bit load of the variable at addr into rt, addressed as
// [FP + offset] relative to dynarec_local. Requires an 8-byte-aligned offset
// of at most 32760 (imm12 scaled by 8); other cases are not visible here.
427 static void emit_readdword(void *addr, u_int rt)
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 7) && offset <= 32760) {
431 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
432 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emit a sign-extending 16-bit load (ldrsh) of the variable at addr into rt,
// addressed as [FP + offset] relative to dynarec_local. Requires an even
// offset of at most 8190 (imm12 scaled by 2); other cases are not visible here.
438 static void emit_readshword(void *addr, u_int rt)
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Emit a load of emulated register r from its backing storage into host
// register hr. By default the storage is psxRegs.GPR.r[r] (r must then be
// below 34); CCREG, CSREG and INVCP are redirected to cycle_count, Status and
// invc_ptr. INVCP is flagged as 64-bit and is read with emit_readdword(),
// everything else with emit_readword(). Some lines are not visible in this listing.
449 static void emit_loadreg(u_int r, u_int hr)
456 void *addr = &psxRegs.GPR.r[r];
458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
462 case INVCP: addr = &invc_ptr; is64 = 1; break;
463 default: assert(r < 34); break;
466 emit_readdword(addr, hr);
468 emit_readword(addr, hr);
// Emit a 32-bit store of rt to the variable at addr, addressed as
// [FP + offset] relative to dynarec_local. Requires a 4-byte-aligned offset
// of at most 16380; other cases are not visible in this listing.
472 static void emit_writeword(u_int rt, void *addr)
474 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
475 if (!(offset & 3) && offset <= 16380) {
476 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
477 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emit a 64-bit store of rt to the variable at addr, addressed as
// [FP + offset] relative to dynarec_local. Requires an 8-byte-aligned offset
// of at most 32760; other cases are not visible in this listing.
483 static void emit_writedword(u_int rt, void *addr)
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 7) && offset <= 32760) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
488 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emit a 32-bit store of host register hr to the backing storage of emulated
// register r: psxRegs.GPR.r[r] by default (r must then be below 34), or
// cycle_count for CCREG. Some lines are not visible in this listing.
494 static void emit_storereg(u_int r, u_int hr)
497 void *addr = &psxRegs.GPR.r[r];
499 //case HIREG: addr = &hi; break;
500 //case LOREG: addr = &lo; break;
501 case CCREG: addr = &cycle_count; break;
502 default: assert(r < 34); break;
504 emit_writeword(hr, addr);
// Emit 32-bit "tst rs, rt": the result register field is WZR, so only the
// flags are produced.
507 static void emit_test(u_int rs, u_int rt)
509 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
510 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
// Emit 32-bit "tst rs, #imm". Only immediates encodable as a logical
// immediate (a rotated run of ones) are supported; anything else asserts.
513 static void emit_testimm(u_int rs, u_int imm)
516 assem_debug("tst %s,#%#x\n", regname[rs], imm);
517 assert(is_rotated_mask(imm)); // good enough for PCSX
518 gen_logical_imm(imm, &immr, &imms);
519 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
// Emit 32-bit "mvn rt, rs" (bitwise NOT), encoded with WZR as first operand.
522 static void emit_not(u_int rs,u_int rt)
524 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
525 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
// Emit 32-bit "and rt, rs1, rs2".
528 static void emit_and(u_int rs1,u_int rs2,u_int rt)
530 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
531 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "orr rt, rs1, rs2".
534 static void emit_or(u_int rs1,u_int rs2,u_int rt)
536 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
537 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "bic rt, rs1, rs2" (rs1 AND NOT rs2).
540 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
542 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
543 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "orr rt, rt, rs, lsl #imm": ORs rs shifted left by imm into
// rt (rt is both source and destination).
546 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
548 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
549 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
// Emit 32-bit "orr rt, rt, rs, lsr #imm": ORs rs logically shifted right by
// imm into rt (rt is both source and destination).
552 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
554 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
555 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
// Emit 32-bit "bic rt, rt, rs, asr #imm": clears in rt the bits set in rs
// arithmetically shifted right by imm.
558 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
560 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
561 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
// Emit 32-bit "eor rt, rs1, rs2".
564 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
566 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
567 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "eor rt, rs1, rs2, asr #imm".
570 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
572 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
573 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Emit rt = rs + imm, picking an encoding by magnitude:
//   s    - nonzero selects the flag-setting variant (adds/subs)
//   is64 - nonzero selects the 64-bit form
//   imm  - addend; negative values arrive as large unsigned numbers and are
//          handled by emitting sub with -imm
// Small magnitudes take one instruction. Magnitudes below 2^24 take an
// add/sub of the upper 12 bits (the lsl #12 form) into rt, followed by an
// add/sub of the low 12 bits from rt; only the second instruction carries
// the s bit. The first test of the chain and the fallback for larger values
// are not visible in this listing.
// NOTE(review): the traces always print 32-bit register names, and in the
// two-instruction paths the source registers printed (rt,rt then rt,rs) are
// swapped relative to what is encoded (rs first, then rt).
576 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
578 unused const char *st = s ? "s" : "";
579 s = s ? 0x20000000 : 0;
580 is64 = is64 ? 0x80000000 : 0;
582 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
583 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
585 else if (-imm < 4096) {
586 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
587 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
589 else if (imm < 16777216) {
590 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
591 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
// the second add is also emitted when flags are requested, even if the low bits are zero
592 if ((imm & 0xfff) || s) {
593 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
597 else if (-imm < 16777216) {
598 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
599 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
602 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
// Emit 32-bit rt = rs + imm without touching flags.
609 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
611 emit_addimm_s(0, 0, rs, imm, rt);
// Emit 64-bit rt = rs + imm without touching flags.
614 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
616 emit_addimm_s(0, 1, rs, imm, rt);
// Emit 32-bit rt += imm using the flag-setting variant. The signed imm is
// converted to uintptr_t at the call, so negative values take the sub paths.
619 static void emit_addimm_and_set_flags(int imm, u_int rt)
621 emit_addimm_s(1, 0, rt, imm, rt);
// Emit 32-bit rt += imm without touching flags (in-place form of emit_addimm).
624 static void emit_addimm_no_flags(u_int imm,u_int rt)
626 emit_addimm(rt,imm,rt);
// Emit a 32-bit logical operation rt = rs <op> imm.
//   op - index into names[]: 0 and, 1 orr, 2 eor, 3 ands
// When imm is a rotated run of ones it is encoded directly as a logical
// immediate. Otherwise imm is materialized in HOST_TEMPREG and the register
// form is used; the temp register is acquired/released around that unless
// the caller is evidently already using it as the destination only.
// NOTE(review): op is OR-ed into the opcode as-is; presumably a line not
// visible in this listing first moves it into the opcode's op field.
629 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
631 const char *names[] = { "and", "orr", "eor", "ands" };
632 const char *name = names[op];
635 if (is_rotated_mask(imm)) {
636 gen_logical_imm(imm, &immr, &imms);
637 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
638 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
641 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
642 host_tempreg_acquire();
643 emit_movimm(imm, HOST_TEMPREG);
644 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
645 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
646 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
647 host_tempreg_release();
// Emit 32-bit rt = rs & imm via emit_logicop_imm (op 0 = "and").
// Lines preceding the call are not visible in this listing.
652 static void emit_andimm(u_int rs, u_int imm, u_int rt)
657 emit_logicop_imm(0, rs, imm, rt);
// Emit 32-bit rt = rs | imm via emit_logicop_imm (op 1 = "orr").
// Lines preceding the call are not visible in this listing.
660 static void emit_orimm(u_int rs, u_int imm, u_int rt)
667 emit_logicop_imm(1, rs, imm, rt);
// Emit 32-bit rt = rs ^ imm via emit_logicop_imm (op 2 = "eor").
// Lines preceding the call are not visible in this listing.
670 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
677 emit_logicop_imm(2, rs, imm, rt);
// Emit 32-bit "sbfm rt, rs, #0, #imm": signed bitfield move with immr = 0 and
// imms = imm (emit_signextend16 is the imm = 15 case of the same encoding).
680 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
682 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
// Emit 32-bit "ubfm rt, rs, #0, #imm": unsigned bitfield move with immr = 0
// and imms = imm.
686 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
688 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
692 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
694 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
// Emit 32-bit "lsr rt, rs, #imm" (ubfm with immr = imm, imms = 31).
698 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
700 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
// Emit the 64-bit "lsr rt, rs, #imm" (immr = imm, imms = 63; the opcode
// constant already carries bit 22, hence n is passed as 0).
// NOTE(review): the trace prints 32-bit register names for a 64-bit shift.
704 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
// Emit 32-bit "asr rt, rs, #imm" (the sbfm opcode with immr = imm, imms = 31).
710 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
// Emit 32-bit "ror rt, rs, #imm": rs is supplied as both source operands,
// with imm in the shift-amount field.
716 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
718 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
719 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
// Emit 32-bit "sxth rt, rs": sign-extend the low halfword of rs into rt
// (the sbfm opcode with immr = 0, imms = 15).
722 static void emit_signextend16(u_int rs, u_int rt)
724 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
725 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
// Emit 32-bit "lsl rt, rs, rshift" (shift amount taken from a register).
728 static void emit_shl(u_int rs,u_int rshift,u_int rt)
730 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
731 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
// Emit 32-bit "lsr rt, rs, rshift" (shift amount taken from a register).
734 static void emit_shr(u_int rs,u_int rshift,u_int rt)
736 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
// Emit 32-bit "asr rt, rs, rshift" (shift amount taken from a register).
740 static void emit_sar(u_int rs,u_int rshift,u_int rt)
742 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
// Emit a 32-bit compare of rs against imm (flags only, result goes to WZR).
// Visible forms: cmp with a 12-bit immediate, cmn with the negated value
// when -imm < 4096, cmp with the immediate shifted by 12 when the low 12
// bits are zero and imm < 2^24, and otherwise a register compare against
// imm loaded into HOST_TEMPREG (acquired and released around the sequence).
// The first test of the chain is not visible in this listing.
// NOTE(review): the cmn trace prints imm although -imm is what gets encoded.
746 static void emit_cmpimm(u_int rs, u_int imm)
749 assem_debug("cmp %s,%#x\n", regname[rs], imm);
750 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
752 else if (-imm < 4096) {
753 assem_debug("cmn %s,%#x\n", regname[rs], imm);
754 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
756 else if (imm < 16777216 && !(imm & 0xfff)) {
757 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
758 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
761 host_tempreg_acquire();
762 emit_movimm(imm, HOST_TEMPREG);
763 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
764 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
765 host_tempreg_release();
// Conditionally replace rt with the constant imm (0 or 1 only, asserted).
//   cond0 - condition under which rt receives imm
//   cond1 - the opposite condition
// Two encodings are visible: "csinc rt, rt, wzr, cond1" keeps rt when cond1
// holds and otherwise yields wzr + 1; "csel rt, wzr, rt, cond0" yields zero
// when cond0 holds and otherwise keeps rt. Presumably the csinc form serves
// imm == 1 and the csel form imm == 0; the selecting lines are not visible
// in this listing.
769 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
771 assert(imm == 0 || imm == 1);
772 assert(cond0 < 0x10);
773 assert(cond1 < 0x10);
775 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
776 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
778 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
779 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
// Set rt to imm (0 or 1) when the flags say "not equal"; otherwise rt is kept.
783 static void emit_cmovne_imm(u_int imm,u_int rt)
785 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
// Set rt to imm (0 or 1) when the flags say "signed less than"; otherwise rt is kept.
788 static void emit_cmovl_imm(u_int imm,u_int rt)
790 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
// Set rt to imm (0 or 1) when the carry flag is clear (COND_CC, i.e.
// unsigned "below" after a compare); otherwise rt is kept.
793 static void emit_cmovb_imm(int imm,u_int rt)
795 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
// Emit "csel rt, rs, rt, eq": rt = rs when equal, otherwise unchanged.
798 static void emit_cmoveq_reg(u_int rs,u_int rt)
800 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
801 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
// Emit "csel rt, rs, rt, ne": rt = rs when not equal, otherwise unchanged.
804 static void emit_cmovne_reg(u_int rs,u_int rt)
806 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
// Emit "csel rt, rs, rt, lt": rt = rs when signed less-than, otherwise unchanged.
810 static void emit_cmovl_reg(u_int rs,u_int rt)
812 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
// Emit "csel rt, rs, rt, mi": rt = rs when the negative flag is set,
// otherwise unchanged.
816 static void emit_cmovs_reg(u_int rs,u_int rt)
818 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
// Emit "csinv rt, rs1, rs2, le": rt = rs1 when signed less-or-equal,
// otherwise the bitwise inverse of rs2.
822 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
824 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
825 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
// Set rt to 1 on signed less-than, else 0. rt is zeroed up front when it is
// distinct from rs; when rt aliases rs it is cleared only afterwards, via
// emit_movimm(), so the source survives until then. The comparison of rs
// with imm is not visible in this listing (presumably between the two
// clears).
828 static void emit_slti32(u_int rs,int imm,u_int rt)
830 if(rs!=rt) emit_zeroreg(rt);
832 if(rs==rt) emit_movimm(0,rt);
833 emit_cmovl_imm(1,rt);
// Unsigned counterpart of emit_slti32: set rt to 1 when the carry-clear
// condition holds, else 0. The comparison itself is not visible in this
// listing.
836 static void emit_sltiu32(u_int rs,int imm,u_int rt)
838 if(rs!=rt) emit_zeroreg(rt);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovb_imm(1,rt);
// Emit 32-bit "cmp rs, rt": a flag-setting subtract whose result is
// discarded into WZR.
844 static void emit_cmp(u_int rs,u_int rt)
846 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
847 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
// NOTE(review): most of this routine is not visible in this listing. The
// visible tail clears rt to 0 when the "less than" condition holds; by its
// name the routine presumably produces a "greater than zero" boolean.
850 static void emit_set_gz32(u_int rs, u_int rt)
852 //assem_debug("set_gz32\n");
855 emit_cmovl_imm(0,rt);
// Turn rs into a boolean in rt: rs is copied to rt if needed, then rt is
// forced to 1 when the "not equal" condition holds. The flag-setting test
// between the two steps is not visible in this listing.
858 static void emit_set_nz32(u_int rs, u_int rt)
860 //assem_debug("set_nz32\n");
861 if(rs!=rt) emit_mov(rs,rt);
863 emit_cmovne_imm(1,rt);
// Set rt to 1 on signed less-than, else 0. rt is zeroed up front when it
// aliases neither source; otherwise it is cleared afterwards via
// emit_movimm(). The comparison of rs1 with rs2 is not visible in this
// listing.
866 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
868 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
869 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
871 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
872 emit_cmovl_imm(1,rt);
// Unsigned counterpart of emit_set_if_less32: set rt to 1 when the
// carry-clear condition holds, else 0. The comparison itself is not visible
// in this listing.
875 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
877 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
878 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
880 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
881 emit_cmovb_imm(1,rt);
// Returns nonzero when address a lies within +/-128MB of the current output
// position `out`, i.e. within reach of a direct b/bl.
884 static int can_jump_or_call(const void *a)
886 intptr_t diff = (u_char *)a - out;
887 return (-134217728 <= diff && diff <= 134217727);
// Emit "bl a" when a is within +/-128MB of `out`. What happens for targets
// out of range is not visible in this listing.
890 static void emit_call(const void *a)
892 intptr_t diff = (u_char *)a - out;
893 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
895 if (-134217728 <= diff && diff <= 134217727)
896 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
// Emit an unconditional "b a". genjmp() yields a zero offset for placeholder
// addresses, leaving the branch to be patched later.
901 static void emit_jmp(const void *a)
903 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
904 u_int offset = genjmp(a);
905 output_w32(0x14000000 | offset);
// Emit "b.ne a" (branch if not equal); the 19-bit offset comes from genjmpcc().
908 static void emit_jne(const void *a)
910 assem_debug("bne %p\n", a);
911 u_int offset = genjmpcc(a);
912 output_w32(0x54000000 | (offset << 5) | COND_NE);
// Emit "b.eq a" (branch if equal); the 19-bit offset comes from genjmpcc().
915 static void emit_jeq(const void *a)
917 assem_debug("beq %p\n", a);
918 u_int offset = genjmpcc(a);
919 output_w32(0x54000000 | (offset << 5) | COND_EQ);
// Emit "b.mi a" (branch if the negative flag is set).
922 static void emit_js(const void *a)
924 assem_debug("bmi %p\n", a);
925 u_int offset = genjmpcc(a);
926 output_w32(0x54000000 | (offset << 5) | COND_MI);
// Emit "b.pl a" (branch if the negative flag is clear).
929 static void emit_jns(const void *a)
931 assem_debug("bpl %p\n", a);
932 u_int offset = genjmpcc(a);
933 output_w32(0x54000000 | (offset << 5) | COND_PL);
// Emit "b.lt a" (branch if signed less-than).
936 static void emit_jl(const void *a)
938 assem_debug("blt %p\n", a);
939 u_int offset = genjmpcc(a);
940 output_w32(0x54000000 | (offset << 5) | COND_LT);
// Emit "b.ge a" (branch if signed greater-or-equal).
943 static void emit_jge(const void *a)
945 assem_debug("bge %p\n", a);
946 u_int offset = genjmpcc(a);
947 output_w32(0x54000000 | (offset << 5) | COND_GE);
// Emit "b.vc a" (branch if the overflow flag is clear).
950 static void emit_jno(const void *a)
952 assem_debug("bvc %p\n", a);
953 u_int offset = genjmpcc(a);
954 output_w32(0x54000000 | (offset << 5) | COND_VC);
// Emit "b.cs a" (branch if the carry flag is set).
957 static void emit_jc(const void *a)
959 assem_debug("bcs %p\n", a);
960 u_int offset = genjmpcc(a);
961 output_w32(0x54000000 | (offset << 5) | COND_CS);
// Emit a compare-and-branch on register r to address a.
//   isnz - nonzero emits cbnz, zero emits cbz
//   is64 - nonzero tests the 64-bit register, zero the 32-bit view
// The flags are converted to their opcode bits (bit 24 and bit 31) before
// being merged with the 19-bit offset and register fields.
964 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
966 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
967 u_int offset = genjmpcc(a);
968 is64 = is64 ? 0x80000000 : 0;
969 isnz = isnz ? 0x01000000 : 0;
970 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// NOTE(review): the body is not visible in this listing; presumably a thin
// wrapper around emit_cb() for the cbz case.
973 static void emit_cbz(const void *a, u_int r)
// Emit "br r": indirect branch to the address held in 64-bit register r.
978 static void emit_jmpreg(u_int r)
980 assem_debug("br %s\n", regname64[r]);
981 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
// Emit "ret r": return through register r. The trace omits the operand
// when r is LR.
984 static void emit_retreg(u_int r)
986 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
987 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// NOTE(review): the body is not visible in this listing; presumably emits a
// plain return through LR via emit_retreg().
990 static void emit_ret(void)
// Emit "adr rt, addr": load the PC-relative address addr into 64-bit
// register rt. addr must be within +/-1MB of `out` (asserted). The byte
// offset is split: low 2 bits go to bits 30:29, the rest to bits 23:5.
995 static void emit_adr(void *addr, u_int rt)
997 intptr_t offset = (u_char *)addr - out;
998 assert(-1048576 <= offset && offset < 1048576);
1000 assem_debug("adr x%d,#%#lx\n", rt, offset);
1001 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// Emit "adrp rt, addr": load the 4KB page address of addr into rt, computed
// relative to the page of `out`. The page distance must be within +/-4GB
// (asserted).
// NOTE(review): the immediate fields are taken from `offset` as if it were a
// page count; presumably a line not visible in this listing scales it by
// 4096 first (the trace appends "(000)" to the printed value).
1004 static void emit_adrp(void *addr, u_int rt)
1006 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1007 assert(-4294967296l <= offset && offset < 4294967296l);
1010 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1011 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
// Emit "ldur rt, [rs + offset]": 32-bit load with an unscaled signed 9-bit
// offset, which must be in -256..255 (asserted).
1014 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1016 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1017 assert(-256 <= offset && offset < 256);
1018 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit a byte store of rt to [rs1 + rs2], with rs1 a 64-bit base and rs2 a
// 32-bit index register.
1021 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1023 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1024 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
// Emit a halfword store of rt to [rs1 + rs2], with rs1 a 64-bit base and
// rs2 a 32-bit index register.
1027 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1029 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1030 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
// Emit a 32-bit store of rt to [rs1 + rs2], with rs1 a 64-bit base and rs2
// a 32-bit index register.
1033 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1035 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1036 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
// Emit a 64-bit load of rt from [rs1 + rs2 * 8]: the 32-bit index rs2 is
// zero-extended and scaled by 8 ("uxtw #3" in the trace), i.e. rs2 indexes
// an array of 8-byte entries at rs1.
1039 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1041 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
// Emit a zero-extending byte load of rt from [rs1 + rs2], with rs1 a 64-bit
// base and rs2 a 32-bit index register.
1045 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
// Emit a sign-extending byte load (ldrsb) of rt from [rs1 + rs2], with rs1
// a 64-bit base and rs2 a 32-bit index register.
1051 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
// Emit a zero-extending halfword load of rt from [rs1 + rs2], with rs1 a
// 64-bit base and rs2 a zero-extended 32-bit index ("uxtw" in the trace).
1057 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
// Emit a sign-extending halfword load (ldrsh) of rt from [rs1 + rs2], with
// rs1 a 64-bit base and rs2 a zero-extended 32-bit index.
1063 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
// Emit a 32-bit load of rt from [rs1 + rs2], with rs1 a 64-bit base and rs2
// a zero-extended 32-bit index.
1069 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
// Emit "ldursb rt, [rs + offset]": sign-extending byte load with an unscaled
// offset in -256..255 (asserted).
1075 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit "ldursh rt, [rs + offset]": sign-extending halfword load with an
// unscaled offset in -256..255 (asserted).
1082 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit "ldurb rt, [rs + offset]": zero-extending byte load with an unscaled
// offset in -256..255 (asserted).
1089 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit "ldurh rt, [rs + offset]": zero-extending halfword load with an
// unscaled offset in -256..255 (asserted).
1096 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit a 32-bit store of rt to [rs + offset]. Non-negative 4-byte-aligned
// offsets up to 16380 use the scaled str form; otherwise offsets in
// -256..255 use the unscaled stur form. Handling of other offsets is not
// visible in this listing.
// NOTE(review): the traces print the base with its 32-bit name, unlike the
// halfword/byte variants which use regname64.
1103 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Emit a halfword store of rt to [rs + offset]. Non-negative even offsets up
// to 8190 use the scaled strh form; otherwise offsets in -256..255 use
// sturh. Handling of other offsets is not visible in this listing.
1117 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Emit a byte store of rt to [rs + offset]. Offsets 0..4095 use the strb
// unsigned-offset form; otherwise offsets in -256..255 (in practice the
// negative ones) use sturb. Handling of other offsets is not visible in this
// listing.
1131 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Emit "umull rt, rs1, rs2": unsigned 32x32 multiply producing a 64-bit
// result in rt (the accumulator field is set to WZR).
1145 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
// Emit "smull rt, rs1, rs2": signed 32x32 multiply producing a 64-bit result
// in rt (the accumulator field is set to WZR).
1151 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
// Emit 32-bit "msub rt, rs1, rs2, rs3": rt = rs3 - rs1 * rs2 (rs3 is placed
// in the accumulator field).
1157 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
// Emit 32-bit "sdiv rt, rs1, rs2": signed division rs1 / rs2.
1163 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "udiv rt, rs1, rs2": unsigned division rs1 / rs2.
1169 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
// Emit 32-bit "clz rt, rs": count leading zero bits of rs.
1175 static void emit_clz(u_int rs, u_int rt)
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
1181 // special case for checking invalid_code
// Emits: temp = r >> 12; temp = byte at [rbase + temp]; compare temp with imm.
// In other words rbase is treated as a byte table indexed by the upper bits
// of r. HOST_TEMPREG is acquired for the sequence and released afterwards;
// only the flags survive.
1182 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
1192 // special for loadlr_assemble, rs2 is destroyed
// Emitted as two steps: rs2 is shifted in place via emit_shl (by the
// register `shift`), then emit_bic(rs1, rs2, rt) combines it with rs1 into rt.
1193 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
// Counterpart of emit_bic_lsl using emit_shr: rs2 is shifted in place
// (and therefore overwritten), then emit_bic(rs1, rs2, rt) is emitted.
1199 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
// Emit a 64-bit `ldr (literal)` into register rt (base word 0x58000000),
// with `ofs` placed in the 19-bit immediate field by imm19_rt().
// Callers in this file pass ofs = 0 and patch the field later via set_loadlp().
1205 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1207 output_w32(0x58000000 | imm19_rt(ofs, rt));
// Emit a single-register load or store with an unsigned scaled offset.
//   is_st - non-zero for a store, zero for a load
//   is64  - non-zero for a 64-bit (x) access, zero for 32-bit (w)
//   rt    - register transferred
//   rn    - base register
//   ofs   - byte offset; must be a multiple of the access size (4 or 8)
1210 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1212 u_int op = 0xb9000000;
1213 unused const char *ldst = is_st ? "st" : "ld";
1214 unused char rp = is64 ? 'x' : 'w';
1215 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
// normalise is64 to 0/1 so it can be used in the shift amounts below
1216 is64 = is64 ? 1 : 0;
// reject offsets that are not aligned to the access size
1217 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// the immediate field holds the offset in units of the access size
1218 ofs = (ofs >> (2+is64));
1219 if (!is_st) op |= 0x00400000;
1220 if (is64) op |= 0x40000000;
1221 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store (ldp/stp) with a signed scaled offset.
//   is_st    - non-zero for a store, zero for a load
//   is64     - non-zero for 64-bit (x) registers, zero for 32-bit (w)
//   rt1, rt2 - the two registers transferred
//   rn       - base register
//   ofs      - byte offset; must be a multiple of the register size and,
//              once scaled, fit in the range [-64, 63]
1224 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1226 u_int op = 0x29000000;
1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
1229 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalise is64 to 0/1 so it can be used in the shift amounts below
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// scale to register-size units; the result must fit a signed 7-bit field
1232 ofs = (ofs >> (2+is64));
1233 assert(-64 <= ofs && ofs <= 63);
1235 if (!is_st) op |= 0x00400000;
1236 if (is64) op |= 0x80000000;
1237 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Emit 64-bit stores (is_store != 0) or loads (is_store == 0) for the
// registers whose bits are set in reglist.  The visible lines show the
// list being consumed bit by bit, transfers issued as pairs via
// emit_ldstp() and as a single emit_ldst(), all addressed at
// SP + SSP_CALLEE_REGS + ofs, and a final check that ofs has not grown
// past SSP_CALLER_REGS.
1240 static void save_load_regs_all(int is_store, u_int reglist)
// the loop ends as soon as no set bits remain in reglist
1244 for (r = 0; reglist; r++, reglist >>= 1) {
1248 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1254 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1257 assert(ofs <= SSP_CALLER_REGS);
1260 // Save registers before function call
// reglist is a bitmask of host registers; it is masked with
// CALLER_SAVE_REGS and handed to save_load_regs_all() in store mode.
1261 static void save_regs(u_int reglist)
1263 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1264 save_load_regs_all(1, reglist);
1267 // Restore registers after function call
// Mirror of save_regs(): reglist is masked with CALLER_SAVE_REGS and
// handed to save_load_regs_all() in load mode.
1268 static void restore_regs(u_int reglist)
1270 reglist &= CALLER_SAVE_REGS; // only the caller-save registers were saved
1271 save_load_regs_all(0, reglist);
1274 /* Stubs/epilogue */
// NOTE(review): no body lines are visible for this function in this
// listing; presumably a literal-pool hook shared with other backends —
// confirm against the full source.
1276 static void literal_pool(int n)
// NOTE(review): no body lines are visible for this function in this
// listing; presumably the companion of literal_pool() — confirm against
// the full source.
1281 static void literal_pool_jumpover(int n)
1285 // parsed by get_pointer, find_extjump_insn
// Emit an external-jump stub for the branch instruction located at addr.
// The visible sequence loads the 32-bit `target` into host register 0
// (movz for the low half, movk for the high half) and ends with a far
// jump to `linker`.
1286 static void emit_extjump2(u_char *addr, u_int target, void *linker)
// the top byte of the instruction word at addr must identify b or b.cond
1288 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1290 emit_movz(target & 0xffff, 0);
1291 emit_movk_lsl16(target >> 16, 0);
1293 // addr is in the current recompiled block (max 256k)
1294 // offset shouldn't exceed +/-1MB
1296 emit_far_jump(linker);
// Sanity check for a stub produced by emit_extjump2(): the visible
// assertion requires the first instruction word to be a movz into
// register 0 (any 16-bit immediate).
1299 static void check_extjump2(void *src)
1302 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1306 // put rt_val into rt, potentially making use of rs with value rs_val
// Strategy, in the order tested:
//  1. add/sub an immediate when the difference is within +/-4095, or within
//     +/-16777215 with the low 12 bits clear;
//  2. a dedicated case when rt_val is the bitwise complement of rs_val;
//  3. an xor with an immediate when rs_val ^ rt_val is a rotated mask;
//  4. otherwise a plain immediate load of rt_val.
1307 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1309 int diff = rt_val - rs_val;
1310 if ((-4096 < diff && diff < 4096)
1311 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1312 emit_addimm(rs, diff, rt);
1313 else if (rt_val == ~rs_val)
1315 else if (is_rotated_mask(rs_val ^ rt_val))
1316 emit_xorimm(rs, rs_val ^ rt_val, rt);
1318 emit_movimm(rt_val, rt);
1321 // return 1 if the above function can do its job cheaply
// The visible conditions accept a small difference, a larger difference
// with the low 12 bits clear, or an xor of the two values that forms a
// rotated mask.
1322 static int is_similar_value(u_int v1, u_int v2)
1325 return (-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1328 || is_rotated_mask(v1 ^ v2);
// Move the values held in host registers a0 and a1 into host registers 0
// and 1 using 64-bit moves.  One visible path routes a0 through register 2
// as a scratch before filling registers 1 and 0.
// NOTE(review): a0/a1 are u_int, so the `>= 0` tests below are always
// true — confirm whether negative "no register" values are ever passed.
1332 static void pass_args64(u_int a0, u_int a1)
1336 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1338 else if(a0!=0&&a1==0) {
1340 if (a0>=0) emit_mov64(a0,0);
1343 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1344 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Emit the move from rs to rt appropriate for the access kind `type`.
// Visible cases: signed bitfield moves (sbfm) up to bit 7 / 15 for
// LOADB / LOADH, unsigned bitfield moves (ubfm) up to bit 7 / 15 for
// STOREB / STOREH, and a plain move (skipped when rs == rt) for STOREW.
1348 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1351 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1353 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1354 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1356 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1358 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1363 #include "pcsxmem.h"
1364 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for memory-read stub number n.
// Outline of the visible steps:
//  - patch the branch recorded at stubs[n].addr so it lands here;
//  - pick the destination host register and look for a free temporary;
//  - look the address up in mem_rtab and, on one path, perform the load
//    directly with a dual-indexed load and return to stubs[n].retaddr;
//  - otherwise call the jump_handler_read8/16/32 routine with the address
//    and table entry as arguments and the adjusted cycle count in
//    register 2, extend the result into rt, restore registers and return.
1366 static void do_readstub(int n)
1368 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1369 set_jump_target(stubs[n].addr, out);
1370 enum stub_type type = stubs[n].type;
1372 int rs = stubs[n].b;
1373 const struct regstat *i_regs = (void *)stubs[n].c;
1374 u_int reglist = stubs[n].e;
1375 const signed char *i_regmap = i_regs->regmap;
// coprocessor loads and LOADLR go through the FTEMP register mapping
1377 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1378 rt=get_reg(i_regmap,FTEMP);
1380 rt=get_reg(i_regmap,rt1[i]);
1383 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1384 void *restore_jump = NULL, *handler_jump = NULL;
// search host registers below HOST_CCREG for one that is not in reglist
1386 for (r = 0; r < HOST_CCREG; r++) {
1387 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1392 if(rt>=0&&rt1[i]!=0)
1399 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// table lookup: temp2 = entry of mem_rtab indexed by (rs >> 12)
1401 emit_readdword(&mem_rtab,temp);
1402 emit_shrimm(rs,12,temp2);
1403 emit_readdword_dualindexedx8(temp,temp2,temp2);
// double the entry with a flag-setting add
1404 emit_adds64(temp2,temp2,temp2);
1407 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1409 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1410 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1411 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1412 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1413 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1419 emit_jmp(0); // jump to reg restore
1422 emit_jmp(stubs[n].retaddr); // return address
1423 set_jump_target(handler_jump, out);
// choose the read handler by access width
1428 if(type==LOADB_STUB||type==LOADBU_STUB)
1429 handler=jump_handler_read8;
1430 if(type==LOADH_STUB||type==LOADHU_STUB)
1431 handler=jump_handler_read16;
1432 if(type==LOADW_STUB)
1433 handler=jump_handler_read32;
1435 pass_args64(rs,temp2);
1436 int cc=get_reg(i_regmap,CCREG);
1438 emit_loadreg(CCREG,2);
// register 2 receives the cycle count plus this stub's adjustment
1439 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1440 emit_far_call(handler);
1441 // (no cycle reload after read)
1442 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
// the handler's result is taken from host register 0
1443 loadstore_extend(type,0,rt);
1446 set_jump_target(restore_jump, out);
1447 restore_regs(reglist);
1448 emit_jmp(stubs[n].retaddr);
// Emit an inline read for instruction i whose guest address `addr` is a
// known constant.
// Visible structure:
//  - get_direct_memhandler() is asked for a handler; when it returns NULL
//    the access is done as a direct load from host_addr (rs is first
//    adjusted from addr to host_addr when they differ);
//  - otherwise a memory handler is called with addr in register 0 and the
//    adjusted cycle count in register 2, and the result is extended from
//    register 0 into rt before the saved registers are restored.
1451 static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1453 int rs=get_reg(regmap,target);
1454 int rt=get_reg(regmap,target);
// no register mapped for target: fall back to the register mapped to -1
1455 if(rs<0) rs=get_reg(regmap,-1);
1458 uintptr_t host_addr = 0;
1460 int cc=get_reg(regmap,CCREG);
1461 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1463 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1464 if (handler == NULL) {
1467 if (addr != host_addr) {
// only host addresses that fit in 32 bits are supported on this path
1468 if (host_addr >= 0x100000000ull)
1469 abort(); // ROREG not implemented
1470 emit_movimm_from(addr, rs, host_addr, rs);
1473 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1474 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1475 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1476 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1477 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1482 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1484 if(type==LOADB_STUB||type==LOADBU_STUB)
1485 handler=jump_handler_read8;
1486 if(type==LOADH_STUB||type==LOADHU_STUB)
1487 handler=jump_handler_read16;
1488 if(type==LOADW_STUB)
1489 handler=jump_handler_read32;
1492 // call a memhandler
1493 if(rt>=0&&rt1[i]!=0)
1497 emit_movimm(addr,0);
1501 emit_loadreg(CCREG,2);
1502 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
// register 1 = mem_rtab entry for this page shifted left by one, built
// from adrp plus the low 12 bits
1504 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1505 emit_adrp((void *)l1, 1);
1506 emit_addimm64(1, l1 & 0xfff, 1);
1509 emit_far_call(do_memhandler_pre);
1511 emit_far_call(handler);
1513 // (no cycle reload after read)
1514 if(rt>=0&&rt1[i]!=0)
1515 loadstore_extend(type, 0, rt);
1516 restore_regs(reglist);
// Emit the out-of-line slow path for memory-write stub number n.
// Outline of the visible steps:
//  - patch the branch recorded at stubs[n].addr so it lands here;
//  - pick the source register (FTEMP mapping for coprocessor stores,
//    otherwise rs2[i]) and look for free temporaries;
//  - look the address up in mem_wtab and, on one path, perform the store
//    directly with a dual-indexed store and return to stubs[n].retaddr;
//  - otherwise call jump_handler_write8/16/32 with the table entry in
//    register 3 and the adjusted cycle count in register 2, undo the cycle
//    adjustment on the returned count, restore registers and return.
1519 static void do_writestub(int n)
1521 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1522 set_jump_target(stubs[n].addr, out);
1523 enum stub_type type=stubs[n].type;
1526 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1527 u_int reglist=stubs[n].e;
1528 signed char *i_regmap=i_regs->regmap;
1530 if(itype[i]==C1LS||itype[i]==C2LS) {
1531 rt=get_reg(i_regmap,r=FTEMP);
1533 rt=get_reg(i_regmap,r=rs2[i]);
1537 int rtmp,temp=-1,temp2,regs_saved=0;
1538 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally marks the address and data registers as in use
1539 int reglist2=reglist|(1<<rs)|(1<<rt);
1540 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1541 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1549 for(rtmp=0;rtmp<=3;rtmp++)
1550 if(rtmp!=rs&&rtmp!=rt)
1553 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1556 host_tempreg_acquire();
// table lookup: temp2 = entry of mem_wtab indexed by (rs >> 12)
1559 emit_readdword(&mem_wtab,temp);
1560 emit_shrimm(rs,12,temp2);
1561 emit_readdword_dualindexedx8(temp,temp2,temp2);
// double the entry with a flag-setting add
1562 emit_adds64(temp2,temp2,temp2);
1566 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1567 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1568 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1573 emit_jmp(0); // jump to reg restore
1576 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1577 set_jump_target(handler_jump, out);
1579 // TODO FIXME: regalloc should prefer callee-saved regs
1584 case STOREB_STUB: handler=jump_handler_write8; break;
1585 case STOREH_STUB: handler=jump_handler_write16; break;
1586 case STOREW_STUB: handler=jump_handler_write32; break;
1592 emit_mov64(temp2,3);
1593 host_tempreg_release();
1595 int cc=get_reg(i_regmap,CCREG);
1597 emit_loadreg(CCREG,2);
1598 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1599 // returns new cycle_count
1600 emit_far_call(handler);
// subtract the adjustment from the count returned in register 0
1601 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1603 emit_storereg(CCREG,2);
1605 set_jump_target(restore_jump, out);
1606 restore_regs(reglist);
1607 emit_jmp(stubs[n].retaddr);
// Emit an inline write for instruction i whose guest address `addr` is a
// known constant.
// Visible structure:
//  - get_direct_memhandler() is asked for a handler; when it returns NULL
//    the store is done directly at host_addr (rs is first adjusted from
//    addr to host_addr when they differ);
//  - otherwise the address is written to `address`, the data is extended
//    into register 0, the adjusted cycle count is placed in register 2 and
//    the handler is called between do_memhandler_pre/do_memhandler_post;
//    the returned count has the adjustment removed again before registers
//    are restored.
1610 static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
// rs: register mapped to -1 (holds the address); rt: register holding the data
1612 int rs = get_reg(regmap,-1);
1613 int rt = get_reg(regmap,target);
1616 uintptr_t host_addr = 0;
1617 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1618 if (handler == NULL) {
1619 if (addr != host_addr) {
// only host addresses that fit in 32 bits are supported on this path
1620 if (host_addr >= 0x100000000ull)
1621 abort(); // ROREG not implemented
1622 emit_movimm_from(addr, rs, host_addr, rs);
1625 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1626 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1627 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1633 // call a memhandler
1635 emit_writeword(rs, &address); // some handlers still need it
1636 loadstore_extend(type, rt, 0);
1638 cc = cc_use = get_reg(regmap, CCREG);
// register 2 is used for the cycle count when it is loaded from memory here
1640 emit_loadreg(CCREG, (cc_use = 2));
1641 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
1643 emit_far_call(do_memhandler_pre);
1644 emit_far_call(handler);
1645 emit_far_call(do_memhandler_post);
// subtract the adjustment from the value left in register 0 by the calls
1646 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
1648 emit_storereg(CCREG, cc_use);
1649 restore_regs(reglist);
// Compare `size` bytes at `source` against `copy` with memcmp().
// This is the routine called from the code emitted by do_dirty_stub_base().
1652 static int verify_code_arm64(const void *source, const void *copy, u_int size)
1654 int ret = memcmp(source, copy, size);
1655 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
1659 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emit the common head of a dirty-check stub for guest address vaddr:
// two literal loads (x0 = source, x1 = copy; their offsets are patched
// later by set_loadlp()), the byte length slen*4 in w2, and a call to
// verify_code_arm64().  The visible tail loads vaddr into register 0 and
// calls get_addr().
1660 static void do_dirty_stub_base(u_int vaddr)
1662 assert(slen <= MAXBLOCK);
1663 emit_loadlp_ofs(0, 0); // ldr x0, source
1664 emit_loadlp_ofs(0, 1); // ldr x1, copy
1665 emit_movz(slen*4, 2);
1666 emit_far_call(verify_code_arm64);
1669 emit_movz(vaddr & 0xffff, 0);
1670 emit_movk_lsl16(vaddr >> 16, 0);
1671 emit_far_call(get_addr);
1673 set_jump_target(jmp, out);
// Assert that ptr points at a dirty-check stub: words 0 and 1 must be
// literal loads into x0 and x1, word 2 a movz into w2, and word 8 the
// instruction `br x0`.
1676 static void assert_dirty_stub(const u_int *ptr)
1678 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1679 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1680 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4
1681 assert( ptr[8] == 0xd61f0000); // br x0
// Patch the literal-load instruction at `loadl` so that it refers to the
// literal stored at `lit`.
1684 static void set_loadlp(u_int *loadl, void *lit)
// byte distance from the instruction to its literal
1686 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
// must be an `ldr (literal)` whose offset field is still zero
1687 assert((*loadl & ~0x1f) == 0x58000000);
1688 assert((ofs & 3) == 0);
// being unsigned, this also rejects literals placed before the instruction
1689 assert(ofs < 0x100000);
// the offset is stored in words, starting at bit 5
1690 *loadl |= (ofs >> 2) << 5;
// Emit the two 64-bit literals used by a dirty-check stub (the values of
// `source` and `copy`) at the current output position, patching the two
// literal loads at loadlps[0] and loadlps[1] to point at them.
1693 static void do_dirty_stub_emit_literals(u_int *loadlps)
1695 set_loadlp(&loadlps[0], out);
1696 output_w64((uintptr_t)source);
1697 set_loadlp(&loadlps[1], out);
1698 output_w64((uintptr_t)copy);
// Emit the dirty-check stub for instruction i of the current block: the
// common head for guest address start + i*4, a jump to the compiled code
// at instr_addr[i], and then the stub's two literals.
1701 static void *do_dirty_stub(int i)
1703 assem_debug("do_dirty_stub %x\n",start+i*4);
// remember where the literal loads are so they can be patched below
1704 u_int *loadlps = (void *)out;
1705 do_dirty_stub_base(start + i*4);
1709 entry = instr_addr[i];
1710 emit_jmp(instr_addr[i]);
1711 do_dirty_stub_emit_literals(loadlps);
// Variant of the dirty-check stub that passes start + 1 as the address.
// Here the literals are placed inline, so a jump over them is emitted
// first and its target is fixed up once the literals have been written.
1715 static void do_dirty_stub_ds(void)
1717 u_int *loadlps = (void *)out;
1718 do_dirty_stub_base(start + 1);
1719 void *lit_jumpover = out;
// provisional target: just past two 8-byte literals
1720 emit_jmp(out + 8*2);
1721 do_dirty_stub_emit_literals(loadlps);
1722 set_jump_target(lit_jumpover, out);
// Return the 64-bit literal referenced by the `ldr (literal)` instruction
// at i.  The instruction's top byte is asserted to be 0x58, and the value
// is read at a word offset `ofs` from the instruction.
1725 static uint64_t get_from_ldr_literal(const u_int *i)
1728 assert((i[0] & 0xff000000) == 0x58000000);
1731 return *(uint64_t *)(i + ofs);
// Return the 16-bit immediate encoded in the movz instruction at i.
1734 static uint64_t get_from_movz(const u_int *i)
// accept movz with no shift, regardless of register width and destination
1736 assert((i[0] & 0x7fe00000) == 0x52800000);
// the immediate occupies bits 5..20
1737 return (i[0] >> 5) & 0xffff;
1740 // Find the "clean" entry point from a "dirty" entry point
1741 // by skipping past the call to verify_code
// addr must point at a dirty-check stub; this is asserted first.
1742 static void *get_clean_addr(u_int *addr)
1744 assert_dirty_stub(addr);
// Re-check the block guarded by the dirty-check stub at ptr: the source
// and copy pointers and the length are decoded from the stub's own
// instructions.  Returns non-zero when the two regions are byte-identical.
1748 static int verify_dirty(const u_int *ptr)
1750 const void *source, *copy;
1752 assert_dirty_stub(ptr);
1753 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1754 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1755 len = get_from_movz(&ptr[2]); // movz w2, #slen*4
1756 return !memcmp(source, copy, len);
// Classify the entry point at addr: an instruction word whose top byte is
// 0x58 (ldr literal) marks the start of a dirty-check stub, whose layout
// is then asserted.
1759 static int isclean(void *addr)
1761 const u_int *ptr = addr;
1762 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1763 assert_dirty_stub(ptr);
1769 // get source that block at addr was compiled from (host pointers)
// addr must point at a dirty-check stub.  *start receives the stub's
// `source` literal and *end is *start plus the byte length encoded in the
// stub's movz instruction.
1770 static void get_bounds(void *addr, u_char **start, u_char **end)
1772 const u_int *ptr = addr;
1773 assert_dirty_stub(ptr);
1774 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1775 *end = *start + get_from_movz(&ptr[2]); // movz w2, #slen*4
// Emit the code that precedes a GTE (cop2) handler call: store the
// registers in reglist, call pcnt_gte_start (NOTE(review): presumably only
// in profiling builds — the guard is not visible here), and load register
// 0 with the address of psxRegs.CP2D.r[0], computed as an offset from FP.
1780 static void c2op_prologue(u_int op,u_int reglist)
1782 save_load_regs_all(1, reglist);
1785 emit_far_call(pcnt_gte_start);
1787 // pointer to cop2 regs
1788 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a GTE (cop2) handler call: call pcnt_gte_end
// (NOTE(review): presumably only in profiling builds — the guard is not
// visible here) and reload the registers in reglist.
1791 static void c2op_epilogue(u_int op,u_int reglist)
1795 emit_far_call(pcnt_gte_end);
1797 save_load_regs_all(0, reglist);
// Assemble the GTE operation at instruction i.
// When a handler exists for the operation (low 6 bits of the opcode), the
// emitted code saves the live caller-save registers, stores the opcode to
// psxRegs.code, calls the handler — the flag-less variant when flags are
// not needed — and restores the registers.
1800 static void c2op_assemble(int i,struct regstat *i_regs)
1802 u_int c2op=source[i]&0x3f;
1803 u_int hr,reglist_full=0,reglist;
1804 int need_flags,need_ir;
// collect every host register that currently has a mapping
1805 for(hr=0;hr<HOST_REGS;hr++) {
1806 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1808 reglist=reglist_full&CALLER_SAVE_REGS;
1810 if (gte_handlers[c2op]!=NULL) {
1811 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1812 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1813 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1814 source[i],gte_unneeded[i+1],need_flags,need_ir);
1815 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1817 //int shift = (source[i] >> 19) & 1;
1818 //int lm = (source[i] >> 10) & 1;
1822 c2op_prologue(c2op,reglist);
1823 emit_movimm(source[i],1); // opcode
1824 emit_writeword(1,&psxRegs.code);
1825 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1828 c2op_epilogue(c2op,reglist);
// Emit the value fix-up described by the two commented lines below, using
// `sl` as the source register and `temp` as the result.
// The sequence ends in assert(0): it is marked as needing testing and
// will abort when reached with assertions enabled.
1832 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1834 //value = value & 0x7ffff000;
1835 //if (value & 0x7f87e000) value |= 0x80000000;
1836 emit_andimm(sl, 0x7fffe000, temp);
1837 emit_testimm(temp, 0xff87ffff);
1838 emit_andimm(sl, 0x7ffff000, temp);
1839 host_tempreg_acquire();
// HOST_TEMPREG = temp | 0x80000000, moved into temp on the "ne" condition
1840 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1841 emit_cmovne_reg(HOST_TEMPREG, temp);
1842 host_tempreg_release();
1843 assert(0); // testing needed
// Emit code that loads the signed halfword of cop2 data register `copr`
// into `temp` and reduces it to the 0xf80 bit range, as follows:
//   - emit_bicsar_imm(temp,31,temp) combines temp with its own sign
//     (arithmetic shift by 31);
//   - values above 0xf80 are replaced with ~0 (see the csinv comment);
//   - the result is masked with 0xf80.
// Fix: the register operand had been corrupted into the single character
// U+00AE (an "&reg" entity decoding artifact); the address-of expression
// &reg_cop2d[copr] is restored.
1846 static void do_mfc2_31_one(u_int copr,signed char temp)
1848 emit_readshword(&reg_cop2d[copr],temp);
1849 emit_bicsar_imm(temp,31,temp);
1850 emit_cmpimm(temp,0xf80);
1851 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1852 emit_andimm(temp,0xf80,temp);
// Emit the read of cop2 data register 29: the values produced by
// do_mfc2_31_one() for registers 9, 10 and 11 are combined into `tl`
// (first shifted right by 7, second OR-ed in shifted right by 2, third
// OR-ed in shifted left by 3) and the result is also written back to
// reg_cop2d[29].  HOST_TEMPREG may be acquired as the scratch register, in
// which case it is released at the end.
// Fix: the store operand had been corrupted into the single character
// U+00AE (an "&reg" entity decoding artifact); the address-of expression
// &reg_cop2d[29] is restored.
1855 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1858 host_tempreg_acquire();
1859 temp = HOST_TEMPREG;
1861 do_mfc2_31_one(9,temp);
1862 emit_shrimm(temp,7,tl);
1863 do_mfc2_31_one(10,temp);
1864 emit_orrshr_imm(temp,2,tl);
1865 do_mfc2_31_one(11,temp);
1866 emit_orrshl_imm(temp,3,tl);
1867 emit_writeword(tl,&reg_cop2d[29]);
1869 if (temp == HOST_TEMPREG)
1870 host_tempreg_release();
// Assemble the multiply/divide instruction at index i.
// Visible cases:
//  - MULT/MULTU: a 64-bit product is formed in the HI register with
//    smull/umull, and HI is then shifted right by 32;
//  - DIV/DIVU: sdiv/udiv produces the quotient in LO and msub derives the
//    remainder in HI; a conditional move then substitutes the quotient
//    when the denominator tests as zero;
//  - a section for operands known to be zero, which emits constants
//    directly.
1873 static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1886 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1887 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1888 signed char hi=get_reg(i_regs->regmap,HIREG);
1889 signed char lo=get_reg(i_regs->regmap,LOREG);
1895 if(opcode2[i]==0x18) // MULT
1896 emit_smull(m1,m2,hi);
1898 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the product in hi
1901 emit_shrimm64(hi,32,hi);
1907 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1908 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1909 signed char quotient=get_reg(i_regs->regmap,LOREG);
1910 signed char remainder=get_reg(i_regs->regmap,HIREG);
1911 assert(numerator>=0);
1912 assert(denominator>=0);
1913 assert(quotient>=0);
1914 assert(remainder>=0);
1916 if (opcode2[i] == 0x1A) // DIV
1917 emit_sdiv(numerator,denominator,quotient);
1919 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1920 emit_msub(quotient,denominator,numerator,remainder);
1922 // div 0 quotient (remainder is already correct)
1923 host_tempreg_acquire();
1924 if (opcode2[i] == 0x1A) // DIV
1925 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1927 emit_movimm(~0,HOST_TEMPREG);
// select the substitute quotient when denominator == 0
1928 emit_test(denominator,denominator);
1929 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1930 host_tempreg_release();
1939 signed char hr=get_reg(i_regs->regmap,HIREG);
1940 signed char lr=get_reg(i_regs->regmap,LOREG);
1941 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1944 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1945 assert(numerator >= 0);
1947 emit_mov(numerator,hr);
1949 if (opcode2[i] == 0x1A) // DIV
1950 emit_sub_asrimm(0,numerator,31,lr);
1956 if (hr >= 0) emit_zeroreg(hr);
1957 if (lr >= 0) emit_movimm(~0,lr);
1962 // Multiply by zero is zero.
1963 if (hr >= 0) emit_zeroreg(hr);
1964 if (lr >= 0) emit_zeroreg(lr);
1968 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the guest address held in host register rs.  The one
// visible step is a far call to get_addr_ht.
// NOTE(review): the argument set-up and the final branch are not visible
// in this listing.
1970 static void do_jump_vaddr(u_int rs)
1974 emit_far_call(get_addr_ht);
// Hook for preloading a hash constant into register r; as the existing
// comment explains, it is not needed on this target.
1978 static void do_preload_rhash(u_int r) {
1979 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1980 // register. On ARM the hash can be done with a single instruction (below)
// Emit code that loads host register ht with the address of mini_ht,
// computed as FP plus the offset of mini_ht from dynarec_local.
1983 static void do_preload_rhtbl(u_int ht) {
1984 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
// Emit the hash computation for a return address: rh = rs & 0xf8.
1987 static void do_rhash(u_int rs,u_int rh) {
1988 emit_andimm(rs, 0xf8, rh);
// Emit code that advances ht by rh (64-bit add) and then loads a 32-bit
// word from [ht + 0] into rh.
1991 static void do_miniht_load(int ht, u_int rh) {
1992 emit_add64(ht, rh, ht);
// is_st = 0 (load), is64 = 0 (32-bit), offset 0
1993 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash table lookup.  The visible lines resolve a
// pending branch (jaddr) to the current output position and then emit a
// 64-bit load of ht from [ht + 8].
// NOTE(review): the comparison and the branches around these lines are
// not visible in this listing.
1996 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2002 set_jump_target(jaddr, out);
2003 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// byte offset 8 encoded in 8-byte units
2004 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2008 // parsed by set_jump_target?
// Emit code that records return_address in the mini hash table.
// rt is loaded with the 32-bit return_address (high half via movz lsl 16,
// low half via movk) and the current output position is registered with
// add_to_linker().  The slot is selected by (return_address & 0xFF) >> 3:
// element [1] receives the 64-bit value in temp and element [0] receives
// the 32-bit value in rt.
2009 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2010 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2011 emit_movk(return_address&0xffff,rt);
2012 add_to_linker(out,return_address,1);
2014 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2015 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make the range [start, end) of freshly written code visible to
// instruction fetch by issuing cache-maintenance instructions directly.
// Line sizes are read from CTR_EL0 on every call and the smallest values
// seen so far are kept in static variables, so the stride stays valid
// across cores with different line sizes.
2018 static void clear_cache_arm64(char *start, char *end)
2020 // Don't rely on GCC's __clear_cache implementation, as it caches
2021 // icache/dcache cache line sizes, that can vary between cores on
2022 // big.LITTLE architectures.
2023 uint64_t addr, ctr_el0;
// 0xffff is a "larger than any real line" starting value for the minimum
2024 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2025 size_t isize, dsize;
2027 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// each field holds log2 of the line size in 4-byte words
2028 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2029 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2031 // use the global minimum cache line size
2032 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2033 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2035 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2036 not required for instruction to data coherence. */
2037 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the line containing `start`
2038 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2039 for (; addr < (uint64_t)end; addr += dsize)
2040 // use "civac" instead of "cvau", as this is the suggested workaround for
2041 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2042 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2044 __asm__ volatile("dsb ish" : : : "memory");
2046 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2047 Unification is not required for instruction to data coherence. */
2048 if ((ctr_el0 & (1 << 29)) == 0x0) {
// start from the line containing `start`
2049 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2050 for (; addr < (uint64_t)end; addr += isize)
2051 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2053 __asm__ volatile("dsb ish" : : : "memory");
2056 __asm__ volatile("isb" : : : "memory");
2059 // CPU-architecture-specific initialization
// Fills every entry of ndrc->tramp.ops with a two-instruction trampoline:
// a literal load into x17 followed by `br x17`.  The literal offset is the
// byte distance between ndrc->tramp.f and ndrc->tramp.ops, which is the
// same for every entry.  The writes are bracketed by start_tcache_write()
// and end_tcache_write().
2060 static void arch_init(void)
2062 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2063 struct tramp_insns *ops = ndrc->tramp.ops;
// the literal offset is encoded in words, so it must be 4-byte aligned
2065 assert(!(diff & 3));
2066 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2067 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2068 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2069 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2071 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2074 // vim:shiftwidth=2:expandtab