1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #if defined(BASE_ADDR_FIXED)
27 #elif defined(BASE_ADDR_DYNAMIC)
28 u_char *translation_cache;
30 u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
32 static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
34 #define CALLER_SAVE_REGS 0x0007ffff
36 #define unused __attribute__((unused))
38 void do_memhandler_pre();
39 void do_memhandler_post();
// Patch the PC-relative instruction at addr so that it refers to target.
// Supported instructions: b, b.cond, cbz/cbnz and adr; anything else aborts.
//   addr   - location of the instruction to patch
//   target - address the instruction should branch to / compute
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link);
    // a workaround would be to do a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep all of bits [4:0]: for cbz/cbnz they hold the 5-bit register
    // number, so masking with 0x0F would turn registers 16..31 into 0..15.
    // For b.cond bit 4 is zero in the existing instruction and stays zero.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    // low two offset bits go to [30:29], the remaining 19 bits to [23:5]
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is.
// The third word of the stub must be an adr; its PC-relative target is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr

  // Extract the fields with unsigned arithmetic (shifting signed values
  // left is undefined), then sign-extend the resulting 21-bit offset.
  uint32_t immhi = (insn >> 5) & 0x7ffff;
  uint32_t immlo = (insn >> 29) & 0x3;
  int32_t offset = (int32_t)((immhi << 2) | immlo);
  if (offset & 0x100000)
    offset -= 0x200000;

  return ptr + offset / 4;
}
79 // find where external branch is linked to using addr of its stub:
80 // get address that the stub loads (dyna_linker arg1),
81 // treat it as a pointer to branch insn,
82 // return addr where that branch jumps to
// Locates the branch instruction that belongs to `stub` (via
// find_extjump_insn) and returns the address that branch currently targets.
83 static void *get_pointer(void *stub)
85 int *i_ptr = find_extjump_insn(stub);
// b: sign-extend the low 26 bits; adding to an int pointer scales by 4
86 if ((*i_ptr&0xfc000000) == 0x14000000) // b
87 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
// b.cond / cbz / cbnz: the word offset sits in bits [23:5]
88 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
89 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
90 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
95 // Allocate a specific ARM register.
// Visible steps: search the host register map for an existing mapping of
// `reg`, remember that slot's dirty bit, then transfer the dirty bit to
// `hr` and clear hr's is-constant flag.
96 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
101 // see if it's already allocated (and dealloc it)
102 for(n=0;n<HOST_REGS;n++)
104 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
105 dirty=(cur->dirty>>n)&1;
// replace hr's dirty bit with the one carried over from the old slot
111 cur->dirty&=~(1<<hr);
112 cur->dirty|=dirty<<hr;
113 cur->isconst&=~(1<<hr);
116 // Alloc cycle count into dedicated register
117 static void alloc_cc(struct regstat *cur,int i)
119 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
// Names used for host registers in 32-bit context by the assem_debug traces,
// indexed by register number.
127 static unused const char *regname[32] = {
128 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
129 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
130 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
131 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
// Names used for host registers in 64-bit context by the assem_debug traces,
// indexed by register number.
134 static unused const char *regname64[32] = {
135 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
136 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
137 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
138 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition identifiers, listed in the same order as the condname[] strings;
// the emitters place them in the condition field of b.cond / csel / csinc.
142 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
143 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
// Printable condition suffixes for the assem_debug traces, indexed by the
// COND_* value.
146 static unused const char *condname[16] = {
147 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
148 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Stores `word` as a 32-bit value at the location `out` points to.
151 static void output_w32(u_int word)
153 *((u_int *)out) = word;
// Stores `dword` as a 64-bit value at the location `out` points to.
157 static void output_w64(uint64_t dword)
159 *((uint64_t *)out) = dword;
// Packs instruction fields: rm starting at bit 16, rd starting at bit 0.
164 static u_int rm_rd(u_int rm, u_int rd)
168 return (rm << 16) | rd;
// Packs instruction fields: rn starting at bit 5, rd starting at bit 0.
172 static u_int rn_rd(u_int rn, u_int rd)
176 return (rn << 5) | rd;
// Packs instruction fields: rm at bit 16, rn at bit 5, rd at bit 0.
179 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
184 return (rm << 16) | (rn << 5) | rd;
// Same packing as rm_rn_rd() with an additional ra field placed at bit 10.
187 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
190 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Packs the fields of a register-pair access: imm7 at bit 15, rt2 at bit 10,
// rn at bit 5, rt at bit 0.
193 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
199 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Same packing as rm_rn_rd() with an additional imm6 field placed at bit 10.
202 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
205 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Packs a 16-bit immediate at bit 5 together with rd at bit 0.
// imm16 must be below 0x10000 (asserted).
208 static u_int imm16_rd(u_int imm16, u_int rd)
210 assert(imm16 < 0x10000);
212 return (imm16 << 5) | rd;
// Packs a 12-bit immediate at bit 10 together with rn (bit 5) and rd (bit 0).
// imm12 must be below 0x1000 (asserted).
215 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
217 assert(imm12 < 0x1000);
220 return (imm12 << 10) | (rn << 5) | rd;
// Packs a 9-bit immediate at bit 12 together with rn (bit 5) and the
// transfer register (bit 0). imm9 must be below 0x200 (asserted), so callers
// pass signed offsets already masked to 9 bits.
223 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
225 assert(imm9 < 0x200);
228 return (imm9 << 12) | (rn << 5) | rd;
// Packs a 19-bit immediate at bit 5 together with rt at bit 0.
// imm19 must be below 0x80000 (asserted).
231 static u_int imm19_rt(u_int imm19, u_int rt)
233 assert(imm19 < 0x80000);
235 return (imm19 << 5) | rt;
// Packs the bitfield / logical-immediate fields: n at bit 22, immr at bit 16,
// imms at bit 10, rn at bit 5, rd at bit 0.
238 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
245 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Computes the 26-bit word-offset field for an unconditional branch emitted
// at `out` that should reach addr.
// Addresses below 3 are placeholders and give 0; an offset outside
// [-134217728, 134217727] is reported through SysPrintf.
248 static u_int genjmp(const u_char *addr)
250 intptr_t offset = addr - out;
251 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
252 if (offset < -134217728 || offset > 134217727) {
253 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
// byte offset -> word offset, truncated to the 26-bit field
257 return ((u_int)offset >> 2) & 0x03ffffff;
// Computes the 19-bit word-offset field for a conditional branch (or
// cbz/cbnz) emitted at `out` that should reach addr.
// Addresses below 3 give 0; an offset outside [-1048576, 1048572] is
// reported through SysPrintf.
260 static u_int genjmpcc(const u_char *addr)
262 intptr_t offset = addr - out;
263 if ((uintptr_t)addr < 3) return 0;
264 if (offset < -1048576 || offset > 1048572) {
265 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
// byte offset -> word offset, truncated to the 19-bit field
269 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 shares no bit with value), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  const u_int overlap = (value + 1) & value;
  return overlap == 0;
}
277 // This function returns true if the argument contains a
278 // non-empty sequence of ones (possibly rotated) with the remainder zero.
279 static uint32_t is_rotated_mask(u_int value)
// all-zero and all-one values are singled out before the mask tests
281 if (value == 0 || value == ~0)
// filling the zeros below the lowest set bit must give a low-bit mask
283 if (is_mask((value - 1) | value))
// otherwise apply the same test to the complement (run wraps around bit 31)
285 return is_mask((~value - 1) | ~value);
// Derives the immr / imms fields for a logical-immediate encoding of `value`
// from the position and length of its run of ones (leading / trailing zero
// counts). The same contiguous-run test appears twice in the visible code.
288 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
290 int lzeros, tzeros, ones;
292 if (is_mask((value - 1) | value)) {
293 lzeros = __builtin_clz(value);
294 tzeros = __builtin_ctz(value);
295 ones = 32 - lzeros - tzeros;
// rotate amount derived from the trailing zero count
296 *immr = (32 - tzeros) & 31;
301 if (is_mask((value - 1) | value)) {
302 lzeros = __builtin_clz(value);
303 tzeros = __builtin_ctz(value);
304 ones = 32 - lzeros - tzeros;
312 static void emit_mov(u_int rs, u_int rt)
314 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
315 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
318 static void emit_mov64(u_int rs, u_int rt)
320 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
321 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
324 static void emit_add(u_int rs1, u_int rs2, u_int rt)
326 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
327 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
330 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
332 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
333 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
336 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
338 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
339 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
342 static void emit_neg(u_int rs, u_int rt)
344 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
345 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
348 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
350 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
351 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
354 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
356 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
357 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
360 static void emit_movz(u_int imm, u_int rt)
362 assem_debug("movz %s,#%#x\n", regname[rt], imm);
363 output_w32(0x52800000 | imm16_rd(imm, rt));
366 static void emit_movz_lsl16(u_int imm, u_int rt)
368 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
369 output_w32(0x52a00000 | imm16_rd(imm, rt));
372 static void emit_movn(u_int imm, u_int rt)
374 assem_debug("movn %s,#%#x\n", regname[rt], imm);
375 output_w32(0x12800000 | imm16_rd(imm, rt));
378 static void emit_movn_lsl16(u_int imm,u_int rt)
380 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
381 output_w32(0x12a00000 | imm16_rd(imm, rt));
384 static void emit_movk(u_int imm,u_int rt)
386 assem_debug("movk %s,#%#x\n", regname[rt], imm);
387 output_w32(0x72800000 | imm16_rd(imm, rt));
// Emit "movk rt, #imm, lsl #16" (32-bit); imm16_rd() requires imm < 0x10000.
390 static void emit_movk_lsl16(u_int imm,u_int rt)
393 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
394 output_w32(0x72a00000 | imm16_rd(imm, rt));
// NOTE(review): only the signature is visible here; presumably loads zero
// into host register rt — confirm against the body.
397 static void emit_zeroreg(u_int rt)
// Loads a 32-bit constant into rt, choosing a single instruction where the
// value allows it and a movz + movk pair otherwise.
402 static void emit_movimm(u_int imm, u_int rt)
// complement fits in 16 bits
406 else if ((~imm) < 65536)
// only the upper halfword is populated
408 else if ((imm&0xffff) == 0)
409 emit_movz_lsl16(imm >> 16, rt);
// lower halfword is all ones
410 else if (((~imm)&0xffff) == 0)
411 emit_movn_lsl16(~imm >> 16, rt);
// a (rotated) run of ones can be encoded as a logical immediate
412 else if (is_rotated_mask(imm)) {
414 gen_logical_imm(imm, &immr, &imms);
415 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
416 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
// general case: low halfword first, then the high halfword
419 emit_movz(imm & 0xffff, rt);
420 emit_movk_lsl16(imm >> 16, rt);
// Emits a 32-bit load of the variable at addr, addressed as FP plus its byte
// offset from dynarec_local. The visible path needs a 4-byte aligned offset
// of at most 16380.
424 static void emit_readword(void *addr, u_int rt)
426 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
427 if (!(offset & 3) && offset <= 16380) {
428 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
// the immediate field holds the offset in units of 4 bytes
429 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit load of the variable at addr, addressed as FP plus its byte
// offset from dynarec_local. The visible path needs an 8-byte aligned offset
// of at most 32760.
435 static void emit_readdword(void *addr, u_int rt)
437 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
438 if (!(offset & 7) && offset <= 32760) {
439 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
// the immediate field holds the offset in units of 8 bytes
440 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emits a sign-extending 16-bit load (ldrsh) of the variable at addr,
// addressed as FP plus its byte offset from dynarec_local. The visible path
// needs a 2-byte aligned offset of at most 8190.
446 static void emit_readshword(void *addr, u_int rt)
448 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
449 if (!(offset & 1) && offset <= 8190) {
450 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
// the immediate field holds the offset in units of 2 bytes
451 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Loads emulated register r into host register hr. The source defaults to
// psxRegs.GPR.r[r]; CCREG, CSREG and INVCP read dedicated variables, and
// INVCP is loaded as a 64-bit value.
457 static void emit_loadreg(u_int r, u_int hr)
464 void *addr = &psxRegs.GPR.r[r];
466 //case HIREG: addr = &hi; break;
467 //case LOREG: addr = &lo; break;
468 case CCREG: addr = &cycle_count; break;
469 case CSREG: addr = &Status; break;
470 case INVCP: addr = &invc_ptr; is64 = 1; break;
// any other register must index the GPR array
471 default: assert(r < 34); break;
474 emit_readdword(addr, hr);
476 emit_readword(addr, hr);
// Emits a 32-bit store of rt to the variable at addr, addressed as FP plus
// its byte offset from dynarec_local. The visible path needs a 4-byte aligned
// offset of at most 16380.
480 static void emit_writeword(u_int rt, void *addr)
482 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
483 if (!(offset & 3) && offset <= 16380) {
484 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
// the immediate field holds the offset in units of 4 bytes
485 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit store of rt to the variable at addr, addressed as FP plus
// its byte offset from dynarec_local. The visible path needs an 8-byte
// aligned offset of at most 32760.
491 static void emit_writedword(u_int rt, void *addr)
493 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
494 if (!(offset & 7) && offset <= 32760) {
495 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
// the immediate field holds the offset in units of 8 bytes
496 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Stores host register hr to the memory slot of emulated register r:
// psxRegs.GPR.r[r] by default, cycle_count for CCREG. Always a 32-bit store.
502 static void emit_storereg(u_int r, u_int hr)
505 void *addr = &psxRegs.GPR.r[r];
507 //case HIREG: addr = &hi; break;
508 //case LOREG: addr = &lo; break;
509 case CCREG: addr = &cycle_count; break;
// any other register must index the GPR array
510 default: assert(r < 34); break;
512 emit_writeword(hr, addr);
515 static void emit_test(u_int rs, u_int rt)
517 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
518 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
521 static void emit_testimm(u_int rs, u_int imm)
524 assem_debug("tst %s,#%#x\n", regname[rs], imm);
525 assert(is_rotated_mask(imm)); // good enough for PCSX
526 gen_logical_imm(imm, &immr, &imms);
527 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
530 static void emit_not(u_int rs,u_int rt)
532 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
533 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
536 static void emit_and(u_int rs1,u_int rs2,u_int rt)
538 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
539 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
542 static void emit_or(u_int rs1,u_int rs2,u_int rt)
544 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
545 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
548 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
550 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
551 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
554 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
556 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
557 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
560 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
562 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
563 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
566 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
568 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
569 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
572 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
574 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
575 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
578 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
580 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
581 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Emits rt = rs + imm using add/sub immediate instructions.
//   s    - non-zero: use the flag-setting form
//   is64 - non-zero: 64-bit operation
//   imm  - addend; negative values arrive as large unsigned numbers and are
//          handled by emitting sub with -imm
// Magnitudes below 4096 take one instruction; magnitudes below 16777216 take
// an instruction with the immediate shifted by 12, followed by a second one
// for the low 12 bits.
584 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
586 unused const char *st = s ? "s" : "";
// convert the booleans into their opcode bits
587 s = s ? 0x20000000 : 0;
588 is64 = is64 ? 0x80000000 : 0;
590 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
591 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
593 else if (-imm < 4096) {
594 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
595 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
597 else if (imm < 16777216) {
// NOTE(review): the traces of this two-step path print rt as the source of
// the first instruction and rs as the source of the second, whereas the
// encodings read rs first and rt second.
598 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
599 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
// the second instruction is also needed when flags are requested, because
// the first one never sets them
600 if ((imm & 0xfff) || s) {
601 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
602 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
605 else if (-imm < 16777216) {
606 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
607 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
608 if ((imm & 0xfff) || s) {
609 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
610 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
617 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
619 emit_addimm_s(0, 0, rs, imm, rt);
622 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
624 emit_addimm_s(0, 1, rs, imm, rt);
627 static void emit_addimm_and_set_flags(int imm, u_int rt)
629 emit_addimm_s(1, 0, rt, imm, rt);
632 static void emit_addimm_no_flags(u_int imm,u_int rt)
634 emit_addimm(rt,imm,rt);
// Emits a logical operation with an immediate operand: rt = rs <op> imm.
//   op - index into names[]: 0 and, 1 orr, 2 eor, 3 ands
// Immediates that form a (rotated) run of ones use the immediate encoding;
// others are first loaded into HOST_TEMPREG and the register form is used.
637 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
639 const char *names[] = { "and", "orr", "eor", "ands" };
640 const char *name = names[op];
643 if (is_rotated_mask(imm)) {
644 gen_logical_imm(imm, &immr, &imms);
645 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
646 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
// the temp register is acquired unless it is used purely as the destination
649 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
650 host_tempreg_acquire();
651 emit_movimm(imm, HOST_TEMPREG);
652 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
653 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
// release under the same condition as the acquire above
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_release();
// rt = rs & imm; the visible path forwards to emit_logicop_imm() with op 0 ("and").
660 static void emit_andimm(u_int rs, u_int imm, u_int rt)
665 emit_logicop_imm(0, rs, imm, rt);
// rt = rs | imm; the visible path forwards to emit_logicop_imm() with op 1 ("orr").
668 static void emit_orimm(u_int rs, u_int imm, u_int rt)
675 emit_logicop_imm(1, rs, imm, rt);
// rt = rs ^ imm; the visible path forwards to emit_logicop_imm() with op 2 ("eor").
678 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
685 emit_logicop_imm(2, rs, imm, rt);
688 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
690 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
694 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
696 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
700 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
702 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
706 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
712 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
714 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
718 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
720 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
724 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
726 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
727 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
730 static void emit_signextend16(u_int rs, u_int rt)
732 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
733 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
736 static void emit_shl(u_int rs,u_int rshift,u_int rt)
738 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
742 static void emit_shr(u_int rs,u_int rshift,u_int rt)
744 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
748 static void emit_sar(u_int rs,u_int rshift,u_int rt)
750 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
751 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
754 static void emit_cmpimm(u_int rs, u_int imm)
757 assem_debug("cmp %s,%#x\n", regname[rs], imm);
758 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
760 else if (-imm < 4096) {
761 assem_debug("cmn %s,%#x\n", regname[rs], imm);
762 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
764 else if (imm < 16777216 && !(imm & 0xfff)) {
765 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
766 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
769 host_tempreg_acquire();
770 emit_movimm(imm, HOST_TEMPREG);
771 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
772 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
773 host_tempreg_release();
// Conditionally replaces rt with the constant imm, which must be 0 or 1.
// cond0 and cond1 are condition codes below 0x10; every caller in this file
// passes a condition together with its inverse.
// Two encodings are visible: csinc with cond1, and csel with cond0.
777 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
779 assert(imm == 0 || imm == 1);
780 assert(cond0 < 0x10);
781 assert(cond1 < 0x10);
// csinc rt, rt, wzr, cond1: keeps rt when cond1 holds, otherwise wzr + 1
783 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
784 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
// csel rt, wzr, rt, cond0: zero when cond0 holds, otherwise keeps rt
786 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
787 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
791 static void emit_cmovne_imm(u_int imm,u_int rt)
793 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
796 static void emit_cmovl_imm(u_int imm,u_int rt)
798 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
801 static void emit_cmovb_imm(int imm,u_int rt)
803 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
806 static void emit_cmoveq_reg(u_int rs,u_int rt)
808 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
812 static void emit_cmovne_reg(u_int rs,u_int rt)
814 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
818 static void emit_cmovl_reg(u_int rs,u_int rt)
820 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
824 static void emit_cmovs_reg(u_int rs,u_int rt)
826 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
830 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
832 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
833 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
// Set-on-less-than with an immediate, signed variant: the visible code
// clears rt and then sets it to 1 through emit_cmovl_imm().
836 static void emit_slti32(u_int rs,int imm,u_int rt)
// rt can be cleared up front only when it does not alias the source
838 if(rs!=rt) emit_zeroreg(rt);
// aliasing case: clear rt here instead
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovl_imm(1,rt);
// Set-on-less-than with an immediate, unsigned variant: the visible code
// clears rt and then sets it to 1 through emit_cmovb_imm().
844 static void emit_sltiu32(u_int rs,int imm,u_int rt)
// rt can be cleared up front only when it does not alias the source
846 if(rs!=rt) emit_zeroreg(rt);
// aliasing case: clear rt here instead
848 if(rs==rt) emit_movimm(0,rt);
849 emit_cmovb_imm(1,rt);
852 static void emit_cmp(u_int rs,u_int rt)
854 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
855 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
// NOTE(review): only the final step is visible — rt is forced to 0 through
// emit_cmovl_imm(); presumably produces (rs > 0) in rt, confirm.
858 static void emit_set_gz32(u_int rs, u_int rt)
860 //assem_debug("set_gz32\n");
863 emit_cmovl_imm(0,rt);
// Copies rs into rt when they differ, then sets rt to 1 through
// emit_cmovne_imm().
866 static void emit_set_nz32(u_int rs, u_int rt)
868 //assem_debug("set_nz32\n");
869 if(rs!=rt) emit_mov(rs,rt);
871 emit_cmovne_imm(1,rt);
// Register form of set-on-less-than, signed variant: the visible code clears
// rt and then sets it to 1 through emit_cmovl_imm().
874 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
876 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
// rt can be cleared up front only when it aliases neither source
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
// aliasing case: clear rt here instead
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovl_imm(1,rt);
// Register form of set-on-less-than, unsigned variant: the visible code
// clears rt and then sets it to 1 through emit_cmovb_imm().
883 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
885 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
// rt can be cleared up front only when it aliases neither source
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
// aliasing case: clear rt here instead
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovb_imm(1,rt);
// Emits a call to function `a`. The visible path uses "bl" when the target
// lies within [-134217728, 134217727] bytes of the current output position.
892 static void emit_call(const void *a)
894 intptr_t diff = (u_char *)a - out;
895 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
897 if (-134217728 <= diff && diff <= 134217727)
// byte distance -> 26-bit word offset
898 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
903 static void emit_jmp(const void *a)
905 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
906 u_int offset = genjmp(a);
907 output_w32(0x14000000 | offset);
910 static void emit_jne(const void *a)
912 assem_debug("bne %p\n", a);
913 u_int offset = genjmpcc(a);
914 output_w32(0x54000000 | (offset << 5) | COND_NE);
917 static void emit_jeq(const void *a)
919 assem_debug("beq %p\n", a);
920 u_int offset = genjmpcc(a);
921 output_w32(0x54000000 | (offset << 5) | COND_EQ);
924 static void emit_js(const void *a)
926 assem_debug("bmi %p\n", a);
927 u_int offset = genjmpcc(a);
928 output_w32(0x54000000 | (offset << 5) | COND_MI);
931 static void emit_jns(const void *a)
933 assem_debug("bpl %p\n", a);
934 u_int offset = genjmpcc(a);
935 output_w32(0x54000000 | (offset << 5) | COND_PL);
938 static void emit_jl(const void *a)
940 assem_debug("blt %p\n", a);
941 u_int offset = genjmpcc(a);
942 output_w32(0x54000000 | (offset << 5) | COND_LT);
945 static void emit_jge(const void *a)
947 assem_debug("bge %p\n", a);
948 u_int offset = genjmpcc(a);
949 output_w32(0x54000000 | (offset << 5) | COND_GE);
952 static void emit_jno(const void *a)
954 assem_debug("bvc %p\n", a);
955 u_int offset = genjmpcc(a);
956 output_w32(0x54000000 | (offset << 5) | COND_VC);
959 static void emit_jc(const void *a)
961 assem_debug("bcs %p\n", a);
962 u_int offset = genjmpcc(a);
963 output_w32(0x54000000 | (offset << 5) | COND_CS);
966 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
968 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
969 u_int offset = genjmpcc(a);
970 is64 = is64 ? 0x80000000 : 0;
971 isnz = isnz ? 0x01000000 : 0;
972 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// NOTE(review): only the signature is visible here; presumably a cbz
// wrapper around emit_cb() — confirm against the body.
975 static void emit_cbz(const void *a, u_int r)
980 static void emit_jmpreg(u_int r)
982 assem_debug("br %s\n", regname64[r]);
983 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
986 static void emit_retreg(u_int r)
988 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
989 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// NOTE(review): only the signature is visible here; presumably emits a
// return through the link register via emit_retreg() — confirm.
992 static void emit_ret(void)
// Emits "adr rt, addr": forms the address of addr relative to the current
// output position. addr must be within [-1048576, 1048575] bytes (asserted).
997 static void emit_adr(void *addr, u_int rt)
999 intptr_t offset = (u_char *)addr - out;
1000 assert(-1048576 <= offset && offset < 1048576);
1002 assem_debug("adr x%d,#%#lx\n", rt, offset);
// low two offset bits go to [30:29], the remaining 19 bits to [23:5]
1003 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// Emits "adrp rt, addr": works on the 4 KiB page addresses of addr and of
// the current output position; their distance must lie within
// [-4294967296, 4294967295] bytes (asserted).
1006 static void emit_adrp(void *addr, u_int rt)
// difference between the two page-aligned addresses
1008 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1009 assert(-4294967296l <= offset && offset < 4294967296l);
1012 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1013 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1016 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1018 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1019 assert(-256 <= offset && offset < 256);
1020 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1023 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1025 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1026 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1029 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1031 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1035 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1037 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1041 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1043 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1047 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1049 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1050 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1053 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1056 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1059 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1062 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1065 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1067 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1068 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1071 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1073 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1074 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1077 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1079 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1080 assert(-256 <= offset && offset < 256);
1081 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1084 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1086 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1087 assert(-256 <= offset && offset < 256);
1088 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1091 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1093 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1094 assert(-256 <= offset && offset < 256);
1095 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1098 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1100 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Stores 32-bit rt at [rs + offset]. Non-negative 4-byte aligned offsets up
// to 16380 use "str" with a scaled immediate; offsets in -256..255 use "stur".
// NOTE(review): the traces print the base with regname[] (w names), whereas
// emit_writehword_indexed / emit_writebyte_indexed use regname64[].
1105 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1107 if (!(offset & 3) && (u_int)offset <= 16380) {
1108 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1109 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1111 else if (-256 <= offset && offset < 256) {
1112 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1113 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Stores the low 16 bits of rt at [rs + offset]. Non-negative 2-byte aligned
// offsets up to 8190 use "strh" with a scaled immediate; offsets in
// -256..255 use "sturh".
1119 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1121 if (!(offset & 1) && (u_int)offset <= 8190) {
1122 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1123 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1125 else if (-256 <= offset && offset < 256) {
1126 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1127 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Stores the low 8 bits of rt at [rs + offset]. Offsets 0..4095 use "strb";
// negative offsets down to -256 use "sturb".
1133 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1135 if ((u_int)offset < 4096) {
1136 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1137 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1139 else if (-256 <= offset && offset < 256) {
1140 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1141 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1147 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1149 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1150 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1153 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1155 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1156 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1159 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1161 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1162 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1165 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1167 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1171 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1173 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1177 static void emit_clz(u_int rs, u_int rt)
1179 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1180 output_w32(0x5ac01000 | rn_rd(rs, rt));
1183 // special case for checking invalid_code
// Emits code that loads the byte at rbase[r >> 12] and compares it with imm.
// HOST_TEMPREG is used as scratch and is held only for this sequence.
1184 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1186 host_tempreg_acquire();
// index = r >> 12
1187 emit_shrimm(r, 12, HOST_TEMPREG);
1188 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
// byte load from rbase + zero-extended index, overwriting the index register
1189 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1190 emit_cmpimm(HOST_TEMPREG, imm);
1191 host_tempreg_release();
1194 // special for loadlr_assemble, rs2 is destroyed
// Shifts rs2 left by `shift` in place (via emit_shl), then emits
// emit_bic(rs1, rs2, rt) using the shifted value.
1195 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1197 emit_shl(rs2, shift, rs2);
1198 emit_bic(rs1, rs2, rt);
// Right-shift counterpart of emit_bic_lsl: shifts rs2 right by `shift` in
// place (so rs2 is overwritten), then emits emit_bic(rs1, rs2, rt).
1201 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1203 emit_shr(rs2, shift, rs2);
1204 emit_bic(rs1, rs2, rt);
// Emit the 0x58000000 opcode (referred to in this file as "ldr (literal)")
// with a 19-bit offset field `ofs` and destination register rt.
// The dirty-stub code emits it with ofs 0 and patches the offset afterwards
// through set_loadlp().
1207 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1209 output_w32(0x58000000 | imm19_rt(ofs, rt));
// Emit a load or store of register rt at [rn + ofs] with an unsigned,
// size-scaled immediate offset.
//   is_st - non-zero emits a store, zero emits a load
//   is64  - non-zero selects the 64-bit (x) form, zero the 32-bit (w) form
//   ofs   - byte offset; must be a multiple of the access size (4 or 8)
1212 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
// base opcode; the load bit and the 64-bit size bit are or-ed in below
1214 u_int op = 0xb9000000;
1215 unused const char *ldst = is_st ? "st" : "ld";
1216 unused char rp = is64 ? 'x' : 'w';
1217 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
// normalize to 0/1 so it can be used as a shift-count increment
1218 is64 = is64 ? 1 : 0;
1219 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// the immediate field holds the offset in units of the access size
1220 ofs = (ofs >> (2+is64));
1221 if (!is_st) op |= 0x00400000;
1222 if (is64) op |= 0x40000000;
1223 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a load-pair or store-pair of rt1/rt2 at [rn + ofs] with a signed,
// size-scaled 7-bit immediate offset.
//   is_st - non-zero emits a store pair, zero emits a load pair
//   is64  - non-zero selects the 64-bit (x) form, zero the 32-bit (w) form
//   ofs   - byte offset; must be a multiple of the access size, and the
//           scaled value must lie in -64..63
1226 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
// base opcode; the load bit and the 64-bit size bit are or-ed in below
1228 u_int op = 0x29000000;
1229 unused const char *ldst = is_st ? "st" : "ld";
1230 unused char rp = is64 ? 'x' : 'w';
1231 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalize to 0/1 so it can be used as a shift-count increment
1232 is64 = is64 ? 1 : 0;
1233 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// scale the byte offset down to units of the access size
1234 ofs = (ofs >> (2+is64));
1235 assert(-64 <= ofs && ofs <= 63);
1237 if (!is_st) op |= 0x00400000;
1238 if (is64) op |= 0x80000000;
1239 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Emit 64-bit stores (is_store != 0) or loads (is_store == 0) for the
// registers whose bits are set in reglist. Slots are addressed relative to
// SP at SSP_CALLEE_REGS + ofs; registers are handled as pairs through
// emit_ldstp, and a single register through emit_ldst.
1242 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask from bit 0 upward; stops once no set bits remain
1246 for (r = 0; reglist; r++, reglist >>= 1) {
1250 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1256 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the area used must not extend past SSP_CALLER_REGS
1259 assert(ofs <= SSP_CALLER_REGS);
1262 // Save registers before function call
// reglist is a bitmask of host registers; bits outside CALLER_SAVE_REGS are
// dropped before the stores are emitted.
1263 static void save_regs(u_int reglist)
1265 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1266 save_load_regs_all(1, reglist);
1269 // Restore registers after function call
// Mirror of save_regs: applies the same CALLER_SAVE_REGS mask to reglist and
// emits loads instead of stores.
1270 static void restore_regs(u_int reglist)
1272 reglist &= CALLER_SAVE_REGS; // only the caller-save registers were saved
1273 save_load_regs_all(0, reglist);
1276 /* Stubs/epilogue */
1278 static void literal_pool(int n)
1283 static void literal_pool_jumpover(int n)
1287 // parsed by get_pointer, find_extjump_insn
// Emits the stub for a jump that leaves the current block. `addr` points at
// the branch instruction being handled and `target` is a 32-bit address that
// is materialized into register 0.
1288 static void emit_extjump2(u_char *addr, u_int target, void *linker)
// addr[3] holds the top bits of the instruction word at addr (assuming a
// little-endian host); only "b" and "b.cond" are accepted
1290 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
// load the 32-bit target into register 0 as two 16-bit halves
1292 emit_movz(target & 0xffff, 0);
1293 emit_movk_lsl16(target >> 16, 0);
1295 // addr is in the current recompiled block (max 256k)
1296 // offset shouldn't exceed +/-1MB
// Sanity check that `src` points at a stub produced by emit_extjump2: the
// first instruction word must be a movz into register 0.
1301 static void check_extjump2(void *src)
1304 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1308 // put rt_val into rt, potentially making use of rs with value rs_val
// Strategies are tried in order: an add-immediate of the difference, the
// bitwise-complement case, an xor-immediate with a rotated mask, and finally
// a plain immediate load that ignores rs.
1309 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1311 int diff = rt_val - rs_val;
// difference fits a 12-bit immediate, or a 12-bit immediate shifted left by 12
1312 if ((-4096 < diff && diff < 4096)
1313 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1314 emit_addimm(rs, diff, rt);
1315 else if (rt_val == ~rs_val)
1317 else if (is_rotated_mask(rs_val ^ rt_val))
1318 emit_xorimm(rs, rs_val ^ rt_val, rt);
// fallback: materialize rt_val directly
1320 emit_movimm(rt_val, rt);
1323 // return 1 if the above function can do its job cheaply
// The conditions mirror the cheap cases tested by emit_movimm_from: a small
// add-immediate difference or an xor with a rotated mask.
1324 static int is_similar_value(u_int v1, u_int v2)
1327 return (-4096 < diff && diff < 4096)
1328 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1330 || is_rotated_mask(v1 ^ v2);
// Emit 64-bit register moves so that the value in host register a0 ends up
// in register 0 and the value in a1 ends up in register 1.
// NOTE(review): a0 and a1 are u_int, so the ">=0" tests below are always
// true; a negative "no register" value cannot be detected here.
1334 static void pass_args64(u_int a0, u_int a1)
// goes through register 2 as scratch
1338 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1340 else if(a0!=0&&a1==0) {
1342 if (a0>=0) emit_mov64(a0,0);
// moves are skipped when an argument is already in its target register
1345 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1346 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Emit the move/extension from rs into rt that matches the access width of
// the given stub type: a signed bitfield extract for signed loads, an
// unsigned one for byte/halfword stores, and a plain move for words.
1350 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// bit index 7 = byte, 15 = halfword
1353 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1355 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1356 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1358 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
// word-sized: no extension, and no move when source and target coincide
1360 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1365 #include "pcsxmem.h"
1366 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for memory-read stub n.
// The branch recorded at stubs[n].addr is pointed at the code emitted here.
// That code looks the address up through mem_rtab. One path performs the load
// directly and jumps back to stubs[n].retaddr; the other calls a
// jump_handler_read* routine chosen by access width, moves the result out of
// register 0, restores registers and jumps back.
1368 static void do_readstub(int n)
1370 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1371 set_jump_target(stubs[n].addr, out);
1372 enum stub_type type = stubs[n].type;
// stub fields as used here: b = host register holding the address,
// c = register state, e = list of live host registers
1374 int rs = stubs[n].b;
1375 const struct regstat *i_regs = (void *)stubs[n].c;
1376 u_int reglist = stubs[n].e;
1377 const signed char *i_regmap = i_regs->regmap;
// coprocessor and unaligned loads deliver into FTEMP rather than rt1[i]
1379 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1380 rt=get_reg(i_regmap,FTEMP);
1382 rt=get_reg(i_regmap,rt1[i]);
1385 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1386 void *restore_jump = NULL, *handler_jump = NULL;
// look for a host register that is not in reglist
1388 for (r = 0; r < HOST_CCREG; r++) {
1389 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1394 if(rt>=0&&rt1[i]!=0)
1401 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then doubled with flags set
1403 emit_readdword(&mem_rtab,temp);
1404 emit_shrimm(rs,12,temp2);
1405 emit_readdword_dualindexedx8(temp,temp2,temp2);
1406 emit_adds64(temp2,temp2,temp2);
// direct path: load straight from temp2 + rs with the width of the stub type
1409 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1411 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1412 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1413 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1414 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1415 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1421 emit_jmp(0); // jump to reg restore
1424 emit_jmp(stubs[n].retaddr); // return address
1425 set_jump_target(handler_jump, out);
// handler path: pick the read handler matching the access width
1430 if(type==LOADB_STUB||type==LOADBU_STUB)
1431 handler=jump_handler_read8;
1432 if(type==LOADH_STUB||type==LOADHU_STUB)
1433 handler=jump_handler_read16;
1434 if(type==LOADW_STUB)
1435 handler=jump_handler_read32;
// address goes to register 0, table entry to register 1
1437 pass_args64(rs,temp2);
1438 int cc=get_reg(i_regmap,CCREG);
// register 2 receives the cycle count adjusted by this stub's cycle delta
1440 emit_loadreg(CCREG,2);
1441 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1443 // (no cycle reload after read)
// the value is taken from register 0 and extended into rt
1444 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1445 loadstore_extend(type,0,rt);
1448 set_jump_target(restore_jump, out);
1449 restore_regs(reglist);
1450 emit_jmp(stubs[n].retaddr);
// Emit an inline read for an address that is known at compile time (addr).
// When get_direct_memhandler() returns no handler, the access is emitted as a
// direct host load from host_addr. Otherwise a call to a memory handler is
// emitted with the address in register 0 and the adjusted cycle count in
// register 2.
1453 static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
// rs and rt both start out as the host register mapped to `target`
1455 int rs=get_reg(regmap,target);
1456 int rt=get_reg(regmap,target);
// fall back to the register mapped to -1 when target has none
1457 if(rs<0) rs=get_reg(regmap,-1);
1460 uintptr_t host_addr = 0;
1462 int cc=get_reg(regmap,CCREG);
1463 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1465 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1466 if (handler == NULL) {
// direct access: turn the guest address held in rs into the host address
1469 if (addr != host_addr) {
1470 if (host_addr >= 0x100000000ull)
1471 abort(); // ROREG not implemented
1472 emit_movimm_from(addr, rs, host_addr, rs);
1475 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1476 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1477 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1478 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1479 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1484 is_dynamic=pcsxmem_is_handler_dynamic(addr);
// generic handlers, selected by access width
1486 if(type==LOADB_STUB||type==LOADBU_STUB)
1487 handler=jump_handler_read8;
1488 if(type==LOADH_STUB||type==LOADHU_STUB)
1489 handler=jump_handler_read16;
1490 if(type==LOADW_STUB)
1491 handler=jump_handler_read32;
1494 // call a memhandler
1495 if(rt>=0&&rt1[i]!=0)
1499 emit_movimm(addr,0);
// register 2 = cycle count plus the adjusted cycle delta
1503 emit_loadreg(CCREG,2);
1504 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
// register 1 = the mem_rtab entry for this page, shifted left by one,
// built with adrp + add of the low 12 bits
1506 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1507 emit_adrp((void *)l1, 1);
1508 emit_addimm64(1, l1 & 0xfff, 1);
1511 emit_call(do_memhandler_pre);
1515 // (no cycle reload after read)
// the value is taken from register 0 and extended into rt
1516 if(rt>=0&&rt1[i]!=0)
1517 loadstore_extend(type, 0, rt);
1518 restore_regs(reglist);
// Emit the out-of-line slow path for memory-write stub n.
// The structure mirrors do_readstub: the address is looked up through
// mem_wtab. One path stores directly and jumps back to stubs[n].retaddr; the
// other calls a jump_handler_write* routine chosen by access width, takes the
// updated cycle count from register 0, restores registers and jumps back.
1521 static void do_writestub(int n)
1523 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1524 set_jump_target(stubs[n].addr, out);
1525 enum stub_type type=stubs[n].type;
1528 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1529 u_int reglist=stubs[n].e;
1530 signed char *i_regmap=i_regs->regmap;
// coprocessor stores take the value from FTEMP rather than rs2[i]
1532 if(itype[i]==C1LS||itype[i]==C2LS) {
1533 rt=get_reg(i_regmap,r=FTEMP);
1535 rt=get_reg(i_regmap,r=rs2[i]);
1539 int rtmp,temp=-1,temp2,regs_saved=0;
1540 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally marks the address and value registers as in use
1541 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a host register that is not in reglist
1542 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1543 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1551 for(rtmp=0;rtmp<=3;rtmp++)
1552 if(rtmp!=rs&&rtmp!=rt)
1555 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1558 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled with flags set
1561 emit_readdword(&mem_wtab,temp);
1562 emit_shrimm(rs,12,temp2);
1563 emit_readdword_dualindexedx8(temp,temp2,temp2);
1564 emit_adds64(temp2,temp2,temp2);
// direct path: store to temp2 + rs with the width of the stub type
1568 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1569 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1570 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1575 emit_jmp(0); // jump to reg restore
1578 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1579 set_jump_target(handler_jump, out);
1581 // TODO FIXME: regalloc should prefer callee-saved regs
1586 case STOREB_STUB: handler=jump_handler_write8; break;
1587 case STOREH_STUB: handler=jump_handler_write16; break;
1588 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry is passed in register 3
1594 emit_mov64(temp2,3);
1595 host_tempreg_release();
1597 int cc=get_reg(i_regmap,CCREG);
// register 2 receives the cycle count adjusted by this stub's cycle delta
1599 emit_loadreg(CCREG,2);
1600 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1601 // returns new cycle_count
// undo the adjustment on the value returned in register 0
1603 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1605 emit_storereg(CCREG,2);
1607 set_jump_target(restore_jump, out);
1608 restore_regs(reglist);
1609 emit_jmp(stubs[n].retaddr);
// Emit an inline write for an address that is known at compile time (addr).
// When get_direct_memhandler() returns no handler, the access is emitted as a
// direct host store to host_addr. Otherwise a memory-handler call is emitted,
// bracketed by do_memhandler_pre / do_memhandler_post, with cycle-count
// bookkeeping around it.
1612 static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
// rs: register mapped to -1, used for the address;
// rt: register holding the value of `target`
1614 int rs = get_reg(regmap,-1);
1615 int rt = get_reg(regmap,target);
1618 uintptr_t host_addr = 0;
1619 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1620 if (handler == NULL) {
// direct access: turn the guest address held in rs into the host address
1621 if (addr != host_addr) {
1622 if (host_addr >= 0x100000000ull)
1623 abort(); // ROREG not implemented
1624 emit_movimm_from(addr, rs, host_addr, rs);
1627 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1628 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1629 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1635 // call a memhandler
1637 emit_writeword(rs, &address); // some handlers still need it
// the value to store goes to register 0, narrowed to the access width
1638 loadstore_extend(type, rt, 0);
1640 cc = cc_use = get_reg(regmap, CCREG);
// register 2 is used when the cycle count has to be loaded here
1642 emit_loadreg(CCREG, (cc_use = 2));
1643 emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2);
1645 emit_call(do_memhandler_pre);
1647 emit_call(do_memhandler_post);
// register 0 is treated as the new cycle count; undo the earlier adjustment
1648 emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use);
1650 emit_storereg(CCREG, cc_use);
1651 restore_regs(reglist);
// Compare `size` bytes of the original guest code (`source`) with the saved
// `copy` using memcmp. Called from generated code: do_dirty_stub_base emits
// the call.
1654 static int verify_code_arm64(const void *source, const void *copy, u_int size)
1656 int ret = memcmp(source, copy, size);
1657 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
1661 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emits the common head of a "dirty" entry stub: load the source and copy
// pointers from literals (offsets are patched later by set_loadlp), pass the
// block length in bytes, call verify_code_arm64, and emit a call to
// get_addr with vaddr in register 0.
// The layout must stay in sync with assert_dirty_stub().
1662 static void do_dirty_stub_base(u_int vaddr)
// slen*4 has to fit the 16-bit movz immediate below
1664 assert(slen <= MAXBLOCK);
1665 emit_loadlp_ofs(0, 0); // ldr x0, source
1666 emit_loadlp_ofs(0, 1); // ldr x1, copy
1667 emit_movz(slen*4, 2); // movz w2, #slen*4
1668 emit_call(verify_code_arm64);
// vaddr is built in register 0 as two 16-bit halves
1671 emit_movz(vaddr & 0xffff, 0);
1672 emit_movk_lsl16(vaddr >> 16, 0);
1673 emit_call(get_addr);
1675 set_jump_target(jmp, out);
// Assert that ptr points at a stub laid out as emitted by
// do_dirty_stub_base: two literal loads into x0/x1, a movz into w2, and a
// "br x0" as the ninth instruction word.
1678 static void assert_dirty_stub(const u_int *ptr)
1680 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1681 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1682 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4
1683 assert( ptr[8] == 0xd61f0000); // br x0
// Patch the literal-load instruction at `loadl` (emitted with a zero offset
// by emit_loadlp_ofs) so that it refers to the literal at `lit`.
1686 static void set_loadlp(u_int *loadl, void *lit)
1688 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
// must be an unpatched 0x58000000 instruction; only the register bits may be set
1689 assert((*loadl & ~0x1f) == 0x58000000);
// word-aligned, forward, and below 1MB; ofs is unsigned, so a literal placed
// before the instruction wraps to a huge value and fails the range assert
1690 assert((ofs & 3) == 0);
1691 assert(ofs < 0x100000);
// the offset is stored in words, starting at bit 5
1692 *loadl |= (ofs >> 2) << 5;
// Emit the two 64-bit literals (the `source` and `copy` pointers) at the
// current output position and patch the two literal loads at loadlps[0] and
// loadlps[1] to point at them.
1695 static void do_dirty_stub_emit_literals(u_int *loadlps)
1697 set_loadlp(&loadlps[0], out);
1698 output_w64((uintptr_t)source);
1699 set_loadlp(&loadlps[1], out);
1700 output_w64((uintptr_t)copy);
// Emit the dirty-entry stub for instruction i of the current block: the
// common verification head for guest address start + i*4, a jump to the
// compiled code of instruction i, and then the literals used by the head.
1703 static void *do_dirty_stub(int i)
1705 assem_debug("do_dirty_stub %x\n",start+i*4);
// remember where the two literal loads are so they can be patched below
1706 u_int *loadlps = (void *)out;
1707 do_dirty_stub_base(start + i*4);
1711 entry = instr_addr[i];
1712 emit_jmp(instr_addr[i]);
1713 do_dirty_stub_emit_literals(loadlps);
// Variant of do_dirty_stub that passes start + 1 as the address to
// do_dirty_stub_base (judging by the "_ds" name, presumably the delay-slot
// case; TODO confirm). Execution continues right after the stub, so a jump
// over the two 8-byte literals is emitted.
1717 static void do_dirty_stub_ds(void)
1719 u_int *loadlps = (void *)out;
1720 do_dirty_stub_base(start + 1);
1721 void *lit_jumpover = out;
// provisional target past the 2 * 8 bytes of literals; fixed up below
1722 emit_jmp(out + 8*2);
1723 do_dirty_stub_emit_literals(loadlps);
1724 set_jump_target(lit_jumpover, out);
// Return the 64-bit literal that the literal-load instruction at `i` refers
// to. `ofs` is used as an offset in 32-bit words relative to the instruction.
1727 static uint64_t get_from_ldr_literal(const u_int *i)
// must be the 0x58 literal-load opcode
1730 assert((i[0] & 0xff000000) == 0x58000000);
1733 return *(uint64_t *)(i + ofs);
// Return the 16-bit immediate encoded in the movz instruction at `i`.
1736 static uint64_t get_from_movz(const u_int *i)
// accepts the movz opcode regardless of the top (size) bit and the register
1738 assert((i[0] & 0x7fe00000) == 0x52800000);
// the immediate occupies bits 5..20
1739 return (i[0] >> 5) & 0xffff;
1742 // Find the "clean" entry point from a "dirty" entry point
1743 // by skipping past the call to verify_code
// addr must point at a dirty stub; this is asserted before anything else.
1744 static void *get_clean_addr(u_int *addr)
1746 assert_dirty_stub(addr);
// Check whether the guest code that the dirty stub at ptr was compiled from
// is unchanged: the source pointer, copy pointer and length are decoded from
// the stub's instructions. Returns non-zero when source and copy match.
1750 static int verify_dirty(const u_int *ptr)
1752 const void *source, *copy;
1754 assert_dirty_stub(ptr);
1755 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1756 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1757 len = get_from_movz(&ptr[2]); // movz w2, #slen*4
1758 return !memcmp(source, copy, len);
// Tell whether the code at addr is a clean entry point. An entry that starts
// with the 0x58 literal-load opcode is recognized as a dirty stub.
1761 static int isclean(void *addr)
1763 const u_int *ptr = addr;
1764 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1765 assert_dirty_stub(ptr);
1771 // get source that block at addr was compiled from (host pointers)
// addr must point at a dirty stub. *start receives the source pointer
// decoded from the first literal load, and *end is *start plus the byte
// length decoded from the movz.
1772 static void get_bounds(void *addr, u_char **start, u_char **end)
1774 const u_int *ptr = addr;
1775 assert_dirty_stub(ptr);
1776 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1777 *end = *start + get_from_movz(&ptr[2]); // movz w2, #slen*4
// Emit the code that precedes a GTE (cop2) handler call: store the registers
// in reglist (no CALLER_SAVE_REGS masking is applied here) and set register 0
// to the address of psxRegs.CP2D.r[0], computed relative to FP.
1782 static void c2op_prologue(u_int op,u_int reglist)
1784 save_load_regs_all(1, reglist);
1787 emit_call(pcnt_gte_start);
1789 // pointer to cop2 regs
1790 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a GTE (cop2) handler call: reload the registers
// in reglist that c2op_prologue stored.
1793 static void c2op_epilogue(u_int op,u_int reglist)
1797 emit_call(pcnt_gte_end);
1799 save_load_regs_all(0, reglist);
// Assemble GTE (cop2) operation i. When a handler exists for the operation,
// emit: register save, the opcode stored to psxRegs.code, a call to the
// handler (the "_nf" variant when the flags are not needed), and register
// restore.
1802 static void c2op_assemble(int i,struct regstat *i_regs)
// the low 6 bits of the instruction select the GTE operation
1804 u_int c2op=source[i]&0x3f;
1805 u_int hr,reglist_full=0,reglist;
1806 int need_flags,need_ir;
// collect every host register that currently has a mapping
1807 for(hr=0;hr<HOST_REGS;hr++) {
1808 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1810 reglist=reglist_full&CALLER_SAVE_REGS;
1812 if (gte_handlers[c2op]!=NULL) {
1813 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// IR results are needed unless all three 0xe00 bits are marked unneeded
1814 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1815 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1816 source[i],gte_unneeded[i+1],need_flags,need_ir);
1817 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
1819 //int shift = (source[i] >> 19) & 1;
1820 //int lm = (source[i] >> 10) & 1;
1824 c2op_prologue(c2op,reglist);
1825 emit_movimm(source[i],1); // opcode
1826 emit_writeword(1,&psxRegs.code);
1827 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1830 c2op_epilogue(c2op,reglist);
// Emit the value fix-up for a CTC2 write to control register 31: the result
// is left in temp, computed from sl. The sequence ends in assert(0) and is
// marked as untested.
// NOTE(review): the masks in the emitted code (0x7fffe000 / 0xff87ffff) do
// not match the reference expression in the comments below
// (0x7ffff000 / 0x7f87e000); verify before enabling.
1834 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1836 //value = value & 0x7ffff000;
1837 //if (value & 0x7f87e000) value |= 0x80000000;
1838 emit_andimm(sl, 0x7fffe000, temp);
1839 emit_testimm(temp, 0xff87ffff);
1840 emit_andimm(sl, 0x7ffff000, temp);
1841 host_tempreg_acquire();
// HOST_TEMPREG = temp with bit 31 set; selected when the test was non-zero
1842 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1843 emit_cmovne_reg(HOST_TEMPREG, temp);
1844 host_tempreg_release();
1845 assert(0); // testing needed
// Emit code that reads cop2 data register `copr` as a signed halfword into
// temp and reduces it to the 0xf80 bit field, saturating values above 0xf80.
// Helper for c2op_mfc2_29_assemble, which combines three such fields.
1848 static void do_mfc2_31_one(u_int copr,signed char temp)
1850 emit_readshword(&reg_cop2d[copr],temp);
// presumably temp &= ~(temp >> 31), i.e. negative values become 0 -- TODO confirm
1851 emit_bicsar_imm(temp,31,temp);
1852 emit_cmpimm(temp,0xf80);
1853 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1854 emit_andimm(temp,0xf80,temp);
// Emit code that rebuilds cop2 data register 29 from registers 9, 10 and 11.
// Each source is reduced with do_mfc2_31_one, the three fields are combined
// into tl with shifts, and the result is also written back to reg_cop2d[29].
// HOST_TEMPREG serves as the scratch register when it is assigned to temp
// below, and is released again at the end.
1857 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1860 host_tempreg_acquire();
1861 temp = HOST_TEMPREG;
// register 9 field: shifted right by 7 into tl
1863 do_mfc2_31_one(9,temp);
1864 emit_shrimm(temp,7,tl);
// register 10 field: shifted right by 2 and or-ed in
1865 do_mfc2_31_one(10,temp);
1866 emit_orrshr_imm(temp,2,tl);
// register 11 field: shifted left by 3 and or-ed in
1867 do_mfc2_31_one(11,temp);
1868 emit_orrshl_imm(temp,3,tl);
1869 emit_writeword(tl,&reg_cop2d[29]);
1871 if (temp == HOST_TEMPREG)
1872 host_tempreg_release();
// Assemble the MIPS multiply/divide instruction i into ARM64 code, with the
// results going to the host registers mapped to HIREG and LOREG.
//   - multiply: one smull/umull into the 64-bit `hi` register, whose upper
//     half is then shifted down into place
//   - divide: sdiv/udiv for the quotient, msub for the remainder, then a
//     conditional fix-up of the quotient for a zero denominator
//   - separate paths for a zero divisor register (rs2[i]==0) and for
//     multiplication by zero
1875 static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1888 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1889 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1890 signed char hi=get_reg(i_regs->regmap,HIREG);
1891 signed char lo=get_reg(i_regs->regmap,LOREG);
1897 if(opcode2[i]==0x18) // MULT
1898 emit_smull(m1,m2,hi);
1900 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the 64-bit product in hi
1903 emit_shrimm64(hi,32,hi);
1909 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1910 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1911 signed char quotient=get_reg(i_regs->regmap,LOREG);
1912 signed char remainder=get_reg(i_regs->regmap,HIREG);
1913 assert(numerator>=0);
1914 assert(denominator>=0);
1915 assert(quotient>=0);
1916 assert(remainder>=0);
1918 if (opcode2[i] == 0x1A) // DIV
1919 emit_sdiv(numerator,denominator,quotient);
1921 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1922 emit_msub(quotient,denominator,numerator,remainder);
1924 // div 0 quotient (remainder is already correct)
1925 host_tempreg_acquire();
1926 if (opcode2[i] == 0x1A) // DIV
1927 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1929 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient with the fix-up value only when denominator == 0
1930 emit_test(denominator,denominator);
1931 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1932 host_tempreg_release();
1941 signed char hr=get_reg(i_regs->regmap,HIREG);
1942 signed char lr=get_reg(i_regs->regmap,LOREG);
1943 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1946 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1947 assert(numerator >= 0);
// dividing by register zero: HI takes the numerator unchanged
1949 emit_mov(numerator,hr);
1951 if (opcode2[i] == 0x1A) // DIV
1952 emit_sub_asrimm(0,numerator,31,lr);
1958 if (hr >= 0) emit_zeroreg(hr);
1959 if (lr >= 0) emit_movimm(~0,lr);
1964 // Multiply by zero is zero.
1965 if (hr >= 0) emit_zeroreg(hr);
1966 if (lr >= 0) emit_zeroreg(lr);
1970 #define multdiv_assemble multdiv_assemble_arm64
// Emit code for a jump to the guest virtual address held in host register
// rs; the emitted sequence includes a call to get_addr_ht.
1972 static void do_jump_vaddr(u_int rs)
1976 emit_call(get_addr_ht);
// Hook for preloading a hash constant into register r; see the note in the
// body for why nothing is needed on this target.
1980 static void do_preload_rhash(u_int r) {
1981 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1982 // register. On ARM the hash can be done with a single instruction (below)
// Emit code that puts the address of mini_ht into host register ht, computed
// as an offset from FP (mini_ht is addressed relative to dynarec_local).
1985 static void do_preload_rhtbl(u_int ht) {
1986 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
// Emit the mini hash table hash: rh = rs & 0xf8. The mask keeps bits 3..7,
// so rh is a multiple of 8 in the range 0..0xf8.
1989 static void do_rhash(u_int rs,u_int rh) {
1990 emit_andimm(rs, 0xf8, rh);
// Emit code that advances ht by the hash offset in rh (ht then points at the
// selected entry) and loads the 32-bit word at that entry into rh.
1993 static void do_miniht_load(int ht, u_int rh) {
1994 emit_add64(ht, rh, ht);
// 32-bit load, offset 0
1995 emit_ldst(0, 0, rh, ht, 0);
// Emit the mini hash table jump sequence for the address in rs. The visible
// part loads the 64-bit value at [ht + 8] back into ht.
1998 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2004 set_jump_target(jaddr, out);
2005 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// 64-bit load; the immediate field is scaled by 8, hence 8 >> 3
2006 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2010 // parsed by set_jump_target?
// Emit code that records return_address in the mini hash table. The slot
// index is (return_address & 0xFF) >> 3; element [0] of the slot receives
// the 32-bit guest address from rt and element [1] the 64-bit value in temp.
2011 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// build return_address in rt from its two 16-bit halves
2012 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2013 emit_movk(return_address&0xffff,rt);
// register the current output position with the linker for return_address
2014 add_to_linker(out,return_address,1);
2016 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2017 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Mark the 4KB page of the translation cache containing `target` as needing
// a cache clear. needs_clear_cache holds one bit per 4KB page, 32 pages
// (128KB) per word. The first time a page is marked, start_tcache_write() is
// called for that page.
2020 static void mark_clear_cache(void *target)
2022 u_long offset = (u_char *)target - translation_cache;
// bit within the word: page index modulo 32
2023 u_int mask = 1u << ((offset >> 12) & 31);
// word index: one word per 128KB (1 << 17)
2024 if (!(needs_clear_cache[offset >> 17] & mask)) {
// round target down to the start of its 4KB page
2025 char *start = (char *)((u_long)target & ~4095ul);
2026 start_tcache_write(start, start + 4096);
2027 needs_clear_cache[offset >> 17] |= mask;
2031 // Clearing the cache is rather slow on ARM Linux, so mark the areas
2032 // that need to be cleared, and then only clear these areas once.
// Walks every word of needs_clear_cache (filled in by mark_clear_cache),
// calls end_tcache_write() on the marked address ranges and resets the word.
2033 static void do_clear_cache()
2036 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2038 u_int bitmap=needs_clear_cache[i];
2040 u_char *start, *end;
// word i covers 128KB (131072 bytes); bit j covers one 4KB page within it
2044 start=translation_cache+i*131072+j*4096;
2052 end_tcache_write(start, end);
2058 needs_clear_cache[i]=0;
2063 // CPU-architecture-specific initialization
2064 static void arch_init() {
2067 // vim:shiftwidth=2:expandtab