1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
// Register bitmask with bits 0..18 set; save_regs() ANDs its register list
// with this so that only these registers are considered.
26 #define CALLER_SAVE_REGS 0x0007ffff
// Shorthand for the compiler attribute that suppresses unused-symbol warnings.
28 #define unused __attribute__((unused))
// Forward declarations; the definitions are not visible in this part of the file.
30 void do_memhandler_pre();
31 void do_memhandler_post();
// Patch the PC-relative field of the instruction at `addr` so it refers to
// `target`. The instruction kind is recognised from its opcode bits:
// b, b.cond, cbz/cbnz and adr are handled; anything else aborts.
// `offset` is the byte distance from `addr` to `target`.
34 static void set_jump_target(void *addr, void *target)
37 intptr_t offset = (u_char *)target - (u_char *)addr;
39 if ((*ptr&0xFC000000) == 0x14000000) { // b
// unconditional branch: 26-bit word offset, so the range is +/-128MB
40 assert(offset>=-134217728LL&&offset<134217728LL);
41 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
43 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
44 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
45 // Conditional branch are limited to +/- 1MB
46 // block max size is 256k so branching beyond the +/- 1MB limit
47 // should only happen when jumping to an already compiled block (see add_jump_out)
48 // a workaround would be to do a trampoline jump via a stub at the end of the block
49 assert(-1048576 <= offset && offset < 1048576);
// NOTE(review): the mask 0xFF00000F preserves only bits 0-3 below the imm19
// field. For cbz/cbnz the register number occupies bits 0-4, so bit 4 would
// be cleared here -- confirm this path is never hit with a register >= 16.
50 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
52 else if((*ptr&0x9f000000)==0x10000000) { // adr
53 // generated by do_miniht_insert
54 assert(offset>=-1048576LL&&offset<1048576LL);
// adr splits the byte offset: low 2 bits go to bits 29-30, the rest to bits 5-23
55 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
58 abort(); // should not happen
// From a pointer to an external jump stub (as produced by emit_extjump2),
// find where the jumping instruction is.
//
// The third instruction word of the stub must be an `adr`; its PC-relative
// immediate is decoded and applied to the address of that word.
// Returns a pointer to the instruction the adr refers to (word granularity:
// the byte offset is divided by 4, truncating toward zero).
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  int32_t immhi;
  int offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr

  // immhi lives in bits 5..23 (19 bits, signed); extract it with unsigned
  // arithmetic and sign-extend explicitly instead of shifting a signed int
  // left, which is undefined when it overflows into the sign bit.
  immhi = (int32_t)((insn >> 5) & 0x7ffff);
  if (immhi & 0x40000)
    immhi -= 0x80000;

  // immlo (bits 29..30) supplies the two low bits of the byte offset
  offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
71 // find where the external branch is linked to, using the address of its stub:
72 // get address that the stub loads (dyna_linker arg1),
73 // treat it as a pointer to branch insn,
74 // return addr where that branch jumps to
75 static void *get_pointer(void *stub)
77 int *i_ptr = find_extjump_insn(stub);
// b: sign-extend the 26-bit word offset (bits 0-25) and add it to the insn address
78 if ((*i_ptr&0xfc000000) == 0x14000000) // b
79 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
// b.cond / cbz / cbnz: sign-extend the 19-bit word offset held in bits 5-23
80 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
81 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
82 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
87 // Allocate a specific ARM register.
// Looks for guest register `reg` in cur->regmap and remembers its dirty bit,
// then records that dirty bit for host register `hr` and clears hr's
// constant flag.
88 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
// carry the dirty state over from the previous host register
97 dirty=(cur->dirty>>n)&1;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
108 // Alloc cycle count into dedicated register
109 static void alloc_cc(struct regstat *cur,int i)
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
119 static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
126 static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition codes in enumeration order (COND_EQ first); the emitters place
// these values directly into instruction words, and condname[] below lists
// the matching mnemonics in the same order.
134 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
135 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
138 static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Store one 32-bit word at the current output position `out`.
// NOTE(review): advancing `out` is not visible in this view -- confirm.
143 static void output_w32(u_int word)
145 *((u_int *)out) = word;
// Store one 64-bit value at the current output position `out`.
// NOTE(review): advancing `out` is not visible in this view -- confirm.
149 static void output_w64(uint64_t dword)
151 *((uint64_t *)out) = dword;
// Pack register fields: rm at bit 16, rd at bit 0.
156 static u_int rm_rd(u_int rm, u_int rd)
160 return (rm << 16) | rd;
// Pack register fields: rn at bit 5, rd at bit 0.
164 static u_int rn_rd(u_int rn, u_int rd)
168 return (rn << 5) | rd;
// Pack register fields: rm at bit 16, rn at bit 5, rd at bit 0.
171 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
176 return (rm << 16) | (rn << 5) | rd;
// Same packing as rm_rn_rd(), plus a fourth register ra at bit 10.
179 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Pack fields for a register-pair access: imm7 at bit 15, rt2 at bit 10,
// rn at bit 5, rt at bit 0.
185 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
191 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Same packing as rm_rn_rd(), plus a 6-bit immediate (e.g. a shift amount)
// at bit 10.
194 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Pack a 16-bit immediate at bit 5 with rd at bit 0.
// Asserts that imm16 fits in 16 bits.
200 static u_int imm16_rd(u_int imm16, u_int rd)
202 assert(imm16 < 0x10000);
204 return (imm16 << 5) | rd;
// Pack a 12-bit immediate at bit 10, rn at bit 5, rd at bit 0.
// Asserts that imm12 fits in 12 bits.
207 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
209 assert(imm12 < 0x1000);
212 return (imm12 << 10) | (rn << 5) | rd;
// Pack a 9-bit immediate at bit 12, rn at bit 5 and the transfer register at
// bit 0. Asserts that imm9 fits in 9 bits, so callers pass a signed offset
// already masked with 0x1ff.
215 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
217 assert(imm9 < 0x200);
220 return (imm9 << 12) | (rn << 5) | rd;
// Pack a 19-bit immediate at bit 5 with rt at bit 0.
// Asserts that imm19 fits in 19 bits.
223 static u_int imm19_rt(u_int imm19, u_int rt)
225 assert(imm19 < 0x80000);
227 return (imm19 << 5) | rt;
// Pack bitfield/logical-immediate fields: n at bit 22, immr at bit 16,
// imms at bit 10, rn at bit 5, rd at bit 0.
230 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
237 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Compute the 26-bit word-offset field for an unconditional branch from the
// current output position `out` to `addr`.
// Addresses below 3 are placeholders: 0 is returned and the branch is
// expected to be patched later. Offsets outside +/-128MB are reported via
// SysPrintf.
240 static u_int genjmp(const u_char *addr)
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
244 if (offset < -134217728 || offset > 134217727) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
249 return ((u_int)offset >> 2) & 0x03ffffff;
// Compute the 19-bit word-offset field for a conditional branch (or
// cbz/cbnz) from the current output position `out` to `addr`.
// Addresses below 3 are placeholders and yield 0. Offsets outside the
// +/-1MB range are reported via SysPrintf.
252 static u_int genjmpcc(const u_char *addr)
254 intptr_t offset = addr - out;
255 if ((uintptr_t)addr < 3) return 0;
256 if (offset < -1048576 || offset > 1048572) {
257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
261 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when `value` is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ... 0xffffffff), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;

  // such a value shares no bits with its successor
  return (value & (value + 1)) == 0;
}
269 // This function returns true if the argument contains a
270 // non-empty sequence of ones (possibly rotated) with the remainder zero.
// All-zero and all-one values are tested separately first. The first
// is_mask() test fills the low zeros of `value`, so it succeeds for a single
// run of ones; the final test applies the same check to ~value.
271 static uint32_t is_rotated_mask(u_int value)
273 if (value == 0 || value == ~0)
275 if (is_mask((value - 1) | value))
277 return is_mask((~value - 1) | ~value);
// Derive the immr/imms fields of a logical-immediate encoding for `value`.
// Callers in this file check is_rotated_mask(value) before calling.
// The run length of ones is computed from the leading and trailing zero counts.
// NOTE(review): both visible branches test the same expression; presumably
// `value` is modified between them in lines not visible here -- confirm.
280 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
282 int lzeros, tzeros, ones;
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
// rotate-right amount that moves the run from bit 0 up to bit `tzeros`
288 *immr = (32 - tzeros) & 31;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
304 static void emit_mov(u_int rs, u_int rt)
306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
310 static void emit_mov64(u_int rs, u_int rt)
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
316 static void emit_add(u_int rs1, u_int rs2, u_int rt)
318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
322 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
328 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
334 static void emit_neg(u_int rs, u_int rt)
336 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
337 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
340 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
342 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
343 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
346 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
348 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
349 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
352 static void emit_movz(u_int imm, u_int rt)
354 assem_debug("movz %s,#%#x\n", regname[rt], imm);
355 output_w32(0x52800000 | imm16_rd(imm, rt));
358 static void emit_movz_lsl16(u_int imm, u_int rt)
360 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x52a00000 | imm16_rd(imm, rt));
364 static void emit_movn(u_int imm, u_int rt)
366 assem_debug("movn %s,#%#x\n", regname[rt], imm);
367 output_w32(0x12800000 | imm16_rd(imm, rt));
370 static void emit_movn_lsl16(u_int imm,u_int rt)
372 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
373 output_w32(0x12a00000 | imm16_rd(imm, rt));
376 static void emit_movk(u_int imm,u_int rt)
378 assem_debug("movk %s,#%#x\n", regname[rt], imm);
379 output_w32(0x72800000 | imm16_rd(imm, rt));
// Emit "movk rt,#imm,lsl #16" with a 16-bit immediate (range checked by
// imm16_rd).
382 static void emit_movk_lsl16(u_int imm,u_int rt)
385 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
386 output_w32(0x72a00000 | imm16_rd(imm, rt));
// Presumably loads zero into rt: callers in this file use it as the
// alternative to emit_movimm(0,rt). The body is not visible here -- confirm.
389 static void emit_zeroreg(u_int rt)
// Load a 32-bit constant into rt using the shortest form among the visible
// cases: a single movz/movn variant when only one 16-bit half is
// significant, an orr with a logical immediate when `imm` is a rotated run of
// ones, and otherwise a movz + movk pair.
394 static void emit_movimm(u_int imm, u_int rt)
398 else if ((~imm) < 65536)
400 else if ((imm&0xffff) == 0)
401 emit_movz_lsl16(imm >> 16, rt);
402 else if (((~imm)&0xffff) == 0)
403 emit_movn_lsl16(~imm >> 16, rt);
404 else if (is_rotated_mask(imm)) {
406 gen_logical_imm(imm, &immr, &imms);
407 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
408 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
// general case: low half first, then patch in the high half
411 emit_movz(imm & 0xffff, rt);
412 emit_movk_lsl16(imm >> 16, rt);
// Emit a 32-bit load of the variable at `addr`, addressed as FP plus its
// byte offset from dynarec_local. The visible form requires a 4-byte aligned
// offset of at most 16380 (a 12-bit field scaled by 4).
416 static void emit_readword(void *addr, u_int rt)
418 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
419 if (!(offset & 3) && offset <= 16380) {
420 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
421 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emit a 64-bit load of the variable at `addr`, addressed as FP plus its
// byte offset from dynarec_local. The visible form requires an 8-byte aligned
// offset of at most 32760 (a 12-bit field scaled by 8).
427 static void emit_readdword(void *addr, u_int rt)
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 7) && offset <= 32760) {
431 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
432 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emit a sign-extending 16-bit load ("ldrsh") of the variable at `addr`,
// addressed as FP plus its byte offset from dynarec_local. The visible form
// requires a 2-byte aligned offset of at most 8190.
438 static void emit_readshword(void *addr, u_int rt)
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Load guest register `r` from memory into host register `hr`.
// The address defaults to the psxRegs.GPR.r[r] slot; CCREG, CSREG and INVCP
// are redirected to their own variables. INVCP is marked as 64-bit and read
// with emit_readdword; the 32-bit path uses emit_readword.
449 static void emit_loadreg(u_int r, u_int hr)
456 void *addr = &psxRegs.GPR.r[r];
458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
462 case INVCP: addr = &invc_ptr; is64 = 1; break;
463 default: assert(r < 34); break;
466 emit_readdword(addr, hr);
468 emit_readword(addr, hr);
// Emit a 32-bit store of rt to the variable at `addr`, addressed as FP plus
// its byte offset from dynarec_local. The visible form requires a 4-byte
// aligned offset of at most 16380.
472 static void emit_writeword(u_int rt, void *addr)
474 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
475 if (!(offset & 3) && offset <= 16380) {
476 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
477 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emit a 64-bit store of rt to the variable at `addr`, addressed as FP plus
// its byte offset from dynarec_local. The visible form requires an 8-byte
// aligned offset of at most 32760.
483 static void emit_writedword(u_int rt, void *addr)
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 7) && offset <= 32760) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
488 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Store host register `hr` back to the memory slot of guest register `r`.
// The address defaults to psxRegs.GPR.r[r]; CCREG is redirected to
// cycle_count. The value is always written as a 32-bit word.
494 static void emit_storereg(u_int r, u_int hr)
497 void *addr = &psxRegs.GPR.r[r];
499 //case HIREG: addr = &hi; break;
500 //case LOREG: addr = &lo; break;
501 case CCREG: addr = &cycle_count; break;
502 default: assert(r < 34); break;
504 emit_writeword(hr, addr);
507 static void emit_test(u_int rs, u_int rt)
509 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
510 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
// Emit "tst rs,#imm": a flag-setting AND with a logical immediate whose
// result goes to the zero register. Only immediates that are a rotated run
// of ones are supported; anything else trips the assert.
513 static void emit_testimm(u_int rs, u_int imm)
516 assem_debug("tst %s,#%#x\n", regname[rs], imm);
517 assert(is_rotated_mask(imm)); // good enough for PCSX
518 gen_logical_imm(imm, &immr, &imms);
519 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
522 static void emit_not(u_int rs,u_int rt)
524 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
525 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
528 static void emit_and(u_int rs1,u_int rs2,u_int rt)
530 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
531 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
534 static void emit_or(u_int rs1,u_int rs2,u_int rt)
536 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
537 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
540 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
542 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
543 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
546 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
548 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
549 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
552 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
554 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
555 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
558 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
560 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
561 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
564 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
566 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
567 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
570 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
572 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
573 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Add the constant `imm` to rs, writing rt.
//   s    - non-zero selects the flag-setting ("s") form
//   is64 - non-zero selects the 64-bit form
// `imm` is treated as a two's-complement value: small magnitudes use one
// add or sub with a 12-bit immediate, magnitudes below 2^24 use an
// instruction with the upper 12 bits (shifted by 12) followed, if needed, by
// one for the lower 12 bits.
// NOTE(review): the debug lines always use the 32-bit register names, and in
// the two-instruction cases the printed source/destination registers do not
// match the operands actually encoded (first insn reads rs, second reads rt).
576 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
578 unused const char *st = s ? "s" : "";
// turn the boolean arguments into their opcode bits
579 s = s ? 0x20000000 : 0;
580 is64 = is64 ? 0x80000000 : 0;
582 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
583 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
// small negative constant: emit a subtract of the magnitude
585 else if (-imm < 4096) {
586 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
587 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
589 else if (imm < 16777216) {
590 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
591 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
// the second instruction is also emitted when flags are requested, since
// only it carries the flag-setting bit
592 if ((imm & 0xfff) || s) {
593 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
597 else if (-imm < 16777216) {
598 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
599 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
602 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
609 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
611 emit_addimm_s(0, 0, rs, imm, rt);
614 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
616 emit_addimm_s(0, 1, rs, imm, rt);
619 static void emit_addimm_and_set_flags(int imm, u_int rt)
621 emit_addimm_s(1, 0, rt, imm, rt);
624 static void emit_addimm_no_flags(u_int imm,u_int rt)
626 emit_addimm(rt,imm,rt);
// Emit a 32-bit logical operation between rs and the constant `imm` into rt.
// `op` indexes names[] (0=and, 1=orr, 2=eor, 3=ands) and is OR-ed into the
// opcode. When `imm` is a rotated run of ones the logical-immediate form is
// used; the other visible path loads `imm` into HOST_TEMPREG and uses the
// register form.
629 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
631 const char *names[] = { "and", "orr", "eor", "ands" };
632 const char *name = names[op];
635 if (is_rotated_mask(imm)) {
636 gen_logical_imm(imm, &immr, &imms);
637 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
638 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
// the temp register is acquired unless it is used only as the destination
641 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
642 host_tempreg_acquire();
643 emit_movimm(imm, HOST_TEMPREG);
644 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
645 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
646 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
647 host_tempreg_release();
// AND rs with a constant into rt, via emit_logicop_imm() with op 0 ("and").
// NOTE(review): lines before the call are not visible here; there may be
// special cases -- confirm.
652 static void emit_andimm(u_int rs, u_int imm, u_int rt)
657 emit_logicop_imm(0, rs, imm, rt);
// OR rs with a constant into rt, via emit_logicop_imm() with op 1 ("orr").
// NOTE(review): lines before the call are not visible here; there may be
// special cases -- confirm.
660 static void emit_orimm(u_int rs, u_int imm, u_int rt)
667 emit_logicop_imm(1, rs, imm, rt);
// XOR rs with a constant into rt, via emit_logicop_imm() with op 2 ("eor").
// NOTE(review): lines before the call are not visible here; there may be
// special cases -- confirm.
670 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
677 emit_logicop_imm(2, rs, imm, rt);
680 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
682 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
686 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
688 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
692 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
694 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
698 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
700 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
704 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
710 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
716 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
718 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
719 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
722 static void emit_signextend16(u_int rs, u_int rt)
724 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
725 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
728 static void emit_shl(u_int rs,u_int rshift,u_int rt)
730 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
731 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
734 static void emit_shr(u_int rs,u_int rshift,u_int rt)
736 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
740 static void emit_sar(u_int rs,u_int rshift,u_int rt)
742 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
// Compare rs against a 32-bit constant, setting the flags.
// Visible forms: a cmp with a 12-bit immediate, a cmn with the negated
// constant when -imm fits in 12 bits, a cmp with a 12-bit immediate shifted
// left by 12, and finally loading the constant into HOST_TEMPREG and
// comparing registers.
// NOTE(review): the "cmn" debug line prints `imm`, while the encoded operand
// is `-imm`.
746 static void emit_cmpimm(u_int rs, u_int imm)
749 assem_debug("cmp %s,%#x\n", regname[rs], imm);
750 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
752 else if (-imm < 4096) {
753 assem_debug("cmn %s,%#x\n", regname[rs], imm);
754 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
// usable only when the low 12 bits are zero
756 else if (imm < 16777216 && !(imm & 0xfff)) {
757 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
758 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
761 host_tempreg_acquire();
762 emit_movimm(imm, HOST_TEMPREG);
763 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
764 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
765 host_tempreg_release();
// Conditionally replace rt with the constant 0 or 1.
// Two encodings are visible: a csinc on rt and the zero register using
// `cond1`, and a csel between the zero register and rt using `cond0`.
// Callers pass cond1 as the inverse of cond0.
// NOTE(review): which encoding is chosen for imm == 0 vs. imm == 1 is
// decided on lines not visible here -- confirm.
769 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
771 assert(imm == 0 || imm == 1);
772 assert(cond0 < 0x10);
773 assert(cond1 < 0x10);
775 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
776 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
778 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
779 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
783 static void emit_cmovne_imm(u_int imm,u_int rt)
785 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
788 static void emit_cmovl_imm(u_int imm,u_int rt)
790 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
793 static void emit_cmovb_imm(int imm,u_int rt)
795 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
798 static void emit_cmoveq_reg(u_int rs,u_int rt)
800 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
801 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
804 static void emit_cmovne_reg(u_int rs,u_int rt)
806 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
810 static void emit_cmovl_reg(u_int rs,u_int rt)
812 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
816 static void emit_cmovb_reg(u_int rs,u_int rt)
818 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
822 static void emit_cmovs_reg(u_int rs,u_int rt)
824 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
825 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
828 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
830 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
831 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
// Set rt to 1 on the LT condition and to 0 otherwise; presumably after a
// signed compare of rs with `imm` on a line not visible here -- confirm.
// rt is zeroed up front unless it aliases rs, in which case it is zeroed
// with emit_movimm(0,rt) just before the conditional set.
834 static void emit_slti32(u_int rs,int imm,u_int rt)
836 if(rs!=rt) emit_zeroreg(rt);
838 if(rs==rt) emit_movimm(0,rt);
839 emit_cmovl_imm(1,rt);
// Set rt to 1 on the CC condition and to 0 otherwise; presumably after an
// unsigned compare of rs with `imm` on a line not visible here -- confirm.
// rt is zeroed up front unless it aliases rs, in which case it is zeroed
// with emit_movimm(0,rt) just before the conditional set.
842 static void emit_sltiu32(u_int rs,int imm,u_int rt)
844 if(rs!=rt) emit_zeroreg(rt);
846 if(rs==rt) emit_movimm(0,rt);
847 emit_cmovb_imm(1,rt);
850 static void emit_cmp(u_int rs,u_int rt)
852 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
853 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
// Ends by clearing rt to 0 when the LT condition holds.
// NOTE(review): the preceding steps (how rt is initialised and what is
// compared) are not visible here -- confirm against the full source.
856 static void emit_set_gz32(u_int rs, u_int rt)
858 //assem_debug("set_gz32\n");
861 emit_cmovl_imm(0,rt);
// Copies rs into rt (unless they are the same register) and then sets rt to
// 1 when the NE condition holds.
// NOTE(review): the flag-setting step between the two is not visible here.
864 static void emit_set_nz32(u_int rs, u_int rt)
866 //assem_debug("set_nz32\n");
867 if(rs!=rt) emit_mov(rs,rt);
869 emit_cmovne_imm(1,rt);
// Set rt to 1 on the LT condition and to 0 otherwise; presumably after a
// compare of rs1 with rs2 on a line not visible here -- confirm.
// rt is zeroed first when it aliases neither source; otherwise it is zeroed
// with emit_movimm(0,rt) just before the conditional set.
872 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
874 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
875 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
877 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
878 emit_cmovl_imm(1,rt);
// Set rt to 1 on the CC condition and to 0 otherwise; presumably after a
// compare of rs1 with rs2 on a line not visible here -- confirm.
// rt is zeroed first when it aliases neither source; otherwise it is zeroed
// with emit_movimm(0,rt) just before the conditional set.
881 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
883 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
884 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
886 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
887 emit_cmovb_imm(1,rt);
890 static int can_jump_or_call(const void *a)
892 intptr_t diff = (u_char *)a - out;
893 return (-134217728 <= diff && diff <= 134217727);
// Emit a "bl" to `a`. The visible path encodes the byte distance from `out`
// as a 26-bit word offset when it lies within +/-128MB.
// NOTE(review): handling of out-of-range targets is not visible here.
896 static void emit_call(const void *a)
898 intptr_t diff = (u_char *)a - out;
899 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
901 if (-134217728 <= diff && diff <= 134217727)
902 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
907 static void emit_jmp(const void *a)
909 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
910 u_int offset = genjmp(a);
911 output_w32(0x14000000 | offset);
914 static void emit_jne(const void *a)
916 assem_debug("bne %p\n", a);
917 u_int offset = genjmpcc(a);
918 output_w32(0x54000000 | (offset << 5) | COND_NE);
921 static void emit_jeq(const void *a)
923 assem_debug("beq %p\n", a);
924 u_int offset = genjmpcc(a);
925 output_w32(0x54000000 | (offset << 5) | COND_EQ);
928 static void emit_js(const void *a)
930 assem_debug("bmi %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_MI);
935 static void emit_jns(const void *a)
937 assem_debug("bpl %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_PL);
942 static void emit_jl(const void *a)
944 assem_debug("blt %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_LT);
949 static void emit_jge(const void *a)
951 assem_debug("bge %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_GE);
956 static void emit_jno(const void *a)
958 assem_debug("bvc %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_VC);
963 static void emit_jc(const void *a)
965 assem_debug("bcs %p\n", a);
966 u_int offset = genjmpcc(a);
967 output_w32(0x54000000 | (offset << 5) | COND_CS);
970 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
972 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
973 u_int offset = genjmpcc(a);
974 is64 = is64 ? 0x80000000 : 0;
975 isnz = isnz ? 0x01000000 : 0;
976 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// Presumably a compare-and-branch-if-zero on register r to `a`, built on
// emit_cb(); the body is not visible here -- confirm.
979 static void emit_cbz(const void *a, u_int r)
984 static void emit_jmpreg(u_int r)
986 assem_debug("br %s\n", regname64[r]);
987 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
990 static void emit_retreg(u_int r)
992 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
993 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// Presumably emits a plain return via emit_retreg(); the body is not visible
// here -- confirm.
996 static void emit_ret(void)
// Emit "adr xrt,addr": a PC-relative address computed from the current
// output position. `addr` must be within +/-1MB of `out`.
// The byte offset is split for encoding: low 2 bits at bit 29, the remaining
// 19 bits at bit 5.
1001 static void emit_adr(void *addr, u_int rt)
1003 intptr_t offset = (u_char *)addr - out;
1004 assert(-1048576 <= offset && offset < 1048576);
1006 assem_debug("adr x%d,#%#lx\n", rt, offset);
1007 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// Emit "adrp xrt,addr": the distance between the 4KB pages of `addr` and of
// the current output position, which must be within +/-4GB.
// NOTE(review): the encoded fields treat `offset` as a page count, so it is
// presumably shifted right by 12 on a line not visible here -- confirm.
1010 static void emit_adrp(void *addr, u_int rt)
1012 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1013 assert(-4294967296l <= offset && offset < 4294967296l);
1016 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1017 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1020 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1022 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1023 assert(-256 <= offset && offset < 256);
1024 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1027 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1029 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1030 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1033 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1035 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1036 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1039 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1041 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1045 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1047 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1051 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1053 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1057 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1059 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1063 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1069 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1075 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1077 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1078 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1081 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1083 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1084 assert(-256 <= offset && offset < 256);
1085 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1088 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1090 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1091 assert(-256 <= offset && offset < 256);
1092 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1095 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1097 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1098 assert(-256 <= offset && offset < 256);
1099 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1102 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1104 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1105 assert(-256 <= offset && offset < 256);
1106 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emit a 32-bit store of rt to [rs + offset].
// Non-negative 4-byte aligned offsets up to 16380 use the scaled 12-bit form
// ("str"); offsets in -256..255 use the unscaled 9-bit form ("stur").
// NOTE(review): the debug lines print the base register with its 32-bit
// name, unlike the halfword/byte variants which use regname64.
1109 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1111 if (!(offset & 3) && (u_int)offset <= 16380) {
1112 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1113 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1115 else if (-256 <= offset && offset < 256) {
1116 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Emit a 16-bit store of rt to [rs + offset].
// Non-negative 2-byte aligned offsets up to 8190 use the scaled 12-bit form
// ("strh"); offsets in -256..255 use the unscaled 9-bit form ("sturh").
1123 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1125 if (!(offset & 1) && (u_int)offset <= 8190) {
1126 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1127 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1129 else if (-256 <= offset && offset < 256) {
1130 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1131 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Emit a byte store of rt to [rs + offset].
// Offsets 0..4095 use the 12-bit form ("strb"); offsets in -256..255 that
// did not match (i.e. negative ones) use the unscaled 9-bit form ("sturb").
1137 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast makes negative offsets fail the range test
1139 if ((u_int)offset < 4096) {
1140 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1141 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1143 else if (-256 <= offset && offset < 256) {
1144 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1145 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1151 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1153 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1157 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1159 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1163 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1165 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1166 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1169 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1171 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1175 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1177 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1181 static void emit_clz(u_int rs, u_int rt)
1183 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1184 output_w32(0x5ac01000 | rn_rd(rs, rt));
1187 // special case for checking invalid_code
// Emits, using HOST_TEMPREG as scratch for the whole sequence:
//   HOST_TEMPREG = r >> 12
//   HOST_TEMPREG = byte loaded from [rbase + HOST_TEMPREG] (logged as ldrb
//                  with a uxtw-extended index)
//   compare HOST_TEMPREG with imm
// The compare is the last instruction emitted before the scratch register is
// released.
1188 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1190 host_tempreg_acquire();
1191 emit_shrimm(r, 12, HOST_TEMPREG);
1192 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1193 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1194 emit_cmpimm(HOST_TEMPREG, imm);
1195 host_tempreg_release();
1198 // special for loadlr_assemble, rs2 is destroyed
// Emitted as two steps: emit_shl() shifts rs2 by `shift` and writes the
// result back into rs2, then emit_bic(rs1, rs2, rt) combines rs1 with the
// shifted value into rt.
1199 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1201 emit_shl(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
// Right-shift counterpart of emit_bic_lsl(): emit_shr() shifts rs2 by `shift`
// back into rs2 (so rs2 is overwritten here as well), then
// emit_bic(rs1, rs2, rt) combines rs1 with the shifted value into rt.
1205 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1207 emit_shr(rs2, shift, rs2);
1208 emit_bic(rs1, rs2, rt);
// Emit a load-from-literal instruction: opcode base 0x58000000 with `ofs` and
// the destination `rt` packed by imm19_rt(). assert_dirty_stub() recognises
// this pattern as "ldr xN, <literal>". The dirty-stub code in this file emits
// it with ofs 0 and patches the real offset in later through set_loadlp().
1211 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1213 output_w32(0x58000000 | imm19_rt(ofs, rt));
// Emit a single-register load or store of rt at [rn + ofs] (immediate offset).
//   is_st - nonzero emits a store; zero emits a load (adds 0x00400000)
//   is64  - nonzero selects the 64-bit form (adds 0x40000000), else 32-bit
//   rt    - data register number
//   rn    - base register number
//   ofs   - byte offset; must be a multiple of the access size (4 or 8 bytes),
//           which the assert enforces; it is encoded divided by that size
1216 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1218 u_int op = 0xb9000000;
1219 unused const char *ldst = is_st ? "st" : "ld";
1220 unused char rp = is64 ? 'x' : 'w';
1221 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
// normalise to 0/1 so it can be used in the shift amounts below
1222 is64 = is64 ? 1 : 0;
1223 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1224 ofs = (ofs >> (2+is64));
1225 if (!is_st) op |= 0x00400000;
1226 if (is64) op |= 0x40000000;
1227 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store of rt1/rt2 at [rn + ofs].
//   is_st    - nonzero emits a store; zero emits a load (adds 0x00400000)
//   is64     - nonzero selects the 64-bit form (adds 0x80000000), else 32-bit
//   rt1, rt2 - the two data register numbers
//   rn       - base register number
//   ofs      - signed byte offset; must be a multiple of the access size
//              (4 or 8 bytes) and, once divided by that size, lie in -64..63;
//              both conditions are asserted
1230 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1232 u_int op = 0x29000000;
1233 unused const char *ldst = is_st ? "st" : "ld";
1234 unused char rp = is64 ? 'x' : 'w';
1235 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalise to 0/1 so it can be used in the shift amounts below
1236 is64 = is64 ? 1 : 0;
1237 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1238 ofs = (ofs >> (2+is64));
1239 assert(-64 <= ofs && ofs <= 63);
1241 if (!is_st) op |= 0x00400000;
1242 if (is64) op |= 0x80000000;
1243 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load the host registers selected by the `reglist`
// bitmask to/from the stack area beginning at SP + SSP_CALLEE_REGS, using
// 64-bit accesses.
// NOTE(review): several lines are elided in this listing. From what is
// visible, registers are gathered into `pair` and transferred two at a time
// with emit_ldstp(), with one emit_ldst() for a leftover register — confirm
// against the full source.
1246 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask from bit 0 upward until no set bits remain
1250 for (r = 0; reglist; r++, reglist >>= 1) {
1254 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1260 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the final offset is checked against SSP_CALLER_REGS
1263 assert(ofs <= SSP_CALLER_REGS);
1266 // Save registers before function call
// `reglist` is a bitmask of host registers; it is first narrowed to the
// CALLER_SAVE_REGS mask and the remainder is stored via save_load_regs_all().
1267 static void save_regs(u_int reglist)
1269 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1270 save_load_regs_all(1, reglist);
1273 // Restore registers after function call
// Mirror of save_regs(): the same CALLER_SAVE_REGS narrowing is applied so the
// registers reloaded via save_load_regs_all() match the ones that were stored.
1274 static void restore_regs(u_int reglist)
1276 reglist &= CALLER_SAVE_REGS;
1277 save_load_regs_all(0, reglist);
1280 /* Stubs/epilogue */
// NOTE(review): only the signature is visible in this listing; the body lines
// are elided, so the behaviour cannot be described from here.
1282 static void literal_pool(int n)
// NOTE(review): only the signature is visible in this listing; the body lines
// are elided, so the behaviour cannot be described from here.
1287 static void literal_pool_jumpover(int n)
1291 // parsed by get_pointer, find_extjump_insn
// Emit the stub used for a jump that leaves the current block.
//   addr   - address of the branch instruction this stub belongs to; byte 3 of
//            that instruction is asserted to match a "b" or "b.cond" opcode
//   target - 32-bit value loaded into register 0 with a movz/movk pair
//   linker - routine the stub finally transfers to via emit_far_jump()
// NOTE(review): the instruction emitted between the movk and the far jump is
// elided in this listing.
1292 static void emit_extjump2(u_char *addr, u_int target, void *linker)
1294 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1296 emit_movz(target & 0xffff, 0);
1297 emit_movk_lsl16(target >> 16, 0);
1299 // addr is in the current recompiled block (max 256k)
1300 // offset shouldn't exceed +/-1MB
1302 emit_far_jump(linker);
// Sanity-check that the code at `src` has the shape emitted by
// emit_extjump2(). The visible check requires the first word to be a movz
// into register 0.
// NOTE(review): further lines of this function are elided in this listing.
1305 static void check_extjump2(void *src)
1308 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1312 // put rt_val into rt, potentially making use of rs with value rs_val
// Strategies are tried in this order:
//  1. add an immediate to rs when the difference is within +/-4095, or within
//     +/-16777215 with its low 12 bits clear;
//  2. a special case for rt_val == ~rs_val (the emitted instruction is elided
//     in this listing);
//  3. xor rs with an immediate when rs_val ^ rt_val passes is_rotated_mask();
//  4. otherwise load rt_val directly with emit_movimm(), ignoring rs.
// is_similar_value() reports whether one of the first three would apply.
1313 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1315 int diff = rt_val - rs_val;
1316 if ((-4096 < diff && diff < 4096)
1317 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1318 emit_addimm(rs, diff, rt);
1319 else if (rt_val == ~rs_val)
1321 else if (is_rotated_mask(rs_val ^ rt_val))
1322 emit_xorimm(rs, rs_val ^ rt_val, rt);
1324 emit_movimm(rt_val, rt);
1327 // return 1 if the above function can do its job cheaply
// i.e. whether emit_movimm_from() could derive v2 from a register holding v1
// without falling back to a full immediate load. The visible tests mirror the
// add-immediate ranges and the is_rotated_mask() xor case of that function.
// NOTE(review): the computation of `diff` and one term of the expression are
// elided in this listing.
1328 static int is_similar_value(u_int v1, u_int v2)
1331 return (-4096 < diff && diff < 4096)
1332 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1334 || is_rotated_mask(v1 ^ v2);
// Emit 64-bit moves so that the values held in host registers a0 and a1 end
// up in registers 0 and 1 respectively (the emit_mov64 destinations), ordering
// the moves so that neither source is overwritten before it has been read.
// NOTE(review): a0 and a1 are u_int, so the `>=0` tests below are always true.
// NOTE(review): some condition and branch lines are elided in this listing.
1338 static void pass_args64(u_int a0, u_int a1)
// goes through register 2 as scratch: a0 -> 2, a1 -> 1, then 2 -> 0
1342 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1344 else if(a0!=0&&a1==0) {
1346 if (a0>=0) emit_mov64(a0,0);
// moves are skipped when the value already sits in its target register
1349 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1350 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Emit the width adjustment appropriate for stub `type`, reading rs and
// writing rt: emit_sbfm() with top bit 7 or 15 for the signed byte/halfword
// loads, emit_ubfm() with top bit 7 or 15 for the byte/halfword stores, and a
// plain register move (skipped when rs == rt) for word stores.
// NOTE(review): some case labels are elided in this listing, so the full set
// of stub types sharing each branch is not visible.
1354 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1357 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1359 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1360 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1362 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1364 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1369 #include "pcsxmem.h"
1370 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for the load recorded in stubs[n].
// Steps visible in this listing:
//  - the branch at stubs[n].addr is pointed at the code emitted here;
//  - a scratch register is looked for among those not in `reglist`;
//  - the mem_rtab entry for the page of the address in rs (index rs >> 12) is
//    fetched and added to itself with emit_adds64();
//  - a direct load of the requested width is emitted through that entry,
//    followed by a jump back to stubs[n].retaddr or to the restore tail;
//  - on the handler path, jump_handler_read8/16/32 is called with rs and the
//    table entry as arguments and the adjusted cycle count in register 2, and
//    the result in register 0 is extended into rt;
//  - registers are restored and control returns to stubs[n].retaddr.
// NOTE(review): many short lines (declarations, branches, braces) are elided
// here, so the exact conditions linking these steps are not visible.
1372 static void do_readstub(int n)
1374 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1375 set_jump_target(stubs[n].addr, out);
1376 enum stub_type type = stubs[n].type;
1378 int rs = stubs[n].b;
1379 const struct regstat *i_regs = (void *)stubs[n].c;
1380 u_int reglist = stubs[n].e;
1381 const signed char *i_regmap = i_regs->regmap;
// these instruction types load into the FTEMP mapping instead of rt1[i]
1383 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1384 rt=get_reg(i_regmap,FTEMP);
1386 rt=get_reg(i_regmap,rt1[i]);
1389 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1390 void *restore_jump = NULL, *handler_jump = NULL;
// candidate scratch registers: below HOST_CCREG, not excluded, not in reglist
1392 for (r = 0; r < HOST_CCREG; r++) {
1393 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1398 if(rt>=0&&rt1[i]!=0)
1405 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then doubled
1407 emit_readdword(&mem_rtab,temp);
1408 emit_shrimm(rs,12,temp2);
1409 emit_readdword_dualindexedx8(temp,temp2,temp2);
1410 emit_adds64(temp2,temp2,temp2);
1413 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1415 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1416 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1417 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1418 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1419 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1425 emit_jmp(0); // jump to reg restore
1428 emit_jmp(stubs[n].retaddr); // return address
1429 set_jump_target(handler_jump, out);
// pick the read handler matching the access width
1434 if(type==LOADB_STUB||type==LOADBU_STUB)
1435 handler=jump_handler_read8;
1436 if(type==LOADH_STUB||type==LOADHU_STUB)
1437 handler=jump_handler_read16;
1438 if(type==LOADW_STUB)
1439 handler=jump_handler_read32;
1441 pass_args64(rs,temp2);
1442 int cc=get_reg(i_regmap,CCREG);
1444 emit_loadreg(CCREG,2);
// register 2 = cycle count (from cc, or from register 2 if unmapped) + adjust
1445 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1446 emit_far_call(handler);
1447 // (no cycle reload after read)
1448 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1449 loadstore_extend(type,0,rt);
1452 set_jump_target(restore_jump, out);
1453 restore_regs(reglist);
1454 emit_jmp(stubs[n].retaddr);
// Emit inline code for a load from the compile-time-known address `addr`.
// Visible behaviour:
//  - get_direct_memhandler() is asked about addr in mem_rtab; a NULL result
//    leads to a plain load of the requested width from [rs], after rs has
//    been re-pointed from addr to host_addr when the two differ (host
//    addresses of 4 GiB and above abort);
//  - otherwise a handler is called: addr goes in register 0, the adjusted
//    cycle count in register 2, and on one path register 1 is loaded with the
//    doubled mem_rtab entry for the page; the result in register 0 is
//    extended into rt and registers are restored.
// NOTE(review): rs and rt are both looked up from `target`, with rs falling
// back to the mapping for -1. Many short lines are elided in this listing, so
// the conditions selecting each path are not fully visible.
1457 static void inline_readstub(enum stub_type type, int i, u_int addr,
1458 const signed char regmap[], int target, int adj, u_int reglist)
1460 int rs=get_reg(regmap,target);
1461 int rt=get_reg(regmap,target);
1462 if(rs<0) rs=get_reg(regmap,-1);
1465 uintptr_t host_addr = 0;
1467 int cc=get_reg(regmap,CCREG);
1468 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1470 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1471 if (handler == NULL) {
1474 if (addr != host_addr) {
1475 if (host_addr >= 0x100000000ull)
1476 abort(); // ROREG not implemented
1477 emit_movimm_from(addr, rs, host_addr, rs);
1480 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1481 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1482 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1483 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1484 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1489 is_dynamic=pcsxmem_is_handler_dynamic(addr);
// pick the generic read handler matching the access width
1491 if(type==LOADB_STUB||type==LOADBU_STUB)
1492 handler=jump_handler_read8;
1493 if(type==LOADH_STUB||type==LOADHU_STUB)
1494 handler=jump_handler_read16;
1495 if(type==LOADW_STUB)
1496 handler=jump_handler_read32;
1499 // call a memhandler
1500 if(rt>=0&&rt1[i]!=0)
1504 emit_movimm(addr,0);
1508 emit_loadreg(CCREG,2);
1509 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
// register 1 = mem_rtab entry for this page, doubled (adrp + low 12 bits)
1511 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1512 emit_adrp((void *)l1, 1);
1513 emit_addimm64(1, l1 & 0xfff, 1);
1516 emit_far_call(do_memhandler_pre);
1518 emit_far_call(handler);
1520 // (no cycle reload after read)
1521 if(rt>=0&&rt1[i]!=0)
1522 loadstore_extend(type, 0, rt);
1523 restore_regs(reglist);
// Emit the out-of-line slow path for the store recorded in stubs[n].
// Steps visible in this listing:
//  - the branch at stubs[n].addr is pointed at the code emitted here;
//  - scratch registers are looked for (first among registers not in reglist,
//    later among registers 0..3 that are neither rs nor rt);
//  - the mem_wtab entry for the page of the address in rs (index rs >> 12) is
//    fetched and added to itself with emit_adds64();
//  - a direct store of the requested width is emitted through that entry,
//    followed by a jump back to stubs[n].retaddr or to the restore tail;
//  - on the handler path, jump_handler_write8/16/32 is called with the table
//    entry moved to register 3 and the adjusted cycle count in register 2;
//    the value returned in register 0 has the adjustment subtracted again and
//    becomes the cycle count;
//  - registers are restored and control returns to stubs[n].retaddr.
// NOTE(review): many short lines (declarations, branches, braces) are elided
// here, so the exact conditions linking these steps are not visible.
1526 static void do_writestub(int n)
1528 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1529 set_jump_target(stubs[n].addr, out);
1530 enum stub_type type=stubs[n].type;
1533 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1534 u_int reglist=stubs[n].e;
1535 signed char *i_regmap=i_regs->regmap;
// these instruction types store from the FTEMP mapping instead of rs2[i]
1537 if(itype[i]==C1LS||itype[i]==C2LS) {
1538 rt=get_reg(i_regmap,r=FTEMP);
1540 rt=get_reg(i_regmap,r=rs2[i]);
1544 int rtmp,temp=-1,temp2,regs_saved=0;
1545 void *restore_jump = NULL, *handler_jump = NULL;
// reglist plus the address and data registers, which must stay intact too
1546 int reglist2=reglist|(1<<rs)|(1<<rt);
1547 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1548 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1556 for(rtmp=0;rtmp<=3;rtmp++)
1557 if(rtmp!=rs&&rtmp!=rt)
1560 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1563 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled
1566 emit_readdword(&mem_wtab,temp);
1567 emit_shrimm(rs,12,temp2);
1568 emit_readdword_dualindexedx8(temp,temp2,temp2);
1569 emit_adds64(temp2,temp2,temp2);
1573 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1574 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1575 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1580 emit_jmp(0); // jump to reg restore
1583 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1584 set_jump_target(handler_jump, out);
1586 // TODO FIXME: regalloc should prefer callee-saved regs
1591 case STOREB_STUB: handler=jump_handler_write8; break;
1592 case STOREH_STUB: handler=jump_handler_write16; break;
1593 case STOREW_STUB: handler=jump_handler_write32; break;
1599 emit_mov64(temp2,3);
1600 host_tempreg_release();
1602 int cc=get_reg(i_regmap,CCREG);
1604 emit_loadreg(CCREG,2);
1605 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1606 // returns new cycle_count
1607 emit_far_call(handler);
// undo the adjustment on the returned count and put it back in cc / reg 2
1608 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1610 emit_storereg(CCREG,2);
1612 set_jump_target(restore_jump, out);
1613 restore_regs(reglist);
1614 emit_jmp(stubs[n].retaddr);
// Emit inline code for a store to the compile-time-known address `addr`.
// Visible behaviour:
//  - get_direct_memhandler() is asked about addr in mem_wtab; a NULL result
//    leads to a plain store of rt with the requested width to [rs], after rs
//    has been re-pointed from addr to host_addr when the two differ (host
//    addresses of 4 GiB and above abort);
//  - otherwise rs is written to `address`, the value is extended from rt into
//    register 0, the adjusted cycle count is placed in register 2, and
//    do_memhandler_pre, the handler and do_memhandler_post are called in turn;
//    the value in register 0 afterwards has the adjustment subtracted and
//    becomes the cycle count; finally registers are restored.
// NOTE(review): several short lines are elided in this listing, so the
// conditions around the cycle-count load/store are not fully visible.
1617 static void inline_writestub(enum stub_type type, int i, u_int addr,
1618 const signed char regmap[], int target, int adj, u_int reglist)
1620 int rs = get_reg(regmap,-1);
1621 int rt = get_reg(regmap,target);
1624 uintptr_t host_addr = 0;
1625 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1626 if (handler == NULL) {
1627 if (addr != host_addr) {
1628 if (host_addr >= 0x100000000ull)
1629 abort(); // ROREG not implemented
1630 emit_movimm_from(addr, rs, host_addr, rs);
1633 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1634 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1635 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1641 // call a memhandler
1643 emit_writeword(rs, &address); // some handlers still need it
1644 loadstore_extend(type, rt, 0);
1646 cc = cc_use = get_reg(regmap, CCREG);
// falls back to register 2 as the cycle-count register
1648 emit_loadreg(CCREG, (cc_use = 2));
1649 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
1651 emit_far_call(do_memhandler_pre);
1652 emit_far_call(handler);
1653 emit_far_call(do_memhandler_post);
1654 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
1656 emit_storereg(CCREG, cc_use);
1657 restore_regs(reglist);
// Compare `size` bytes at `source` against `copy` with memcmp(). This is the
// routine that the code emitted by do_dirty_stub_base() calls.
// NOTE(review): the return statement is elided in this listing; presumably
// the memcmp() result is returned — confirm.
1660 static int verify_code_arm64(const void *source, const void *copy, u_int size)
1662 int ret = memcmp(source, copy, size);
1663 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
1667 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emit the common head of a "dirty" entry stub:
//   x0 <- literal (source pointer), x1 <- literal (copy pointer),
//   w2 <- source_len, call verify_code_arm64;
//   then w0 <- vaddr (movz/movk pair) and call get_addr.
// Both literal loads are emitted with offset 0 and must be patched afterwards
// (see do_dirty_stub_emit_literals). source_len may not exceed MAXBLOCK*4.
// NOTE(review): the conditional branch between the two calls (`jmp`) is
// elided in this listing; it is resolved to the end of this sequence.
1668 static void do_dirty_stub_base(u_int vaddr, u_int source_len)
1670 assert(source_len <= MAXBLOCK*4);
1671 emit_loadlp_ofs(0, 0); // ldr x0, source
1672 emit_loadlp_ofs(0, 1); // ldr x1, copy
1673 emit_movz(source_len, 2);
1674 emit_far_call(verify_code_arm64);
1677 emit_movz(vaddr & 0xffff, 0);
1678 emit_movk_lsl16(vaddr >> 16, 0);
1679 emit_far_call(get_addr);
1681 set_jump_target(jmp, out);
// Assert that the instruction words at `ptr` have the layout produced by
// do_dirty_stub_base(): two load-literal instructions targeting x0 and x1,
// a movz into w2, and "br x0" as the ninth word (index 8).
1684 static void assert_dirty_stub(const u_int *ptr)
1686 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1687 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1688 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
1689 assert( ptr[8] == 0xd61f0000); // br x0
// Patch the load-literal instruction at `loadl` so that it refers to the
// literal stored at `lit`.
// Requirements, all asserted: the instruction must be a load-literal whose
// offset field is still zero (only the low 5 register bits may be set), and
// the byte distance from the instruction to the literal must be a multiple
// of 4 and below 0x100000. The distance is computed as unsigned, so a literal
// placed before the instruction also fails the range assert.
1692 static void set_loadlp(u_int *loadl, void *lit)
1694 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1695 assert((*loadl & ~0x1f) == 0x58000000);
1696 assert((ofs & 3) == 0);
1697 assert(ofs < 0x100000);
// word offset goes into the field starting at bit 5
1698 *loadl |= (ofs >> 2) << 5;
// Emit the two 64-bit literals used by a dirty stub (the `source` and `copy`
// pointers) at the current output position, and patch the two load-literal
// instructions at loadlps[0] and loadlps[1] to refer to them.
1701 static void do_dirty_stub_emit_literals(u_int *loadlps)
1703 set_loadlp(&loadlps[0], out);
1704 output_w64((uintptr_t)source);
1705 set_loadlp(&loadlps[1], out);
1706 output_w64((uintptr_t)copy);
// Emit the dirty entry stub for instruction i of the current block: the common
// head for guest address start + i*4, a jump to instr_addr[i], and then the
// two literals that the head's load-literal instructions refer to.
// NOTE(review): the declaration of `entry` and the return statement are
// elided in this listing.
1709 static void *do_dirty_stub(int i, u_int source_len)
1711 assem_debug("do_dirty_stub %x\n",start+i*4);
// remember where the two load-literal instructions will be, for patching
1712 u_int *loadlps = (void *)out;
1713 do_dirty_stub_base(start + i*4, source_len);
1717 entry = instr_addr[i];
1718 emit_jmp(instr_addr[i]);
1719 do_dirty_stub_emit_literals(loadlps);
// Variant of do_dirty_stub() that passes start + 1 as the guest address and
// continues execution right after the stub instead of jumping to an
// instruction address: a branch is emitted over the two 8-byte literals and
// then resolved to the position following them.
1723 static void do_dirty_stub_ds(u_int source_len)
1725 u_int *loadlps = (void *)out;
1726 do_dirty_stub_base(start + 1, source_len);
1727 void *lit_jumpover = out;
// provisional target two 8-byte literals ahead; fixed up below
1728 emit_jmp(out + 8*2);
1729 do_dirty_stub_emit_literals(loadlps);
1730 set_jump_target(lit_jumpover, out);
// Return the 64-bit literal that the load-literal instruction at `i` refers
// to. The instruction's top byte is asserted to be 0x58; the value is read
// `ofs` words past the instruction.
// NOTE(review): the lines that compute `ofs` are elided in this listing.
1733 static uint64_t get_from_ldr_literal(const u_int *i)
1736 assert((i[0] & 0xff000000) == 0x58000000);
1739 return *(uint64_t *)(i + ofs);
// Return the 16-bit immediate (bits 5..20) of the movz instruction at `i`.
// The assert checks the opcode bits for movz while ignoring bit 31 and the
// low 21 bits.
1742 static uint64_t get_from_movz(const u_int *i)
1744 assert((i[0] & 0x7fe00000) == 0x52800000);
1745 return (i[0] >> 5) & 0xffff;
1748 // Find the "clean" entry point from a "dirty" entry point
1749 // by skipping past the call to verify_code
// `addr` must point at a stub with the layout checked by assert_dirty_stub().
// NOTE(review): the lines that compute and return the clean address are
// elided in this listing.
1750 static void *get_clean_addr(u_int *addr)
1752 assert_dirty_stub(addr);
// Re-check the dirty stub at `ptr`: recover the source pointer, copy pointer
// and length embedded in it and compare the two memory ranges.
// Returns nonzero when source and copy are byte-identical, 0 otherwise.
1756 static int verify_dirty(const u_int *ptr)
1758 const void *source, *copy;
1760 assert_dirty_stub(ptr);
1761 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1762 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1763 len = get_from_movz(&ptr[2]); // movz w2, #source_len
1764 return !memcmp(source, copy, len);
// Tell whether the code at `addr` is a clean entry point. A first word whose
// top byte is 0x58 (load-literal) marks a dirty stub, whose layout is then
// asserted.
// NOTE(review): the return statements are elided in this listing.
1767 static int isclean(void *addr)
1769 const u_int *ptr = addr;
1770 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1771 assert_dirty_stub(ptr);
1777 // get source that block at addr was compiled from (host pointers)
// `addr` must point at a dirty stub (asserted). *start receives the source
// pointer stored in the stub's first literal and *end receives *start plus
// the length encoded in the stub's movz.
1778 static void get_bounds(void *addr, u_char **start, u_char **end)
1780 const u_int *ptr = addr;
1781 assert_dirty_stub(ptr);
1782 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1783 *end = *start + get_from_movz(&ptr[2]); // movz w2, #source_len
// Emit the code that precedes a call to a COP2 (GTE) handler: store the
// registers in `reglist`, run the stall check for `op`, and load register 0
// with the address of psxRegs.CP2D.r[0], computed as an offset from FP
// relative to dynarec_local.
// NOTE(review): the call to pcnt_gte_start is presumably conditional on a
// build option whose lines are elided in this listing.
1788 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1790 save_load_regs_all(1, reglist);
1791 cop2_do_stall_check(op, i, i_regs, 0);
1794 emit_far_call(pcnt_gte_start);
1796 // pointer to cop2 regs
1797 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a COP2 (GTE) handler call: reload the registers
// in `reglist` that c2op_prologue() stored.
// NOTE(review): the call to pcnt_gte_end is presumably conditional on a build
// option whose lines are elided in this listing.
1800 static void c2op_epilogue(u_int op,u_int reglist)
1804 emit_far_call(pcnt_gte_end);
1806 save_load_regs_all(0, reglist);
// Emit code for the COP2 operation at instruction i. The operation number is
// the low 6 bits of source[i]. When a handler exists for it, the emitted code
// saves the mapped caller-saved registers, writes the opcode to psxRegs.code,
// calls the handler (the gte_handlers_nf variant when flags are not needed)
// and restores the registers.
// NOTE(review): some lines are elided in this listing, including the action
// taken when NDHACK_GTE_NO_FLAGS is enabled.
1809 static void c2op_assemble(int i, const struct regstat *i_regs)
1811 u_int c2op=source[i]&0x3f;
1812 u_int hr,reglist_full=0,reglist;
1813 int need_flags,need_ir;
// collect every host register that currently holds a mapping
1814 for(hr=0;hr<HOST_REGS;hr++) {
1815 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1817 reglist=reglist_full&CALLER_SAVE_REGS;
1819 if (gte_handlers[c2op]!=NULL) {
1820 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1821 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1822 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1823 source[i],gte_unneeded[i+1],need_flags,need_ir);
1824 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1826 //int shift = (source[i] >> 19) & 1;
1827 //int lm = (source[i] >> 10) & 1;
1831 c2op_prologue(c2op, i, i_regs, reglist);
1832 emit_movimm(source[i],1); // opcode
1833 emit_writeword(1,&psxRegs.code);
1834 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1837 c2op_epilogue(c2op,reglist);
// Emit code that masks the value in `sl` for a write to COP2 control register
// 31, leaving the result in `temp`. The intended computation is given by the
// two commented-out lines below: keep bits 0x7ffff000 and set bit 31 when any
// of the tested error bits is set. HOST_TEMPREG is used as scratch.
// NOTE(review): this function ends in assert(0) ("testing needed"), so it is
// not expected to be reached in its current state.
1841 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1843 //value = value & 0x7ffff000;
1844 //if (value & 0x7f87e000) value |= 0x80000000;
1845 emit_andimm(sl, 0x7fffe000, temp);
1846 emit_testimm(temp, 0xff87ffff);
1847 emit_andimm(sl, 0x7ffff000, temp);
1848 host_tempreg_acquire();
1849 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
// replace temp with the bit-31-set variant when the test above was non-zero
1850 emit_cmovne_reg(HOST_TEMPREG, temp);
1851 host_tempreg_release();
1852 assert(0); // testing needed
// Emit code that reads COP2 data register `copr` as a signed halfword into
// `temp` and reduces it to a saturated field in bits 0xf80:
//   - emit_bicsar_imm(temp,31,temp): presumably clears temp when it is
//     negative (bic with temp asr 31) — confirm against the helper;
//   - values above 0xf80 are replaced by ~0 so the final mask yields 0xf80;
//   - the result is masked with 0xf80.
// Fix: the address argument had been garbled to "(R)_cop2d" by an HTML-entity
// conversion of "&reg"; it is restored to the address-of expression.
1855 static void do_mfc2_31_one(u_int copr,signed char temp)
1857 emit_readshword(&reg_cop2d[copr],temp);
1858 emit_bicsar_imm(temp,31,temp);
1859 emit_cmpimm(temp,0xf80);
1860 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1861 emit_andimm(temp,0xf80,temp);
// Emit code that assembles the value of COP2 data register 29 in `tl` from
// the reduced fields of registers 9, 10 and 11 (see do_mfc2_31_one) and
// writes it back to reg_cop2d[29]:
//   tl  = field(9)  >> 7
//   tl |= field(10) >> 2   (presumably, per emit_orrshr_imm — confirm)
//   tl |= field(11) << 3   (presumably, per emit_orrshl_imm — confirm)
// `temp` is the scratch register; HOST_TEMPREG is substituted for it on a
// path whose condition is elided in this listing, and is released at the end
// when it was the one used.
// Fix: the address argument had been garbled to "(R)_cop2d" by an HTML-entity
// conversion of "&reg"; it is restored to the address-of expression.
1864 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1867 host_tempreg_acquire();
1868 temp = HOST_TEMPREG;
1870 do_mfc2_31_one(9,temp);
1871 emit_shrimm(temp,7,tl);
1872 do_mfc2_31_one(10,temp);
1873 emit_orrshr_imm(temp,2,tl);
1874 do_mfc2_31_one(11,temp);
1875 emit_orrshl_imm(temp,3,tl);
1876 emit_writeword(tl,&reg_cop2d[29]);
1878 if (temp == HOST_TEMPREG)
1879 host_tempreg_release();
// Emit code for the multiply/divide instruction at index i, writing the
// results to the host registers mapped to HIREG and LOREG.
// Visible behaviour:
//  - multiply: a 64-bit product is formed in the HIREG register with
//    smull (opcode2 0x18, MULT) or umull, and that register is then shifted
//    right by 32;
//  - divide: the quotient goes to LOREG via sdiv (opcode2 0x1A, DIV) or udiv,
//    and the remainder to HIREG as numerator - quotient * denominator; when
//    the denominator is zero the quotient is replaced by
//    0 - (numerator asr 31) for DIV or by ~0 otherwise;
//  - a divide whose divisor is guest register 0 copies the numerator to HI
//    and applies the same div-by-zero quotient rule without dividing;
//  - remaining cases with a zero operand set HI to 0 and LO to ~0, or both to
//    0 for multiplies.
// NOTE(review): many short lines (branches, braces, some moves) are elided in
// this listing, so the conditions selecting each path are not fully visible.
1882 static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1895 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1896 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1897 signed char hi=get_reg(i_regs->regmap,HIREG);
1898 signed char lo=get_reg(i_regs->regmap,LOREG);
1904 if(opcode2[i]==0x18) // MULT
1905 emit_smull(m1,m2,hi);
1907 emit_umull(m1,m2,hi);
1910 emit_shrimm64(hi,32,hi);
1916 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1917 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1918 signed char quotient=get_reg(i_regs->regmap,LOREG);
1919 signed char remainder=get_reg(i_regs->regmap,HIREG);
1920 assert(numerator>=0);
1921 assert(denominator>=0);
1922 assert(quotient>=0);
1923 assert(remainder>=0);
1925 if (opcode2[i] == 0x1A) // DIV
1926 emit_sdiv(numerator,denominator,quotient);
1928 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1929 emit_msub(quotient,denominator,numerator,remainder);
1931 // div 0 quotient (remainder is already correct)
1932 host_tempreg_acquire();
1933 if (opcode2[i] == 0x1A) // DIV
1934 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1936 emit_movimm(~0,HOST_TEMPREG);
1937 emit_test(denominator,denominator);
1938 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1939 host_tempreg_release();
1948 signed char hr=get_reg(i_regs->regmap,HIREG);
1949 signed char lr=get_reg(i_regs->regmap,LOREG);
1950 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1953 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1954 assert(numerator >= 0);
1956 emit_mov(numerator,hr);
1958 if (opcode2[i] == 0x1A) // DIV
1959 emit_sub_asrimm(0,numerator,31,lr);
1965 if (hr >= 0) emit_zeroreg(hr);
1966 if (lr >= 0) emit_movimm(~0,lr);
1971 // Multiply by zero is zero.
1972 if (hr >= 0) emit_zeroreg(hr);
1973 if (lr >= 0) emit_zeroreg(lr);
// makes this function available under the name multdiv_assemble
1977 #define multdiv_assemble multdiv_assemble_arm64
// Emit code for an indirect jump to the guest address held in host register
// `rs`; the visible part is a far call to get_addr_ht.
// NOTE(review): the lines before and after that call are elided in this
// listing.
1979 static void do_jump_vaddr(u_int rs)
1983 emit_far_call(get_addr_ht);
// Hook with no code visible in its body in this listing: the existing comment
// explains that no preload is needed here because do_rhash() computes the hash
// directly.
1987 static void do_preload_rhash(u_int r) {
1988 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1989 // register. On ARM the hash can be done with a single instruction (below)
// Emit code that loads host register `ht` with the address of mini_ht,
// computed as FP plus the offset of mini_ht from dynarec_local.
1992 static void do_preload_rhtbl(u_int ht) {
1993 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
// Emit the mini hash computation: rh = rs & 0xf8.
1996 static void do_rhash(u_int rs,u_int rh) {
1997 emit_andimm(rs, 0xf8, rh);
// Emit code that advances `ht` by the hash in `rh` (64-bit add) and then loads
// a 32-bit word from [ht + 0] into `rh`, replacing the hash with the table
// entry's first word.
2000 static void do_miniht_load(int ht, u_int rh) {
2001 emit_add64(ht, rh, ht);
2002 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash table lookup. The visible part resolves a
// previously emitted branch (`jaddr`) to this point and loads the 64-bit value
// at [ht + 8] back into `ht`.
// NOTE(review): the comparison, the branch and the final jump are elided in
// this listing.
2005 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2011 set_jump_target(jaddr, out);
2012 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2013 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2017 // parsed by set_jump_target?
// Emit code that records `return_address` in the mini hash table slot
// selected by (return_address & 0xFF) >> 3: the guest address is built in
// `rt` with a movz/movk pair and written to element [0] of the slot, and the
// 64-bit value in `temp` is written to element [1]. The current output
// position is also registered with add_to_linker() for return_address.
// NOTE(review): the instruction that loads `temp` is elided in this listing.
2018 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2019 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2020 emit_movk(return_address&0xffff,rt);
2021 add_to_linker(out,return_address,1);
2023 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2024 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make newly written code in [start, end) visible to instruction fetch.
// CTR_EL0 is read on every call and the smallest data/instruction cache line
// sizes seen so far are kept in static variables and used as the loop steps.
// The data-cache pass is skipped when CTR_EL0 bit 28 is set and the
// instruction-cache pass when bit 29 is set; each pass is followed by
// "dsb ish", and the sequence ends with "isb".
2027 static void clear_cache_arm64(char *start, char *end)
2029 // Don't rely on GCC's __clear_cache implementation, as it caches
2030 // icache/dcache cache line sizes, that can vary between cores on
2031 // big.LITTLE architectures.
2032 uint64_t addr, ctr_el0;
2033 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2034 size_t isize, dsize;
2036 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// line sizes in bytes: 4 << the 4-bit fields at bits 0 and 16 of CTR_EL0
2037 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2038 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2040 // use the global minimum cache line size
2041 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2042 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2044 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2045 not required for instruction to data coherence. */
2046 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a line boundary, then step one line at a time
2047 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2048 for (; addr < (uint64_t)end; addr += dsize)
2049 // use "civac" instead of "cvau", as this is the suggested workaround for
2050 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2051 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2053 __asm__ volatile("dsb ish" : : : "memory");
2055 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2056 Unification is not required for instruction to data coherence. */
2057 if ((ctr_el0 & (1 << 29)) == 0x0) {
2058 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2059 for (; addr < (uint64_t)end; addr += isize)
2060 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2062 __asm__ volatile("dsb ish" : : : "memory");
2065 __asm__ volatile("isb" : : : "memory");
2068 // CPU-architecture-specific initialization
// Fills every entry of ndrc->tramp.ops with a two-instruction trampoline:
// "ldr x17, <literal>" followed by "br x17". The literal offset is the byte
// distance between tramp.f and tramp.ops (asserted to be a multiple of 4), so
// each entry loads the pointer located that far after its own ldr. The writes
// are bracketed by start_tcache_write()/end_tcache_write() over the ops array.
2069 static void arch_init(void)
2071 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2072 struct tramp_insns *ops = ndrc->tramp.ops;
2074 assert(!(diff & 3));
2075 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2076 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2077 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2078 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2080 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2083 // vim:shiftwidth=2:expandtab