1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define unused __attribute__((unused))
// Memory handler entry/exit helpers, defined outside this file.
// Declared with (void) so the compiler checks that no arguments are passed.
void do_memhandler_pre(void);
void do_memhandler_post(void);
// Patch the pc-relative instruction at addr so that it refers to target.
// Handles b, b.cond, cbz/cbnz and adr; any other instruction aborts.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;
  u_int insn = *ptr;

  if ((insn & 0xFC000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *ptr = (insn & 0xFC000000) | (u_int)((offset >> 2) & 0x3ffffff);
  }
  else if ((insn & 0xff000000) == 0x54000000    // b.cond
        || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out).
    // A workaround would be a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep bits 4..0: they hold the condition for b.cond but the whole
    // 5-bit Rt for cbz/cbnz, so masking with 0x0F would lose Rt bit 4.
    *ptr = (insn & 0xFF00001F) | (u_int)(((offset >> 2) & 0x7ffff) << 5);
  }
  else if ((insn & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *ptr = (insn & 0x9F00001F)
         | (u_int)((offset & 0x3) << 29)
         | (u_int)(((offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
// From a pointer to an external jump stub (as produced by emit_extjump2),
// find where the jumping insn is.  The third word of the stub must be an
// adr; its decoded target is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode through an unsigned copy: shifting a signed int left is undefined
  unsigned int insn = (unsigned int)*ptr;
  int immhi, offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  immhi = (int)((insn >> 5) & 0x7ffff);      // bits 23..5, signed 19-bit
  if (immhi & 0x40000)
    immhi -= 0x80000;
  offset = immhi * 4 + (int)((insn >> 29) & 0x3); // immlo in bits 30..29
  return ptr + offset / 4;
}
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  // decode through an unsigned copy: shifting a signed int left is undefined
  unsigned int insn = (unsigned int)*i_ptr;
  int imm;

  if ((insn & 0xfc000000) == 0x14000000) { // b: signed imm26 in bits 25..0
    imm = (int)(insn & 0x03ffffff);
    if (imm & 0x02000000)
      imm -= 0x04000000;
    return i_ptr + imm;
  }
  if ((insn & 0xff000000) == 0x54000000      // b.cond
      || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    imm = (int)((insn >> 5) & 0x7ffff);       // signed imm19 in bits 23..5
    if (imm & 0x40000)
      imm -= 0x80000;
    return i_ptr + imm;
  }
  assert(0); // not a branch this linker knows about
  return NULL;
}
85 // Allocate a specific ARM register.
86 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
106 // Alloc cycle count into dedicated register
107 static void alloc_cc(struct regstat *cur,int i)
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
117 static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124 static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition codes; the numeric value is what gets placed into the
// condition field of the emitted instructions.
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
136 static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141 static void output_w32(u_int word)
143 *((u_int *)out) = word;
147 static void output_w64(uint64_t dword)
149 *((uint64_t *)out) = dword;
// Pack register numbers into the Rm (bits 20..16) and Rd (bits 4..0) fields.
static u_int rm_rd(u_int rm, u_int rd)
{
  // each field is 5 bits wide; a larger value would spill into its neighbours
  assert(rm < 32);
  assert(rd < 32);
  return (rm << 16) | rd;
}
// Pack register numbers into the Rn (bits 9..5) and Rd (bits 4..0) fields.
static u_int rn_rd(u_int rn, u_int rd)
{
  // each field is 5 bits wide; a larger value would spill into its neighbours
  assert(rn < 32);
  assert(rd < 32);
  return (rn << 5) | rd;
}
// Pack register numbers into the Rm (bits 20..16), Rn (bits 9..5)
// and Rd (bits 4..0) fields.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  // each field is 5 bits wide; a larger value would spill into its neighbours
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return (rm << 16) | (rn << 5) | rd;
}
177 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Pack the operands of a load/store pair: imm7 (bits 21..15),
// Rt2 (bits 14..10), Rn (bits 9..5) and Rt (bits 4..0).
// imm7 must already be reduced to its 7-bit two's complement form.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 32);
  assert(rn < 32);
  assert(rt < 32);
  return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
}
192 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Pack a 16-bit immediate (bits 20..5) and Rd (bits 4..0).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 32); // 5-bit field
  return (imm16 << 5) | rd;
}
// Pack a 12-bit immediate (bits 21..10), Rn (bits 9..5) and Rd (bits 4..0).
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32); // 5-bit fields
  assert(rd < 32);
  return (imm12 << 10) | (rn << 5) | rd;
}
// Pack a 9-bit immediate (bits 20..12), Rn (bits 9..5) and Rt (bits 4..0).
// imm9 must already be reduced to its 9-bit two's complement form.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 32); // 5-bit fields
  assert(rd < 32);
  return (imm9 << 12) | (rn << 5) | rd;
}
// Pack a 19-bit immediate (bits 23..5) and Rt (bits 4..0).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 32); // 5-bit field
  return (imm19 << 5) | rt;
}
// Pack the operands of a bitfield / logical-immediate instruction:
// N (bit 22), immr (bits 21..16), imms (bits 15..10), Rn and Rd.
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
}
238 static u_int genjmp(const u_char *addr)
240 intptr_t offset = addr - out;
241 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
242 if (offset < -134217728 || offset > 134217727) {
243 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
247 return ((u_int)offset >> 2) & 0x03ffffff;
250 static u_int genjmpcc(const u_char *addr)
252 intptr_t offset = addr - out;
253 if ((uintptr_t)addr < 3) return 0;
254 if (offset < -1048576 || offset > 1048572) {
255 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
259 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ...), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low run of ones carries out of it and clears every bit
  return (value & (value + 1)) == 0;
}
267 // This function returns true if the argument contains a
268 // non-empty sequence of ones (possibly rotated) with the remainder zero.
269 static uint32_t is_rotated_mask(u_int value)
271 if (value == 0 || value == ~0)
273 if (is_mask((value - 1) | value))
275 return is_mask((~value - 1) | ~value);
278 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
280 int lzeros, tzeros, ones;
282 if (is_mask((value - 1) | value)) {
283 lzeros = __builtin_clz(value);
284 tzeros = __builtin_ctz(value);
285 ones = 32 - lzeros - tzeros;
286 *immr = (32 - tzeros) & 31;
291 if (is_mask((value - 1) | value)) {
292 lzeros = __builtin_clz(value);
293 tzeros = __builtin_ctz(value);
294 ones = 32 - lzeros - tzeros;
302 static void emit_mov(u_int rs, u_int rt)
304 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
305 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308 static void emit_mov64(u_int rs, u_int rt)
310 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
311 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
314 static void emit_add(u_int rs1, u_int rs2, u_int rt)
316 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
317 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
320 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
322 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
323 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
326 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
328 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
329 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
331 #define emit_adds_ptr emit_adds64
333 static void emit_neg(u_int rs, u_int rt)
335 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
339 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
348 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351 static void emit_movz(u_int imm, u_int rt)
353 assem_debug("movz %s,#%#x\n", regname[rt], imm);
354 output_w32(0x52800000 | imm16_rd(imm, rt));
357 static void emit_movz_lsl16(u_int imm, u_int rt)
359 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
360 output_w32(0x52a00000 | imm16_rd(imm, rt));
363 static void emit_movn(u_int imm, u_int rt)
365 assem_debug("movn %s,#%#x\n", regname[rt], imm);
366 output_w32(0x12800000 | imm16_rd(imm, rt));
369 static void emit_movn_lsl16(u_int imm,u_int rt)
371 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x12a00000 | imm16_rd(imm, rt));
375 static void emit_movk(u_int imm,u_int rt)
377 assem_debug("movk %s,#%#x\n", regname[rt], imm);
378 output_w32(0x72800000 | imm16_rd(imm, rt));
381 static void emit_movk_lsl16(u_int imm,u_int rt)
384 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
385 output_w32(0x72a00000 | imm16_rd(imm, rt));
388 static void emit_zeroreg(u_int rt)
393 static void emit_movimm(u_int imm, u_int rt)
397 else if ((~imm) < 65536)
399 else if ((imm&0xffff) == 0)
400 emit_movz_lsl16(imm >> 16, rt);
401 else if (((~imm)&0xffff) == 0)
402 emit_movn_lsl16(~imm >> 16, rt);
403 else if (is_rotated_mask(imm)) {
405 gen_logical_imm(imm, &immr, &imms);
406 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
407 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
410 emit_movz(imm & 0xffff, rt);
411 emit_movk_lsl16(imm >> 16, rt);
415 static void emit_readword(void *addr, u_int rt)
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 3) && offset <= 16380) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
420 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
426 static void emit_readdword(void *addr, u_int rt)
428 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
429 if (!(offset & 7) && offset <= 32760) {
430 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
431 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
436 #define emit_readptr emit_readdword
438 static void emit_readshword(void *addr, u_int rt)
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
449 static void emit_loadreg(u_int r, u_int hr)
458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
462 case INVCP: addr = &invc_ptr; is64 = 1; break;
463 case ROREG: addr = &ram_offset; is64 = 1; break;
466 addr = &psxRegs.GPR.r[r];
470 emit_readdword(addr, hr);
472 emit_readword(addr, hr);
476 static void emit_writeword(u_int rt, void *addr)
478 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
479 if (!(offset & 3) && offset <= 16380) {
480 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
481 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
487 static void emit_writedword(u_int rt, void *addr)
489 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
490 if (!(offset & 7) && offset <= 32760) {
491 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
492 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
498 static void emit_storereg(u_int r, u_int hr)
501 void *addr = &psxRegs.GPR.r[r];
503 //case HIREG: addr = &hi; break;
504 //case LOREG: addr = &lo; break;
505 case CCREG: addr = &cycle_count; break;
506 default: assert(r < 34); break;
508 emit_writeword(hr, addr);
511 static void emit_test(u_int rs, u_int rt)
513 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
514 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
517 static void emit_testimm(u_int rs, u_int imm)
520 assem_debug("tst %s,#%#x\n", regname[rs], imm);
521 assert(is_rotated_mask(imm)); // good enough for PCSX
522 gen_logical_imm(imm, &immr, &imms);
523 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
526 static void emit_not(u_int rs,u_int rt)
528 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
529 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
532 static void emit_and(u_int rs1,u_int rs2,u_int rt)
534 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
535 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
538 static void emit_or(u_int rs1,u_int rs2,u_int rt)
540 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
541 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
544 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
546 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
547 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
550 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
552 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
553 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
556 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
558 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
559 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
562 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
564 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
565 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
568 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
570 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
571 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
574 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
576 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
577 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
580 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
582 unused const char *st = s ? "s" : "";
583 s = s ? 0x20000000 : 0;
584 is64 = is64 ? 0x80000000 : 0;
586 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
587 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
589 else if (-imm < 4096) {
590 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
591 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
593 else if (imm < 16777216) {
594 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
595 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
596 if ((imm & 0xfff) || s) {
597 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
598 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
601 else if (-imm < 16777216) {
602 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
603 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
604 if ((imm & 0xfff) || s) {
605 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
606 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
613 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
615 emit_addimm_s(0, 0, rs, imm, rt);
618 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
620 emit_addimm_s(0, 1, rs, imm, rt);
623 static void emit_addimm_and_set_flags(int imm, u_int rt)
625 emit_addimm_s(1, 0, rt, imm, rt);
628 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
630 const char *names[] = { "and", "orr", "eor", "ands" };
631 const char *name = names[op];
634 if (is_rotated_mask(imm)) {
635 gen_logical_imm(imm, &immr, &imms);
636 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
637 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
640 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
641 host_tempreg_acquire();
642 emit_movimm(imm, HOST_TEMPREG);
643 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
644 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
645 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
646 host_tempreg_release();
651 static void emit_andimm(u_int rs, u_int imm, u_int rt)
656 emit_logicop_imm(0, rs, imm, rt);
659 static void emit_orimm(u_int rs, u_int imm, u_int rt)
666 emit_logicop_imm(1, rs, imm, rt);
669 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
676 emit_logicop_imm(2, rs, imm, rt);
679 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
681 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
682 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
685 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
687 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
688 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
691 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
693 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
694 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
697 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
699 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
700 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
703 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
705 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
706 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
709 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
711 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
712 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
715 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
717 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
721 static void emit_signextend16(u_int rs, u_int rt)
723 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
724 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
727 static void emit_shl(u_int rs,u_int rshift,u_int rt)
729 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
730 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
733 static void emit_shr(u_int rs,u_int rshift,u_int rt)
735 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
736 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
739 static void emit_sar(u_int rs,u_int rshift,u_int rt)
741 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
742 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
745 static void emit_cmpimm(u_int rs, u_int imm)
748 assem_debug("cmp %s,%#x\n", regname[rs], imm);
749 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
751 else if (-imm < 4096) {
752 assem_debug("cmn %s,%#x\n", regname[rs], imm);
753 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
755 else if (imm < 16777216 && !(imm & 0xfff)) {
756 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
757 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
760 host_tempreg_acquire();
761 emit_movimm(imm, HOST_TEMPREG);
762 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
763 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
764 host_tempreg_release();
768 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
770 assert(imm == 0 || imm == 1);
771 assert(cond0 < 0x10);
772 assert(cond1 < 0x10);
774 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
775 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
777 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
778 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
782 static void emit_cmovne_imm(u_int imm,u_int rt)
784 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
787 static void emit_cmovl_imm(u_int imm,u_int rt)
789 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
792 static void emit_cmovb_imm(int imm,u_int rt)
794 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
797 static void emit_cmoveq_reg(u_int rs,u_int rt)
799 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
800 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
803 static void emit_cmovne_reg(u_int rs,u_int rt)
805 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
806 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
809 static void emit_cmovl_reg(u_int rs,u_int rt)
811 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
812 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
815 static void emit_cmovb_reg(u_int rs,u_int rt)
817 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
818 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
821 static void emit_cmovs_reg(u_int rs,u_int rt)
823 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
824 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
827 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
829 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
830 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
833 static void emit_slti32(u_int rs,int imm,u_int rt)
835 if(rs!=rt) emit_zeroreg(rt);
837 if(rs==rt) emit_movimm(0,rt);
838 emit_cmovl_imm(1,rt);
841 static void emit_sltiu32(u_int rs,int imm,u_int rt)
843 if(rs!=rt) emit_zeroreg(rt);
845 if(rs==rt) emit_movimm(0,rt);
846 emit_cmovb_imm(1,rt);
849 static void emit_cmp(u_int rs,u_int rt)
851 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
852 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
855 static void emit_set_gz32(u_int rs, u_int rt)
857 //assem_debug("set_gz32\n");
860 emit_cmovl_imm(0,rt);
863 static void emit_set_nz32(u_int rs, u_int rt)
865 //assem_debug("set_nz32\n");
866 if(rs!=rt) emit_mov(rs,rt);
868 emit_cmovne_imm(1,rt);
871 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
873 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
874 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
876 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
877 emit_cmovl_imm(1,rt);
880 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
882 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
883 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
885 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
886 emit_cmovb_imm(1,rt);
889 static int can_jump_or_call(const void *a)
891 intptr_t diff = (u_char *)a - out;
892 return (-134217728 <= diff && diff <= 134217727);
895 static void emit_call(const void *a)
897 intptr_t diff = (u_char *)a - out;
898 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
900 if (-134217728 <= diff && diff <= 134217727)
901 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
906 static void emit_jmp(const void *a)
908 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
909 u_int offset = genjmp(a);
910 output_w32(0x14000000 | offset);
913 static void emit_jne(const void *a)
915 assem_debug("bne %p\n", a);
916 u_int offset = genjmpcc(a);
917 output_w32(0x54000000 | (offset << 5) | COND_NE);
920 static void emit_jeq(const void *a)
922 assem_debug("beq %p\n", a);
923 u_int offset = genjmpcc(a);
924 output_w32(0x54000000 | (offset << 5) | COND_EQ);
927 static void emit_js(const void *a)
929 assem_debug("bmi %p\n", a);
930 u_int offset = genjmpcc(a);
931 output_w32(0x54000000 | (offset << 5) | COND_MI);
934 static void emit_jns(const void *a)
936 assem_debug("bpl %p\n", a);
937 u_int offset = genjmpcc(a);
938 output_w32(0x54000000 | (offset << 5) | COND_PL);
941 static void emit_jl(const void *a)
943 assem_debug("blt %p\n", a);
944 u_int offset = genjmpcc(a);
945 output_w32(0x54000000 | (offset << 5) | COND_LT);
948 static void emit_jge(const void *a)
950 assem_debug("bge %p\n", a);
951 u_int offset = genjmpcc(a);
952 output_w32(0x54000000 | (offset << 5) | COND_GE);
955 static void emit_jno(const void *a)
957 assem_debug("bvc %p\n", a);
958 u_int offset = genjmpcc(a);
959 output_w32(0x54000000 | (offset << 5) | COND_VC);
962 static void emit_jc(const void *a)
964 assem_debug("bcs %p\n", a);
965 u_int offset = genjmpcc(a);
966 output_w32(0x54000000 | (offset << 5) | COND_CS);
969 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
971 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
972 u_int offset = genjmpcc(a);
973 is64 = is64 ? 0x80000000 : 0;
974 isnz = isnz ? 0x01000000 : 0;
975 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
978 static void emit_cbz(const void *a, u_int r)
983 static void emit_jmpreg(u_int r)
985 assem_debug("br %s\n", regname64[r]);
986 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
989 static void emit_retreg(u_int r)
991 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
992 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
995 static void emit_ret(void)
1000 static void emit_adr(void *addr, u_int rt)
1002 intptr_t offset = (u_char *)addr - out;
1003 assert(-1048576 <= offset && offset < 1048576);
1005 assem_debug("adr x%d,#%#lx\n", rt, offset);
1006 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1009 static void emit_adrp(void *addr, u_int rt)
1011 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1012 assert(-4294967296l <= offset && offset < 4294967296l);
1015 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1016 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1019 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1021 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1022 assert(-256 <= offset && offset < 256);
1023 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1026 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1032 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1034 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1038 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1040 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1044 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1046 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1047 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1049 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1051 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1053 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1057 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1059 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1063 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1069 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1075 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1077 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1078 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1081 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1083 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1084 assert(-256 <= offset && offset < 256);
1085 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1088 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1090 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1091 assert(-256 <= offset && offset < 256);
1092 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1095 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1097 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1098 assert(-256 <= offset && offset < 256);
1099 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1102 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1104 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1105 assert(-256 <= offset && offset < 256);
1106 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1109 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1111 if (!(offset & 3) && (u_int)offset <= 16380) {
1112 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1113 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1115 else if (-256 <= offset && offset < 256) {
1116 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1123 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1125 if (!(offset & 1) && (u_int)offset <= 8190) {
1126 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1127 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1129 else if (-256 <= offset && offset < 256) {
1130 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1131 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1137 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1139 if ((u_int)offset < 4096) {
1140 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1141 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1143 else if (-256 <= offset && offset < 256) {
1144 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1145 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1151 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1153 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1157 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1159 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1163 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1165 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1166 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1169 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1171 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1175 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1177 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1181 static void emit_clz(u_int rs, u_int rt)
1183 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1184 output_w32(0x5ac01000 | rn_rd(rs, rt));
1187 // special case for checking invalid_code
1188 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1190 host_tempreg_acquire();
1191 emit_shrimm(r, 12, HOST_TEMPREG);
1192 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1193 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1194 emit_cmpimm(HOST_TEMPREG, imm);
1195 host_tempreg_release();
1198 // special for loadlr_assemble, rs2 is destroyed
1199 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1201 emit_shl(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
1205 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1207 emit_shr(rs2, shift, rs2);
1208 emit_bic(rs1, rs2, rt);
1211 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1213 output_w32(0x58000000 | imm19_rt(ofs, rt));
1216 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1218 u_int op = 0xb9000000;
1219 unused const char *ldst = is_st ? "st" : "ld";
1220 unused char rp = is64 ? 'x' : 'w';
1221 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1222 is64 = is64 ? 1 : 0;
1223 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1224 ofs = (ofs >> (2+is64));
1225 if (!is_st) op |= 0x00400000;
1226 if (is64) op |= 0x40000000;
1227 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store ("ldp"/"stp") of rt1,rt2 at [rn + ofs].
//   is_st - nonzero for a store, zero for a load
//   is64  - nonzero for x registers, zero for w registers
//   ofs   - byte offset; must be a multiple of the register size
1230 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1232 u_int op = 0x29000000;
1233 unused const char *ldst = is_st ? "st" : "ld";
1234 unused char rp = is64 ? 'x' : 'w';
1235 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalize to 0/1 so it can be used in the shift amounts below
1236 is64 = is64 ? 1 : 0;
// the offset must be aligned to 4 (w) or 8 (x) bytes ...
1237 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// ... and is encoded scaled down by that size
1238 ofs = (ofs >> (2+is64));
// the scaled offset has to fit the range of the imm7 field
1239 assert(-64 <= ofs && ofs <= 63);
// 0x00400000 selects the load form
1241 if (!is_st) op |= 0x00400000;
// 0x80000000 selects the 64-bit register form
1242 if (is64) op |= 0x80000000;
1243 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load (is_store == 0) the registers named by the
// bitmask `reglist`, using the stack area that starts at SP + SSP_CALLEE_REGS.
// All accesses are 64-bit (second argument 1 to emit_ldstp/emit_ldst).
1246 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask one bit at a time; r is the register number of the low bit
1250 for (r = 0; reglist; r++, reglist >>= 1) {
// two collected registers are transferred with one pair instruction
1254 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
// a single register is transferred on its own
// (presumably the odd one left over after pairing - TODO confirm)
1260 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the bytes used must not exceed SSP_CALLER_REGS
1263 assert(ofs <= SSP_CALLER_REGS);
1266 // Save registers before function call
1267 static void save_regs(u_int reglist)
1269 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1270 save_load_regs_all(1, reglist);
1273 // Restore registers after function call
1274 static void restore_regs(u_int reglist)
1276 reglist &= CALLER_SAVE_REGS;
1277 save_load_regs_all(0, reglist);
1280 /* Stubs/epilogue */
// Literal pool hook taking a count/size argument n.
// NOTE(review): only the signature is visible here; confirm what, if
// anything, this backend emits for it.
1282 static void literal_pool(int n)
// Companion hook to literal_pool(), also taking n.
// NOTE(review): only the signature is visible here; confirm what, if
// anything, this backend emits for it.
1287 static void literal_pool_jumpover(int n)
// Emit the stub for a jump that leaves the current block.
//   addr   - location of the branch instruction inside the block
//   target - 32-bit target address, loaded into register 0 as two 16-bit halves
//   linker - routine the stub finally far-jumps to
// The emitted layout is relied upon by the parsers named below.
1291 // parsed by get_pointer, find_extjump_insn
1292 static void emit_extjump2(u_char *addr, u_int target, void *linker)
// addr[3] is the top byte of the instruction word when read bytewise
1294 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
// register 0 = target (low half via movz, high half via movk lsl 16)
1296 emit_movz(target & 0xffff, 0);
1297 emit_movk_lsl16(target >> 16, 0);
1299 // addr is in the current recompiled block (max 256k)
1300 // offset shouldn't exceed +/-1MB
1302 emit_far_jump(linker);
// Sanity-check that `src` points at a stub produced by emit_extjump2().
1305 static void check_extjump2(void *src)
// first word must be a movz into register 0 (imm16 bits are masked out)
1308 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
// Put the constant rt_val into register rt, reusing register rs (known to
// hold rs_val) when that allows a shorter encoding.
1312 // put rt_val into rt, potentially making use of rs with value rs_val
1313 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
// unsigned subtraction, reinterpreted as a signed distance
1315 int diff = rt_val - rs_val;
// distance within +/-4095, or 4K-aligned and within +/-16M: emit_addimm from rs
1316 if ((-4096 < diff && diff < 4096)
1317 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1318 emit_addimm(rs, diff, rt);
// rt_val is the bitwise complement of rs_val
1319 else if (rt_val == ~rs_val)
// the differing bits form a pattern is_rotated_mask() accepts: xor from rs
1321 else if (is_rotated_mask(rs_val ^ rt_val))
1322 emit_xorimm(rs, rs_val ^ rt_val, rt);
// fallback: load the constant without using rs
1324 emit_movimm(rt_val, rt);
1327 // return 1 if emit_movimm_from() can do its job cheaply for v1 -> v2
1328 static int is_similar_value(u_int v1, u_int v2)
// same distance and xor-mask tests that emit_movimm_from() uses to pick
// its short forms
1331 return (-4096 < diff && diff < 4096)
1332 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1334 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from(): put rt_val (pointer sized) into rt.
// rs_val/rs describe a register with a known 32-bit value.
1337 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
// values that fit in 32 bits are delegated to the 32-bit helper
1339 if (rt_val < 0x100000000ull) {
1340 emit_movimm_from(rs_val, rs, rt_val, rt);
1343 // just move the whole thing. At least on Linux all addresses
1344 // seem to be 48bit, so 3 insns - not great not terrible
// bits 0..15 via movz, then one movk per further 16-bit slice
1345 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1346 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1347 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1348 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1349 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1350 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
// bits 48..63 (presumably only emitted when they are non-zero, going by
// the "3 insns" remark above - TODO confirm)
1352 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1353 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
// Move two values into the first two argument registers:
// host register a0 ends up in register 0, a1 in register 1.
// NOTE(review): a0 and a1 are u_int, so the ">=0" tests below are always true.
1358 static void pass_args64(u_int a0, u_int a1)
// exchange through register 2 as scratch: a0 -> 2, a1 -> 1, 2 -> 0
1362 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
// a1 currently lives in register 0 while a0 is elsewhere
1364 else if(a0!=0&&a1==0) {
1366 if (a0>=0) emit_mov64(a0,0);
// general case: plain moves, skipped when the value is already in place
1369 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1370 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt with the bitfield extension that matches the access `type`.
1374 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// byte load: sbfm (signed bitfield move) up to bit 7
1377 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
// byte store: ubfm (unsigned bitfield move) up to bit 7
1379 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
// halfword load: sbfm up to bit 15
1380 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
// halfword store: ubfm up to bit 15
1382 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
// word store: no extension, just a move when the registers differ
1384 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1389 #include "pcsxmem.h"
1390 //#include "pcsxmem_inline.c"
// Emit the out-of-line path for the load recorded in stubs[n].
// Fields of stubs[n] read here: a (instruction index, used for the debug
// address), addr (branch to patch), type, b (host register holding the
// address), c (struct regstat *), d (cycle adjustment), e (register list
// given to restore_regs) and retaddr (where the stub jumps back to).
1392 static void do_readstub(int n)
1394 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// make the branch recorded at stubs[n].addr land on the code emitted below
1395 set_jump_target(stubs[n].addr, out);
1396 enum stub_type type = stubs[n].type;
1398 int rs = stubs[n].b;
1399 const struct regstat *i_regs = (void *)stubs[n].c;
1400 u_int reglist = stubs[n].e;
1401 const signed char *i_regmap = i_regs->regmap;
// destination: the FTEMP mapping for C1LS/C2LS/LOADLR, otherwise that of rt1
1403 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1404 rt=get_reg(i_regmap,FTEMP);
1406 rt=get_reg(i_regmap,dops[i].rt1);
1409 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1410 void *restore_jump = NULL, *handler_jump = NULL;
// scan host registers below HOST_CCREG for one that is not in reglist
1412 for (r = 0; r < HOST_CCREG; r++) {
1413 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1418 if(rt>=0&&dops[i].rt1!=0)
1425 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = table entry for the 4K page of the address (index rs >> 12,
// 8-byte entries, table pointer read from mem_rtab) ...
1427 emit_readdword(&mem_rtab,temp);
1428 emit_shrimm(rs,12,temp2);
1429 emit_readdword_dualindexedx8(temp,temp2,temp2);
// ... then added to itself (compare the "<< 1" in inline_readstub)
1430 emit_adds64(temp2,temp2,temp2);
// load straight from [temp2 + rs], but only when the result is wanted
1433 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1435 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1436 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1437 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1438 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1439 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
// branch emitted with a placeholder target of 0
1445 emit_jmp(0); // jump to reg restore
1448 emit_jmp(stubs[n].retaddr); // return address
// the handler-call path starts here
1449 set_jump_target(handler_jump, out);
// pick the read handler by access width
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
// arguments 0 and 1: the address register and temp2
1461 pass_args64(rs,temp2);
1462 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count + stubs[n].d; the count comes from its mapped
// register cc, or from register 2 after the reload when it is unmapped
1464 emit_loadreg(CCREG,2);
1465 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1466 emit_far_call(handler);
1467 // (no cycle reload after read)
// take the handler's result from register 0 and extend it into rt
1468 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1469 loadstore_extend(type,0,rt);
1472 set_jump_target(restore_jump, out);
1473 restore_regs(reglist);
1474 emit_jmp(stubs[n].retaddr);
// Emit an inline read for a load whose guest address `addr` is known at
// compile time.
//   type    - access kind (LOADB/LOADBU/LOADH/LOADHU/LOADW stub types)
//   i       - index of the instruction in dops[]
//   regmap  - guest-to-host register mapping in effect
//   target  - guest register looked up for the result
//   adj     - value added to the cycle count before calling a handler
//   reglist - register list given to restore_regs() after a handler call
1477 static void inline_readstub(enum stub_type type, int i, u_int addr,
1478 const signed char regmap[], int target, int adj, u_int reglist)
1480 int rs=get_reg(regmap,target);
1481 int rt=get_reg(regmap,target);
// no mapping for target: use the register mapped to -1 for the address
1482 if(rs<0) rs=get_reg(regmap,-1);
1485 uintptr_t host_addr = 0;
1487 int cc=get_reg(regmap,CCREG);
1488 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
// a NULL handler means the address can be read from host memory directly,
// with the host pointer returned through host_addr
1490 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1491 if (handler == NULL) {
1492 if(rt<0||dops[i].rt1==0)
// turn the guest address held in rs into the host address
1494 if (addr != host_addr)
1495 emit_movimm_from64(addr, rs, host_addr, rs);
1497 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1498 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1499 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1500 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1501 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1506 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// pick the generic read handler by access width
1508 if(type==LOADB_STUB||type==LOADBU_STUB)
1509 handler=jump_handler_read8;
1510 if(type==LOADH_STUB||type==LOADHU_STUB)
1511 handler=jump_handler_read16;
1512 if(type==LOADW_STUB)
1513 handler=jump_handler_read32;
1516 // call a memhandler
1517 if(rt>=0&&dops[i].rt1!=0)
// register 0 = guest address
1521 emit_movimm(addr,0);
// register 2 = cycle count + adj; the count comes from its mapped register
// cc, or from register 2 after the reload when it is unmapped
1525 emit_loadreg(CCREG,2);
1526 emit_addimm(cc<0?2:cc,adj,2);
// register 1 = the mem_rtab entry for this page, shifted left by one,
// materialized as adrp plus the low 12 bits
1528 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1529 emit_adrp((void *)l1, 1);
1530 emit_addimm64(1, l1 & 0xfff, 1);
1533 emit_far_call(do_memhandler_pre);
1535 emit_far_call(handler);
1537 // (no cycle reload after read)
// take the handler's result from register 0 and extend it into rt
1538 if(rt>=0&&dops[i].rt1!=0)
1539 loadstore_extend(type, 0, rt);
1540 restore_regs(reglist);
// Emit the out-of-line path for the store recorded in stubs[n].
// Fields of stubs[n] read here: a (instruction index, used for the debug
// address), addr (branch to patch), type, c (struct regstat *), d (cycle
// adjustment), e (register list given to restore_regs) and retaddr.
1543 static void do_writestub(int n)
1545 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
// make the branch recorded at stubs[n].addr land on the code emitted below
1546 set_jump_target(stubs[n].addr, out);
1547 enum stub_type type=stubs[n].type;
1550 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1551 u_int reglist=stubs[n].e;
1552 signed char *i_regmap=i_regs->regmap;
// value to store: the FTEMP mapping for C1LS/C2LS, otherwise that of rs2
1554 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1555 rt=get_reg(i_regmap,r=FTEMP);
1557 rt=get_reg(i_regmap,r=dops[i].rs2);
1561 int rtmp,temp=-1,temp2,regs_saved=0;
1562 void *restore_jump = NULL, *handler_jump = NULL;
// reglist plus the address and value registers
1563 int reglist2=reglist|(1<<rs)|(1<<rt);
// scan host registers below HOST_CCREG for one that is not in reglist
1564 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1565 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
// otherwise consider registers 0..3 that are neither rs nor rt
1573 for(rtmp=0;rtmp<=3;rtmp++)
1574 if(rtmp!=rs&&rtmp!=rt)
1577 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1580 host_tempreg_acquire();
// temp2 = table entry for the 4K page of the address (index rs >> 12,
// 8-byte entries, table pointer read from mem_wtab), then added to itself
1583 emit_readdword(&mem_wtab,temp);
1584 emit_shrimm(rs,12,temp2);
1585 emit_readdword_dualindexedx8(temp,temp2,temp2);
1586 emit_adds64(temp2,temp2,temp2);
// store straight to [temp2 + rs]
1590 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1591 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1592 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
// branch emitted with a placeholder target of 0
1597 emit_jmp(0); // jump to reg restore
1600 emit_jmp(stubs[n].retaddr); // return address (invcode check)
// the handler-call path starts here
1601 set_jump_target(handler_jump, out);
// pick the write handler by access width
1607 case STOREB_STUB: handler=jump_handler_write8; break;
1608 case STOREH_STUB: handler=jump_handler_write16; break;
1609 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry is passed in register 3
1615 emit_mov64(temp2,3);
1616 host_tempreg_release();
1618 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count + stubs[n].d; the count comes from its mapped
// register cc, or from register 2 after the reload when it is unmapped
1620 emit_loadreg(CCREG,2);
1621 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1622 // returns new cycle_count
1623 emit_far_call(handler);
// take the returned count from register 0 and undo the adjustment
1624 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1626 emit_storereg(CCREG,2);
1628 set_jump_target(restore_jump, out);
1629 restore_regs(reglist);
1630 emit_jmp(stubs[n].retaddr);
// Emit an inline write for a store whose guest address `addr` is known at
// compile time.
//   type    - access kind (STOREB/STOREH/STOREW stub types)
//   i       - index of the instruction
//   regmap  - guest-to-host register mapping in effect
//   target  - guest register holding the value to store
//   adj     - value added to the cycle count around the handler call
//   reglist - register list given to restore_regs() after a handler call
1633 static void inline_writestub(enum stub_type type, int i, u_int addr,
1634 const signed char regmap[], int target, int adj, u_int reglist)
// rs: register mapped to -1 (holds the address); rt: value to store
1636 int rs = get_reg(regmap,-1);
1637 int rt = get_reg(regmap,target);
1640 uintptr_t host_addr = 0;
// a NULL handler means the address can be written in host memory directly,
// with the host pointer returned through host_addr
1641 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1642 if (handler == NULL) {
// turn the guest address held in rs into the host address
1643 if (addr != host_addr)
1644 emit_movimm_from64(addr, rs, host_addr, rs);
1646 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1647 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1648 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1654 // call a memhandler
1656 emit_writeword(rs, &address); // some handlers still need it
// register 0 = value to store, extended according to the access type
1657 loadstore_extend(type, rt, 0);
// cc_use is the register the cycle count is read from: its mapped register,
// or register 2 after reloading it
1659 cc = cc_use = get_reg(regmap, CCREG);
1661 emit_loadreg(CCREG, (cc_use = 2));
// register 2 = cycle count + adj
1662 emit_addimm(cc_use, adj, 2);
1664 emit_far_call(do_memhandler_pre);
1665 emit_far_call(handler);
1666 emit_far_call(do_memhandler_post);
// take the count back from register 0 and undo the adjustment
1667 emit_addimm(0, -adj, cc_use);
1669 emit_storereg(CCREG, cc_use);
1670 restore_regs(reglist);
// Compare `size` bytes of the guest code at `source` against the saved
// `copy`. Called from the code emitted by do_dirty_stub_base().
1673 static int verify_code_arm64(const void *source, const void *copy, u_int size)
// memcmp result: 0 when both regions are identical
1675 int ret = memcmp(source, copy, size);
1676 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
// Emit the common head of a "dirty" block entry: call verify_code_arm64 on
// (source, copy, source_len), then a call to get_addr(vaddr).
// The two literal loads are emitted with offset 0 and get patched later by
// set_loadlp(). The instruction layout is checked by assert_dirty_stub().
1680 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1681 static void do_dirty_stub_base(u_int vaddr, u_int source_len)
1683 assert(source_len <= MAXBLOCK*4);
1684 emit_loadlp_ofs(0, 0); // ldr x0, source
1685 emit_loadlp_ofs(0, 1); // ldr x1, copy
// third argument: length in bytes, as a movz immediate into register 2
1686 emit_movz(source_len, 2);
1687 emit_far_call(verify_code_arm64);
// register 0 = vaddr (two 16-bit halves), then call get_addr
1690 emit_movz(vaddr & 0xffff, 0);
1691 emit_movk_lsl16(vaddr >> 16, 0);
1692 emit_far_call(get_addr);
// resolve the forward branch `jmp` to this point
1694 set_jump_target(jmp, out);
1697 static void assert_dirty_stub(const u_int *ptr)
1699 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1700 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1701 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
1702 assert( ptr[8] == 0xd61f0000); // br x0
// Patch the literal load at `loadl` so that it refers to the literal at `lit`.
// `loadl` must hold a 0x58000000-class instruction whose offset field is
// still zero; `lit` must be 4-byte aligned and less than 1MB after `loadl`.
static void set_loadlp(u_int *loadl, void *lit)
{
  uintptr_t ofs = (u_char *)lit - (u_char *)loadl;

  assert((*loadl & ~0x1f) == 0x58000000);
  assert((ofs & 3) == 0);
  assert(ofs < 0x100000);

  // the word offset goes into the field that starts at bit 5
  const uintptr_t field = (ofs >> 2) << 5;
  *loadl |= field;
}
1714 static void do_dirty_stub_emit_literals(u_int *loadlps)
1716 set_loadlp(&loadlps[0], out);
1717 output_w64((uintptr_t)source);
1718 set_loadlp(&loadlps[1], out);
1719 output_w64((uintptr_t)copy);
// Emit the dirty-check entry for instruction i of the block being compiled:
// the common stub for address start + i*4, a jump to the compiled code of
// that instruction, and then the literals the stub loads.
1722 static void *do_dirty_stub(int i, u_int source_len)
1724 assem_debug("do_dirty_stub %x\n",start+i*4);
// remember where the stub begins; its first two words are the literal loads
1725 u_int *loadlps = (void *)out;
1726 do_dirty_stub_base(start + i*4, source_len);
1730 entry = instr_addr[i];
1731 emit_jmp(instr_addr[i]);
// literals go after the jump, so they are not executed
1732 do_dirty_stub_emit_literals(loadlps);
1736 static void do_dirty_stub_ds(u_int source_len)
1738 u_int *loadlps = (void *)out;
1739 do_dirty_stub_base(start + 1, source_len);
1740 void *lit_jumpover = out;
1741 emit_jmp(out + 8*2);
1742 do_dirty_stub_emit_literals(loadlps);
1743 set_jump_target(lit_jumpover, out);
// Return the 64-bit literal referenced by the literal load at `i`.
1746 static uint64_t get_from_ldr_literal(const u_int *i)
// top byte 0x58 identifies the literal-load instruction
1749 assert((i[0] & 0xff000000) == 0x58000000);
// ofs is added to a u_int pointer, so it counts 32-bit words from `i`
1752 return *(uint64_t *)(i + ofs);
// Return the 16-bit immediate of the movz instruction at `i`.
// The check ignores bit 31 and the low 21 bits, so any destination register
// is accepted.
static uint64_t get_from_movz(const u_int *i)
{
  assert((i[0] & 0x7fe00000) == 0x52800000);

  // imm16 occupies bits 5..20
  const u_int imm16 = (i[0] >> 5) & 0xffff;
  return imm16;
}
1761 // Find the "clean" entry point from a "dirty" entry point
1762 // by skipping past the call to verify_code
1763 static void *get_clean_addr(u_int *addr)
// addr must point at a stub emitted by do_dirty_stub_base()
1765 assert_dirty_stub(addr);
// Re-run the dirty check for the stub at `ptr` from the host side.
// Returns non-zero when the guest source still matches the saved copy.
1769 static int verify_dirty(const u_int *ptr)
1771 const void *source, *copy;
1773 assert_dirty_stub(ptr);
// recover the stub's operands from its first three instructions
1774 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1775 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1776 len = get_from_movz(&ptr[2]); // movz w2, #source_len
1777 return !memcmp(source, copy, len);
// Tell whether `addr` is a clean entry point. An address that starts with a
// literal load is taken to be a dirty stub (presumably reported as not
// clean - TODO confirm against the return statements).
1780 static int isclean(void *addr)
1782 const u_int *ptr = addr;
1783 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1784 assert_dirty_stub(ptr);
1790 // get source that block at addr was compiled from (host pointers)
1791 static void get_bounds(void *addr, u_char **start, u_char **end)
1793 const u_int *ptr = addr;
1794 assert_dirty_stub(ptr);
1795 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1796 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
// Emit the code that precedes a call to a GTE (cop2) handler.
1801 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// store every register in reglist; unlike save_regs(), the list is not
// masked with CALLER_SAVE_REGS here
1803 save_load_regs_all(1, reglist);
1804 cop2_do_stall_check(op, i, i_regs, 0);
// performance-counter hook (presumably compiled conditionally - TODO confirm)
1807 emit_far_call(pcnt_gte_start);
1809 // pointer to cop2 regs
// register 0 = FP + (offset of psxRegs.CP2D.r[0] from dynarec_local)
1810 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a call to a GTE (cop2) handler; counterpart of
// c2op_prologue().
1813 static void c2op_epilogue(u_int op,u_int reglist)
// performance-counter hook (presumably compiled conditionally - TODO confirm)
1817 emit_far_call(pcnt_gte_end);
// reload every register in reglist
1819 save_load_regs_all(0, reglist);
// Assemble the GTE (cop2) operation at instruction i as a call to the
// matching entry of gte_handlers[] or gte_handlers_nf[].
1822 static void c2op_assemble(int i, const struct regstat *i_regs)
// the low 6 bits of the instruction word select the operation
1824 u_int c2op=source[i]&0x3f;
1825 u_int hr,reglist_full=0,reglist;
1826 int need_flags,need_ir;
// build a bitmask of every host register that currently has a mapping
1827 for(hr=0;hr<HOST_REGS;hr++) {
1828 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1830 reglist=reglist_full&CALLER_SAVE_REGS;
1832 if (gte_handlers[c2op]!=NULL) {
// bit 63 of gte_unneeded set means the flags are not needed afterwards
1833 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// bits 9..11 (mask 0xe00) all set means the IR results are not needed
1834 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1835 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1836 source[i],gte_unneeded[i+1],need_flags,need_ir);
1837 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1839 //int shift = (source[i] >> 19) & 1;
1840 //int lm = (source[i] >> 10) & 1;
1844 c2op_prologue(c2op, i, i_regs, reglist);
// the handler reads the instruction word from psxRegs.code
1845 emit_movimm(source[i],1); // opcode
1846 emit_writeword(1,&psxRegs.code);
// without need_flags the handler from the gte_handlers_nf table is called
1847 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1850 c2op_epilogue(c2op,reglist);
1854 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1856 //value = value & 0x7ffff000;
1857 //if (value & 0x7f87e000) value |= 0x80000000;
1858 emit_andimm(sl, 0x7fffe000, temp);
1859 emit_testimm(temp, 0xff87ffff);
1860 emit_andimm(sl, 0x7ffff000, temp);
1861 host_tempreg_acquire();
1862 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1863 emit_cmovne_reg(HOST_TEMPREG, temp);
1864 host_tempreg_release();
1865 assert(0); // testing needed
1868 static void do_mfc2_31_one(u_int copr,signed char temp)
1870 emit_readshword(®_cop2d[copr],temp);
1871 emit_bicsar_imm(temp,31,temp);
1872 emit_cmpimm(temp,0xf80);
1873 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1874 emit_andimm(temp,0xf80,temp);
// Emit the read of cop2 data register 29: combine the fields produced by
// do_mfc2_31_one() for registers 9, 10 and 11 into `tl` and write the
// result back to reg_cop2d[29].
// NOTE(review): "®_cop2d" below looks like a mis-encoded "&reg_cop2d"
// (HTML entity for the registered sign); confirm and restore.
1877 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
// fall back to HOST_TEMPREG as scratch (presumably when no temp register
// was supplied - TODO confirm the condition)
1880 host_tempreg_acquire();
1881 temp = HOST_TEMPREG;
// register 9: field at bits 7..11 moved down to bits 0..4
1883 do_mfc2_31_one(9,temp);
1884 emit_shrimm(temp,7,tl);
// register 10: field shifted right by 2 and or-ed in (bits 5..9)
1885 do_mfc2_31_one(10,temp);
1886 emit_orrshr_imm(temp,2,tl);
// register 11: field shifted left by 3 and or-ed in (bits 10..14)
1887 do_mfc2_31_one(11,temp);
1888 emit_orrshl_imm(temp,3,tl);
1889 emit_writeword(tl,®_cop2d[29]);
// give the scratch register back if it was taken above
1891 if (temp == HOST_TEMPREG)
1892 host_tempreg_release();
// Assemble a multiply or divide (MULT/MULTU/DIV/DIVU) for instruction i.
// opcode2 values named in this function: 0x18 = MULT, 0x1A = DIV; 0x1B is
// treated as the other divide. Results go to the host registers mapped to
// HIREG and LOREG.
1895 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
// general case: neither source is guest register 0
1901 if(dops[i].rs1&&dops[i].rs2)
1903 switch(dops[i].opcode2)
1908 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1909 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1910 signed char hi=get_reg(i_regs->regmap,HIREG);
1911 signed char lo=get_reg(i_regs->regmap,LOREG);
// the 64-bit product is first produced in hi
1917 if(dops[i].opcode2==0x18) // MULT
1918 emit_smull(m1,m2,hi);
1920 emit_umull(m1,m2,hi);
// then hi is reduced to the upper 32 bits of the product
1923 emit_shrimm64(hi,32,hi);
1929 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1930 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1931 signed char quotient=get_reg(i_regs->regmap,LOREG);
1932 signed char remainder=get_reg(i_regs->regmap,HIREG);
1933 assert(numerator>=0);
1934 assert(denominator>=0);
1935 assert(quotient>=0);
1936 assert(remainder>=0);
1938 if (dops[i].opcode2 == 0x1A) // DIV
1939 emit_sdiv(numerator,denominator,quotient);
1941 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1942 emit_msub(quotient,denominator,numerator,remainder);
1944 // div 0 quotient (remainder is already correct)
1945 host_tempreg_acquire();
// NOTE(review): the first operand is passed as 0 - confirm whether
// emit_sub_asrimm takes a register number there
1946 if (dops[i].opcode2 == 0x1A) // DIV
1947 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
// unsigned divide by zero: all ones
1949 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient only when the denominator is zero
1950 emit_test(denominator,denominator);
1951 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1952 host_tempreg_release();
// special cases: at least one source is guest register 0
1961 signed char hr=get_reg(i_regs->regmap,HIREG);
1962 signed char lr=get_reg(i_regs->regmap,LOREG);
1963 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1966 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1967 assert(numerator >= 0);
// HI gets the numerator
1969 emit_mov(numerator,hr);
1971 if (dops[i].opcode2 == 0x1A) // DIV
1972 emit_sub_asrimm(0,numerator,31,lr);
// HI = 0, LO = all ones
1978 if (hr >= 0) emit_zeroreg(hr);
1979 if (lr >= 0) emit_movimm(~0,lr);
1984 // Multiply by zero is zero.
1985 if (hr >= 0) emit_zeroreg(hr);
1986 if (lr >= 0) emit_zeroreg(lr);
1990 #define multdiv_assemble multdiv_assemble_arm64
// Emit an indirect jump to the guest address held in host register rs.
// The visible part is the call to get_addr_ht (presumably the address
// lookup, followed by a jump to its result - TODO confirm).
1992 static void do_jump_vaddr(u_int rs)
1996 emit_far_call(get_addr_ht);
2000 static void do_preload_rhash(u_int r) {
2001 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2002 // register. On ARM the hash can be done with a single instruction (below)
2005 static void do_preload_rhtbl(u_int ht) {
2006 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2009 static void do_rhash(u_int rs,u_int rh) {
2010 emit_andimm(rs, 0xf8, rh);
2013 static void do_miniht_load(int ht, u_int rh) {
2014 emit_add64(ht, rh, ht);
2015 emit_ldst(0, 0, rh, ht, 0);
// Emit the jump through the mini hash table entry that `ht` points at.
2018 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
// resolve the forward branch `jaddr` to this point
2024 set_jump_target(jaddr, out);
// 64-bit load from byte offset 8 of the entry (the imm12 field is scaled by 8)
2025 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2026 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that records `return_address` in the mini hash table.
// The entry index is (return_address & 0xFF) >> 3: word [0] gets the guest
// address (from rt), dword [1] gets the value in `temp`.
2030 // parsed by set_jump_target?
2031 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// rt = return_address: high half via movz lsl 16, low half via movk
2032 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2033 emit_movk(return_address&0xffff,rt);
// register the current output position with the linker for return_address
2034 add_to_linker(out,return_address,1);
2036 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2037 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make newly written code in [start, end) visible to instruction fetch:
// data cache maintenance by line, then instruction cache invalidation by
// line, with barriers in between. Either step is skipped when CTR_EL0
// reports it as unnecessary.
2040 static void clear_cache_arm64(char *start, char *end)
2042 // Don't rely on GCC's __clear_cache implementation, as it caches
2043 // icache/dcache cache line sizes, that can vary between cores on
2044 // big.LITTLE architectures.
2045 uint64_t addr, ctr_el0;
// static: the smallest line sizes seen so far are kept across calls
2046 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2047 size_t isize, dsize;
// line sizes in bytes: 4 << the 4-bit fields at bits 0..3 (icache) and
// bits 16..19 (dcache) of CTR_EL0
2049 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2050 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2051 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2053 // use the global minimum cache line size
2054 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2055 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2057 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2058 not required for instruction to data coherence. */
// bit 28 is the IDC bit referred to above
2059 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the cache line that contains `start`
2060 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2061 for (; addr < (uint64_t)end; addr += dsize)
2062 // use "civac" instead of "cvau", as this is the suggested workaround for
2063 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2064 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2066 __asm__ volatile("dsb ish" : : : "memory");
2068 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2069 Unification is not required for instruction to data coherence. */
// bit 29 is the DIC bit referred to above
2070 if ((ctr_el0 & (1 << 29)) == 0x0) {
2071 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2072 for (; addr < (uint64_t)end; addr += isize)
2073 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2075 __asm__ volatile("dsb ish" : : : "memory");
2078 __asm__ volatile("isb" : : : "memory");
// Fill ndrc->tramp.ops with trampolines: each entry is a literal load into
// x17 followed by "br x17".
2081 // CPU-architecture-specific initialization
2082 static void arch_init(void)
// byte distance from the ops array to the f array; every ldr uses this same
// pc-relative distance, so ops[i] reads the i-th slot of f, provided both
// arrays have 8-byte elements (TODO confirm against the tramp layout)
2084 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2085 struct tramp_insns *ops = ndrc->tramp.ops;
// the literal offset is encoded in words, so it must be 4-byte aligned
2087 assert(!(diff & 3));
// bracket the writes to the code area
2088 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2089 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2090 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2091 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2093 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2096 // vim:shiftwidth=2:expandtab