1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define unused __attribute__((unused))
28 void do_memhandler_pre();
29 void do_memhandler_post();
/*
 * Patch the PC-relative immediate of the instruction at addr so that it
 * refers to target.  The instruction kind is recognised from the opcode bits
 * already stored at addr:
 *   - b                    (26-bit word offset, +/-128MB)
 *   - b.cond / cbz / cbnz  (19-bit word offset, +/-1MB)
 *   - adr                  (21-bit byte offset, +/-1MB)
 * Anything else aborts.
 *
 * NOTE(review): the declaration of ptr and the final "else" were not visible
 * in the reviewed extract and are reconstructed here - confirm against the
 * tree.
 */
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out).
    // A workaround would be to do a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep bits 4:0 of the original word: cbz/cbnz hold a 5-bit Rt there,
    // while b.cond holds cond in bits 3:0 with bit 4 already zero.  Masking
    // with 0xF cleared bit 4 and redirected cbz/cbnz on registers 16..31.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is.
// The third word of the stub must be an adr; its byte offset is decoded and
// applied, in whole instruction words, to the address of that adr.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // Decode on an unsigned copy: left-shifting a signed int whose top bits are
  // set is undefined behaviour.
  uint32_t insn = (uint32_t)*ptr;
  int32_t immhi, offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  immhi = (int32_t)(insn << 8) >> 13;        // sign-extended bits 23:5
  offset = immhi * 4 + (int32_t)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
// find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to the branch insn,
// return the address that branch jumps to
// Returns the current destination of the branch that belongs to an external
// jump stub: the branch is located with find_extjump_insn(), its signed
// immediate is extracted (26 bits for b, 19 bits for b.cond/cbz/cbnz) and
// added, in units of instruction words, to the branch's own address.
// NOTE(review): the lines following the last return (embedded numbers 81-83)
// are missing from this extract, so the fall-through path is not visible.
73 static void *get_pointer(void *stub)
75 int *i_ptr = find_extjump_insn(stub);
76 if ((*i_ptr&0xfc000000) == 0x14000000) // b
77 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
78 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
79 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
80 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
// NOTE(review): embedded line numbers show that most of this body (87-90,
// 93, 96-100) is missing from the extract; only the visible steps are
// described below.
85 // Allocate a specific ARM register.
86 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
// a host register other than EXCLUDE_REG that currently maps reg: remember
// its dirty bit so it can be carried over to hr
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
// hr takes over the remembered dirty state and is no longer marked constant
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
106 // Alloc cycle count into dedicated register
107 static void alloc_cc(struct regstat *cur,int i)
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
117 static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124 static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition-code enumerators, listed in the same order as the mnemonics in
// condname[] below; the emitters in this file shift them into bit 12
// (csel/csinc/csinv) or OR them into bits 3:0 (b.cond).
// NOTE(review): the line opening this enumeration (embedded number 131) and
// its closing line are missing from the extract.
132 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
133 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
// Mnemonic suffixes for debug output, indexed by the condition value above.
136 static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Stores one 32-bit instruction word at the current output position `out`.
// NOTE(review): the line with embedded number 144 is missing from the
// extract; presumably it advances `out` - confirm.
141 static void output_w32(u_int word)
143 *((u_int *)out) = word;
// Stores one 64-bit value at the current output position `out`.
// NOTE(review): the line with embedded number 150 is missing from the
// extract; presumably it advances `out` - confirm.
147 static void output_w64(uint64_t dword)
149 *((uint64_t *)out) = dword;
// Field packers: each returns its operands shifted to their positions in an
// instruction word, for the caller to OR with an opcode.
// NOTE(review): the embedded line numbers show lines missing inside most of
// these bodies (e.g. 156-157, 163-165, 170-173); only the visible statements
// are described.

// rm at bit 16, rd at bit 0.
154 static u_int rm_rd(u_int rm, u_int rd)
158 return (rm << 16) | rd;
// rn at bit 5, rd at bit 0.
162 static u_int rn_rd(u_int rn, u_int rd)
166 return (rn << 5) | rd;
// rm at bit 16, rn at bit 5, rd at bit 0.
169 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
174 return (rm << 16) | (rn << 5) | rd;
// As rm_rn_rd, plus ra at bit 10.
177 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// imm7 at bit 15, rt2 at bit 10, rn at bit 5, rt at bit 0.
183 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
189 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// As rm_rn_rd, plus imm6 at bit 10.
192 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// imm16 (asserted to fit in 16 bits) at bit 5, rd at bit 0.
198 static u_int imm16_rd(u_int imm16, u_int rd)
200 assert(imm16 < 0x10000);
202 return (imm16 << 5) | rd;
// imm12 (asserted to fit in 12 bits) at bit 10, rn at bit 5, rd at bit 0.
205 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
207 assert(imm12 < 0x1000);
210 return (imm12 << 10) | (rn << 5) | rd;
// imm9 (asserted to fit in 9 bits) at bit 12, rn at bit 5, rd at bit 0.
213 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
215 assert(imm9 < 0x200);
218 return (imm9 << 12) | (rn << 5) | rd;
// imm19 (asserted to fit in 19 bits) at bit 5, rt at bit 0.
221 static u_int imm19_rt(u_int imm19, u_int rt)
223 assert(imm19 < 0x80000);
225 return (imm19 << 5) | rt;
// n at bit 22, immr at bit 16, imms at bit 10, rn at bit 5, rd at bit 0.
228 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
235 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Computes the 26-bit word-offset field for a branch from the current output
// position `out` to addr.  Addresses below 3 are placeholders for branches
// that get patched later and yield 0.  An offset outside +/-128MB is reported
// through SysPrintf.
// NOTE(review): the lines after the SysPrintf call (embedded numbers 244-246)
// are missing from the extract, so the rest of the error path is not visible.
238 static u_int genjmp(const u_char *addr)
240 intptr_t offset = addr - out;
241 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
242 if (offset < -134217728 || offset > 134217727) {
243 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
247 return ((u_int)offset >> 2) & 0x03ffffff;
// Same as genjmp, but for the 19-bit word-offset field (+/-1MB range).
// NOTE(review): embedded numbers 256-258 are missing here as well.
250 static u_int genjmpcc(const u_char *addr)
252 intptr_t offset = addr - out;
// placeholder target: the branch is patched later
253 if ((uintptr_t)addr < 3) return 0;
254 if (offset < -1048576 || offset > 1048572) {
255 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
259 return ((u_int)offset >> 2) & 0x7ffff;
/*
 * Returns 1 when value is a non-empty run of ones starting at bit 0
 * (0x1, 0x3, 0x7, ... 0xffffffff), 0 otherwise.
 */
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  /* adding 1 to a low-aligned run of ones clears every bit of the run */
  return (value & (value + 1)) == 0;
}
267 // This function returns true if the argument contains a
268 // non-empty sequence of ones (possibly rotated) with the remainder zero.
// The value is tested as-is and, in the final return, inverted; in both cases
// the bits below the lowest set bit are filled in before calling is_mask().
// NOTE(review): the statements controlled by the two ifs (embedded numbers
// 272 and 274) are missing from the extract.
269 static uint32_t is_rotated_mask(u_int value)
271 if (value == 0 || value == ~0)
273 if (is_mask((value - 1) | value))
275 return is_mask((~value - 1) | ~value);
// Derives the immr/imms fields of a logical-immediate encoding for value,
// from the number of leading zeros, trailing zeros and ones in it.
// NOTE(review): several lines are missing from the extract (embedded numbers
// 281, 287-290, 295-300).  As shown, both ifs test the same expression;
// presumably value is modified between them and the missing lines also store
// *imms - confirm against the tree.
278 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
280 int lzeros, tzeros, ones;
282 if (is_mask((value - 1) | value)) {
283 lzeros = __builtin_clz(value);
284 tzeros = __builtin_ctz(value);
285 ones = 32 - lzeros - tzeros;
286 *immr = (32 - tzeros) & 31;
291 if (is_mask((value - 1) | value)) {
292 lzeros = __builtin_clz(value);
293 tzeros = __builtin_ctz(value);
294 ones = 32 - lzeros - tzeros;
302 static void emit_mov(u_int rs, u_int rt)
304 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
305 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308 static void emit_mov64(u_int rs, u_int rt)
310 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
311 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
314 static void emit_add(u_int rs1, u_int rs2, u_int rt)
316 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
317 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
320 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
322 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
323 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
326 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
328 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
329 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
331 #define emit_adds_ptr emit_adds64
333 static void emit_neg(u_int rs, u_int rt)
335 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
339 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
348 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351 static void emit_movz(u_int imm, u_int rt)
353 assem_debug("movz %s,#%#x\n", regname[rt], imm);
354 output_w32(0x52800000 | imm16_rd(imm, rt));
357 static void emit_movz_lsl16(u_int imm, u_int rt)
359 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
360 output_w32(0x52a00000 | imm16_rd(imm, rt));
363 static void emit_movn(u_int imm, u_int rt)
365 assem_debug("movn %s,#%#x\n", regname[rt], imm);
366 output_w32(0x12800000 | imm16_rd(imm, rt));
369 static void emit_movn_lsl16(u_int imm,u_int rt)
371 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x12a00000 | imm16_rd(imm, rt));
375 static void emit_movk(u_int imm,u_int rt)
377 assem_debug("movk %s,#%#x\n", regname[rt], imm);
378 output_w32(0x72800000 | imm16_rd(imm, rt));
// movk wT,#imm,lsl #16
// NOTE(review): the line with embedded number 383 is missing from the extract.
381 static void emit_movk_lsl16(u_int imm,u_int rt)
384 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
385 output_w32(0x72a00000 | imm16_rd(imm, rt));
// NOTE(review): the body of emit_zeroreg (embedded numbers 389-391) is
// missing from the extract.
388 static void emit_zeroreg(u_int rt)
// Loads a 32-bit constant into rt, trying progressively more general forms:
// a single movn/movz variant when only one 16-bit half is significant, an
// orr-with-wzr logical immediate when the value is a rotated run of ones,
// and otherwise a movz + movk pair.
// NOTE(review): embedded numbers 394-396, 398, 404 and 408-409 are missing
// from the extract, including the first condition of the chain.
393 static void emit_movimm(u_int imm, u_int rt)
397 else if ((~imm) < 65536)
399 else if ((imm&0xffff) == 0)
400 emit_movz_lsl16(imm >> 16, rt);
401 else if (((~imm)&0xffff) == 0)
402 emit_movn_lsl16(~imm >> 16, rt);
403 else if (is_rotated_mask(imm)) {
405 gen_logical_imm(imm, &immr, &imms);
406 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
407 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
// general case: low half with movz, then high half with movk
410 emit_movz(imm & 0xffff, rt);
411 emit_movk_lsl16(imm >> 16, rt);
// The three loaders below address their operand relative to dynarec_local
// through the FP register, using a scaled 12-bit unsigned offset; the guard
// requires natural alignment and an offset that fits after scaling.
// NOTE(review): what happens when the guard fails (embedded numbers 421-423,
// 432-434, 444-446) is missing from the extract.

// 32-bit load: ldr wT,[FP,#offset]
415 static void emit_readword(void *addr, u_int rt)
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 3) && offset <= 16380) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
420 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// 64-bit load: ldr xT,[FP,#offset]
426 static void emit_readdword(void *addr, u_int rt)
428 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
429 if (!(offset & 7) && offset <= 32760) {
430 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
431 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// pointer-sized reads use the 64-bit loader
436 #define emit_readptr emit_readdword
// sign-extending 16-bit load: ldrsh wT,[FP,#offset]
438 static void emit_readshword(void *addr, u_int rt)
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Loads emulated register r into host register hr.  The special register
// numbers handled below have their own storage (INVCP and ROREG are read as
// 64-bit values); the remaining visible case reads psxRegs.GPR.r[r].
// NOTE(review): embedded numbers 450-456, 463-464, 466-468 and 470 are
// missing from the extract, so the switch header, the default handling and
// the is64 test are not visible.
449 static void emit_loadreg(u_int r, u_int hr)
457 //case HIREG: addr = &hi; break;
458 //case LOREG: addr = &lo; break;
459 case CCREG: addr = &cycle_count; break;
460 case CSREG: addr = &Status; break;
461 case INVCP: addr = &invc_ptr; is64 = 1; break;
462 case ROREG: addr = &ram_offset; is64 = 1; break;
465 addr = &psxRegs.GPR.r[r];
469 emit_readdword(addr, hr);
471 emit_readword(addr, hr);
// Stores relative to dynarec_local through the FP register, using a scaled
// 12-bit unsigned offset; the guard requires natural alignment and an offset
// that fits after scaling.
// NOTE(review): what happens when the guard fails (embedded numbers 481-483,
// 492-494) is missing from the extract.

// 32-bit store: str wT,[FP,#offset]
475 static void emit_writeword(u_int rt, void *addr)
477 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
478 if (!(offset & 3) && offset <= 16380) {
479 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
480 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// 64-bit store: str xT,[FP,#offset]
486 static void emit_writedword(u_int rt, void *addr)
488 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
489 if (!(offset & 7) && offset <= 32760) {
490 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
491 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Writes host register hr back to the storage of emulated register r:
// psxRegs.GPR.r[r] by default, cycle_count for CCREG.  Any other r must be
// below 34.
// NOTE(review): embedded numbers 498-499, 501 and 506 are missing from the
// extract (the switch header among them).
497 static void emit_storereg(u_int r, u_int hr)
500 void *addr = &psxRegs.GPR.r[r];
502 //case HIREG: addr = &hi; break;
503 //case LOREG: addr = &lo; break;
504 case CCREG: addr = &cycle_count; break;
505 default: assert(r < 34); break;
507 emit_writeword(hr, addr);
510 static void emit_test(u_int rs, u_int rt)
512 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
513 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
// tst wS,#imm - imm must be encodable as a logical immediate (a rotated run
// of ones), which is asserted rather than handled.
// NOTE(review): the line with embedded number 518 (presumably the
// declaration of immr/imms) is missing from the extract.
516 static void emit_testimm(u_int rs, u_int imm)
519 assem_debug("tst %s,#%#x\n", regname[rs], imm);
520 assert(is_rotated_mask(imm)); // good enough for PCSX
521 gen_logical_imm(imm, &immr, &imms);
522 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
525 static void emit_not(u_int rs,u_int rt)
527 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
528 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
531 static void emit_and(u_int rs1,u_int rs2,u_int rt)
533 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
534 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
537 static void emit_or(u_int rs1,u_int rs2,u_int rt)
539 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
540 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
543 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
545 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
546 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
549 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
551 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
552 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
555 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
557 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
558 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
561 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
563 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
564 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
567 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
569 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
570 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
573 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
575 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
576 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Adds the (possibly negative) constant imm to rs, leaving the result in rt.
//   s    - non-zero selects the flag-setting form for the instruction that
//          consumes the low 12 bits
//   is64 - non-zero selects the 64-bit form
// Magnitudes below 4096 take one add or sub; magnitudes below 16777216 take
// an add/sub of the upper 12 bits (rs -> rt) followed, if the low 12 bits
// are non-zero or flags are wanted, by one of the low 12 bits (rt -> rt).
// NOTE(review): embedded numbers 584 and 606-609 are missing from the
// extract, including the first condition and the out-of-range handling.
// NOTE(review): the debug text of the two-instruction paths does not match
// the encodings: the first message names rt as the source although rs is
// encoded, the second names rs although rt is encoded.  The messages also
// use the 32-bit register names when is64 is set.
579 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
581 unused const char *st = s ? "s" : "";
582 s = s ? 0x20000000 : 0;
583 is64 = is64 ? 0x80000000 : 0;
585 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
586 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
// small negative constant: emit a sub of the magnitude
588 else if (-imm < 4096) {
589 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
590 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
592 else if (imm < 16777216) {
593 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
594 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
595 if ((imm & 0xfff) || s) {
596 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
597 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
600 else if (-imm < 16777216) {
601 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
602 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
603 if ((imm & 0xfff) || s) {
604 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
605 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
612 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
614 emit_addimm_s(0, 0, rs, imm, rt);
617 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
619 emit_addimm_s(0, 1, rs, imm, rt);
622 static void emit_addimm_and_set_flags(int imm, u_int rt)
624 emit_addimm_s(1, 0, rt, imm, rt);
// Emits "rt = rs <op> imm" where op indexes names[] (and/orr/eor/ands).
// An immediate that is a rotated run of ones is encoded directly; otherwise
// it is first loaded into HOST_TEMPREG and the register form is used.  The
// temp register is acquired/released around that, except when the caller
// passes it as rt while rs is a different register.
// NOTE(review): embedded numbers 631-632, 637-638 and 646-648 are missing
// from the extract.  As shown, op is OR-ed into the instruction word
// unshifted; presumably a missing line moves it into the opcode field -
// confirm against the tree.
627 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
629 const char *names[] = { "and", "orr", "eor", "ands" };
630 const char *name = names[op];
633 if (is_rotated_mask(imm)) {
634 gen_logical_imm(imm, &immr, &imms);
635 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
636 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
639 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
640 host_tempreg_acquire();
641 emit_movimm(imm, HOST_TEMPREG);
642 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
643 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
644 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
645 host_tempreg_release();
// Immediate and/orr/eor wrappers around emit_logicop_imm (ops 0, 1 and 2).
// NOTE(review): each body has lines missing before the visible call
// (embedded numbers 652-654, 660-664, 670-674), so any special-casing done
// there is not visible in this extract.
650 static void emit_andimm(u_int rs, u_int imm, u_int rt)
655 emit_logicop_imm(0, rs, imm, rt);
658 static void emit_orimm(u_int rs, u_int imm, u_int rt)
665 emit_logicop_imm(1, rs, imm, rt);
668 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
675 emit_logicop_imm(2, rs, imm, rt);
678 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
680 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
681 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
684 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
686 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
687 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
690 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
692 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
693 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
696 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
698 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
702 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
704 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
705 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
708 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
710 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
711 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
714 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
716 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
717 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
720 static void emit_signextend16(u_int rs, u_int rt)
722 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
723 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
726 static void emit_shl(u_int rs,u_int rshift,u_int rt)
728 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
729 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
732 static void emit_shr(u_int rs,u_int rshift,u_int rt)
734 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
735 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
738 static void emit_sar(u_int rs,u_int rshift,u_int rt)
740 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
741 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
// Compares rs with the constant imm.  Depending on the value this is a cmp
// with a 12-bit immediate, a cmn with the negated value, a cmp with a 12-bit
// immediate shifted left by 12, or - as a fallback - the constant is loaded
// into HOST_TEMPREG and a register cmp is used.
// NOTE(review): embedded numbers 745-746, 749, 753 and 757-758 are missing
// from the extract, including the first condition.
// NOTE(review): the cmn debug line prints imm itself, while the encoded
// immediate is -imm.
744 static void emit_cmpimm(u_int rs, u_int imm)
747 assem_debug("cmp %s,%#x\n", regname[rs], imm);
748 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
750 else if (-imm < 4096) {
751 assem_debug("cmn %s,%#x\n", regname[rs], imm);
752 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
754 else if (imm < 16777216 && !(imm & 0xfff)) {
755 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
756 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
759 host_tempreg_acquire();
760 emit_movimm(imm, HOST_TEMPREG);
761 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
762 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
763 host_tempreg_release();
// Conditionally replaces rt with the constant imm, which must be 0 or 1.
// Two encodings are visible: a csinc on rt/WZR using cond1, and a csel of
// WZR over rt using cond0.  The callers in this file pass cond1 as the
// inverse of cond0.
// NOTE(review): the lines selecting between the two encodings (embedded
// numbers 772 and 775) are missing from the extract; presumably the csinc
// form is used for imm == 1 and the csel form for imm == 0 - confirm.
767 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
769 assert(imm == 0 || imm == 1);
770 assert(cond0 < 0x10);
771 assert(cond1 < 0x10);
773 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
774 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
776 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
777 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
781 static void emit_cmovne_imm(u_int imm,u_int rt)
783 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
786 static void emit_cmovl_imm(u_int imm,u_int rt)
788 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
791 static void emit_cmovb_imm(int imm,u_int rt)
793 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
796 static void emit_cmoveq_reg(u_int rs,u_int rt)
798 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
799 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
802 static void emit_cmovne_reg(u_int rs,u_int rt)
804 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
805 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
808 static void emit_cmovl_reg(u_int rs,u_int rt)
810 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
811 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
814 static void emit_cmovb_reg(u_int rs,u_int rt)
816 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
817 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
820 static void emit_cmovs_reg(u_int rs,u_int rt)
822 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
823 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
826 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
828 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
829 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
// Set-on-less-than-immediate helpers: rt is zeroed (before the missing line
// when it does not alias rs, after it otherwise) and then set to 1 on "lt"
// (signed variant) or "cc" (unsigned variant).
// NOTE(review): the line between the two zeroing statements (embedded
// numbers 835 and 843) is missing from the extract; presumably it is the
// comparison of rs with imm - confirm.
832 static void emit_slti32(u_int rs,int imm,u_int rt)
834 if(rs!=rt) emit_zeroreg(rt);
836 if(rs==rt) emit_movimm(0,rt);
837 emit_cmovl_imm(1,rt);
840 static void emit_sltiu32(u_int rs,int imm,u_int rt)
842 if(rs!=rt) emit_zeroreg(rt);
844 if(rs==rt) emit_movimm(0,rt);
845 emit_cmovb_imm(1,rt);
848 static void emit_cmp(u_int rs,u_int rt)
850 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
851 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
// Flag-to-value helpers built from a conditional move of 0 or 1 into rt.
// NOTE(review): every body below has lines missing from the extract
// (embedded numbers 857-858, 866, 874, 883); presumably these hold the
// comparison/test that sets the flags - confirm.  Only the visible steps are
// described.

// ends by forcing rt to 0 on "lt"
854 static void emit_set_gz32(u_int rs, u_int rt)
856 //assem_debug("set_gz32\n");
859 emit_cmovl_imm(0,rt);
// copies rs to rt if they differ, and ends by forcing rt to 1 on "ne"
862 static void emit_set_nz32(u_int rs, u_int rt)
864 //assem_debug("set_nz32\n");
865 if(rs!=rt) emit_mov(rs,rt);
867 emit_cmovne_imm(1,rt);
// rt is zeroed before the missing line when it aliases neither source,
// after it otherwise; then set to 1 on "lt"
870 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
872 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
873 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
875 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
876 emit_cmovl_imm(1,rt);
// same shape as emit_set_if_less32, but set to 1 on "cc"
879 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
881 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
882 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
884 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
885 emit_cmovb_imm(1,rt);
888 static int can_jump_or_call(const void *a)
890 intptr_t diff = (u_char *)a - out;
891 return (-134217728 <= diff && diff <= 134217727);
// Emits "bl a" when a is within +/-128MB of the current output position.
// NOTE(review): embedded numbers 898 and 901-902 are missing from the
// extract, so the out-of-range handling is not visible.
894 static void emit_call(const void *a)
896 intptr_t diff = (u_char *)a - out;
897 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
899 if (-134217728 <= diff && diff <= 134217727)
900 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
905 static void emit_jmp(const void *a)
907 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
908 u_int offset = genjmp(a);
909 output_w32(0x14000000 | offset);
912 static void emit_jne(const void *a)
914 assem_debug("bne %p\n", a);
915 u_int offset = genjmpcc(a);
916 output_w32(0x54000000 | (offset << 5) | COND_NE);
919 static void emit_jeq(const void *a)
921 assem_debug("beq %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_EQ);
926 static void emit_js(const void *a)
928 assem_debug("bmi %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_MI);
933 static void emit_jns(const void *a)
935 assem_debug("bpl %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_PL);
940 static void emit_jl(const void *a)
942 assem_debug("blt %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_LT);
947 static void emit_jge(const void *a)
949 assem_debug("bge %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_GE);
954 static void emit_jno(const void *a)
956 assem_debug("bvc %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_VC);
961 static void emit_jc(const void *a)
963 assem_debug("bcs %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_CS);
968 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
970 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
971 u_int offset = genjmpcc(a);
972 is64 = is64 ? 0x80000000 : 0;
973 isnz = isnz ? 0x01000000 : 0;
974 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// Takes a branch target a and a register number r.
// NOTE(review): the body (embedded numbers 978-980) is missing from the
// extract; presumably it forwards to emit_cb - confirm.
977 static void emit_cbz(const void *a, u_int r)
982 static void emit_jmpreg(u_int r)
984 assem_debug("br %s\n", regname64[r]);
985 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
988 static void emit_retreg(u_int r)
990 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
991 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// NOTE(review): the body (embedded numbers 995-997) is missing from the
// extract; presumably it forwards to emit_retreg - confirm.
994 static void emit_ret(void)
// adr xT,addr - addr must be within +/-1MB of the current output position.
// The low two offset bits go to bits 30:29, the remaining 19 bits to bit 5.
// NOTE(review): the line with embedded number 1003 is missing from the
// extract.
999 static void emit_adr(void *addr, u_int rt)
1001 intptr_t offset = (u_char *)addr - out;
1002 assert(-1048576 <= offset && offset < 1048576);
1004 assem_debug("adr x%d,#%#lx\n", rt, offset);
1005 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// adrp xT,addr - works on the difference between the 4KB pages of addr and
// of the current output position, which must be within +/-4GB.
// NOTE(review): embedded numbers 1012-1013 are missing from the extract;
// presumably they convert the byte difference into a page count before it is
// encoded - confirm.
1008 static void emit_adrp(void *addr, u_int rt)
1010 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1011 assert(-4294967296l <= offset && offset < 4294967296l);
1014 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1015 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1018 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1020 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1021 assert(-256 <= offset && offset < 256);
1022 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1025 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1027 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1028 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1031 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1033 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1034 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1037 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1039 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1040 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1043 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1045 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1048 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1050 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1053 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1056 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1059 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1062 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1065 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1068 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1071 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1074 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1076 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1077 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1080 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1082 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1083 assert(-256 <= offset && offset < 256);
1084 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1087 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1089 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1090 assert(-256 <= offset && offset < 256);
1091 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1094 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1096 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1101 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1103 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1104 assert(-256 <= offset && offset < 256);
1105 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Stores of rt to [rs + offset].  Each tries the scaled unsigned 12-bit
// offset form first (offset non-negative, naturally aligned and in range)
// and falls back to the unscaled 9-bit form for offsets in -256..255.
// NOTE(review): the handling of offsets that fit neither form (embedded
// numbers 1117-1120, 1131-1134, 1145-1148) is missing from the extract.
// NOTE(review): emit_writeword_indexed prints the base register with the
// 32-bit names (regname[rs]) while the other two use regname64[rs].

// 32-bit store: str / stur
1108 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1110 if (!(offset & 3) && (u_int)offset <= 16380) {
1111 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1112 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1114 else if (-256 <= offset && offset < 256) {
1115 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1116 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// 16-bit store: strh / sturh
1122 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1124 if (!(offset & 1) && (u_int)offset <= 8190) {
1125 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1126 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1128 else if (-256 <= offset && offset < 256) {
1129 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1130 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// 8-bit store: strb / sturb
1136 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1138 if ((u_int)offset < 4096) {
1139 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1140 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1142 else if (-256 <= offset && offset < 256) {
1143 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1150 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1152 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1153 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1156 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1158 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1159 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1162 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1164 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1165 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1168 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1170 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1174 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1176 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1180 static void emit_clz(u_int rs, u_int rt)
1182 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1183 output_w32(0x5ac01000 | rn_rd(rs, rt));
1186 // special case for checking invalid_code
1187 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1189 host_tempreg_acquire();
1190 emit_shrimm(r, 12, HOST_TEMPREG);
1191 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1192 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1193 emit_cmpimm(HOST_TEMPREG, imm);
1194 host_tempreg_release();
1197 // special for loadlr_assemble, rs2 is destroyed
1198 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1200 emit_shl(rs2, shift, rs2);
1201 emit_bic(rs1, rs2, rt);
1204 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1206 emit_shr(rs2, shift, rs2);
1207 emit_bic(rs1, rs2, rt);
1210 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1212 output_w32(0x58000000 | imm19_rt(ofs, rt));
1215 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1217 u_int op = 0xb9000000;
1218 unused const char *ldst = is_st ? "st" : "ld";
1219 unused char rp = is64 ? 'x' : 'w';
1220 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1221 is64 = is64 ? 1 : 0;
1222 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1223 ofs = (ofs >> (2+is64));
1224 if (!is_st) op |= 0x00400000;
1225 if (is64) op |= 0x40000000;
1226 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a load-pair (is_st == 0) or store-pair of rt1/rt2 at [rn + ofs].
//   is64 - non-zero selects 64-bit registers, zero 32-bit ones
//   ofs  - signed byte offset; must be a multiple of the register size and,
//          once scaled, fit in the range [-64, 63]
1229 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1231 u_int op = 0x29000000;
1232 unused const char *ldst = is_st ? "st" : "ld";
1233 unused char rp = is64 ? 'x' : 'w';
1234 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalize to 0/1 so it can be used in the shift amounts below
1235 is64 = is64 ? 1 : 0;
// offset must be aligned to the access size (4 or 8 bytes)
1236 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// convert the byte offset to register-sized units for the imm7 field
1237 ofs = (ofs >> (2+is64));
1238 assert(-64 <= ofs && ofs <= 63);
1240 if (!is_st) op |= 0x00400000;
1241 if (is64) op |= 0x80000000;
1242 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load the host registers selected by the reglist
// bitmask, using 64-bit accesses relative to SP + SSP_CALLEE_REGS.
// Both a pair form (emit_ldstp) and a single-register form (emit_ldst) are
// emitted from here.
1245 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask one bit per register number until no bits remain
1249 for (r = 0; reglist; r++, reglist >>= 1) {
1253 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1259 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the space used must not exceed SSP_CALLER_REGS
1262 assert(ofs <= SSP_CALLER_REGS);
1265 // Save registers before function call
1266 static void save_regs(u_int reglist)
1268 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1269 save_load_regs_all(1, reglist);
1272 // Restore registers after function call
1273 static void restore_regs(u_int reglist)
1275 reglist &= CALLER_SAVE_REGS;
1276 save_load_regs_all(0, reglist);
1279 /* Stubs/epilogue */
1281 static void literal_pool(int n)
1286 static void literal_pool_jumpover(int n)
// parsed by get_pointer, find_extjump_insn
// Emit the stub for a jump that leaves the current block.  The 32-bit
// target is loaded into register 0 with a movz/movk pair, then control
// far-jumps to linker.  addr is the branch instruction this stub belongs to.
1291 static void emit_extjump2(u_char *addr, u_int target, void *linker)
// addr[3] is the most significant byte of the little-endian instruction word
1293 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1295 emit_movz(target & 0xffff, 0);
1296 emit_movk_lsl16(target >> 16, 0);
1298 // addr is in the current recompiled block (max 256k)
1299 // offset shouldn't exceed +/-1MB
1301 emit_far_jump(linker);
// Sanity-check that src points at a stub of the form produced by
// emit_extjump2; the visible check covers the leading movz into register 0.
1304 static void check_extjump2(void *src)
1307 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
// put rt_val into rt, potentially making use of rs with value rs_val
// Tries progressively more expensive ways to derive rt_val from rs: a single
// add/sub immediate, a dedicated case for the bitwise complement, an xor with
// an encodable mask, and finally a plain immediate load.
1312 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1314 int diff = rt_val - rs_val;
// the difference fits a 12-bit immediate, or a 12-bit immediate shifted
// left by 12 (low 12 bits of diff are zero)
1315 if ((-4096 < diff && diff < 4096)
1316 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1317 emit_addimm(rs, diff, rt);
1318 else if (rt_val == ~rs_val)
// the bits that differ form a mask that xor-immediate can encode
1320 else if (is_rotated_mask(rs_val ^ rt_val))
1321 emit_xorimm(rs, rs_val ^ rt_val, rt);
// fallback: load rt_val without using rs
1323 emit_movimm(rt_val, rt);
// return 1 if the above function can do its job cheaply
// The visible conditions mirror the add-immediate and xor-mask cases of
// emit_movimm_from().
1327 static int is_similar_value(u_int v1, u_int v2)
1330 return (-4096 < diff && diff < 4096)
1331 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1333 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from(): put the pointer-sized rt_val into rt.
// Values below 2^32 are delegated to emit_movimm_from(); larger ones are
// built 16 bits at a time with movz/movk.
1336 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1338 if (rt_val < 0x100000000ull) {
1339 emit_movimm_from(rs_val, rs, rt_val, rt);
1342 // just move the whole thing. At least on Linux all addresses
1343 // seem to be 48bit, so 3 insns - not great not terrible
// bits 0..15
1344 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1345 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
// bits 16..31
1346 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1347 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
// bits 32..47
1348 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1349 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
// bits 48..63
// NOTE(review): per the comment above this is presumably emitted only when
// these bits are non-zero - confirm
1351 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1352 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
// Move the values held in host registers a0 and a1 into argument registers
// 0 and 1 with 64-bit moves, handling the case where the sources overlap
// the destinations.
// NOTE(review): a0 and a1 are declared u_int, so the ">=0" tests below are
// always true as written.
1357 static void pass_args64(u_int a0, u_int a1)
// register 2 is used as scratch to exchange the two values
1361 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1363 else if(a0!=0&&a1==0) {
1365 if (a0>=0) emit_mov64(a0,0);
// skip moves whose source already is the destination
1368 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1369 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt, extending it according to the access described by type:
// sign-extension (sbfm) for the signed byte/halfword loads, zero-extension
// (ubfm) for the byte/halfword stores, and a plain move for words.
1373 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// 7 / 15 name the top bit of the field that is kept
1376 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1378 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1379 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1381 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
// nothing to emit when source and destination are the same register
1383 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1388 #include "pcsxmem.h"
1389 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for the memory read recorded in stubs[n].
// The branch at stubs[n].addr is patched to land here.  The generated code
// looks the address up in mem_rtab (indexed by address >> 12) and either
// performs the load directly through the table entry or calls one of the
// jump_handler_read* routines, then returns to stubs[n].retaddr.
1391 static void do_readstub(int n)
1393 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1394 set_jump_target(stubs[n].addr, out);
1395 enum stub_type type = stubs[n].type;
// stub fields: b = host register holding the address, c = register state,
// e = mask of host registers to preserve
1397 int rs = stubs[n].b;
1398 const struct regstat *i_regs = (void *)stubs[n].c;
1399 u_int reglist = stubs[n].e;
1400 const signed char *i_regmap = i_regs->regmap;
// these instruction types take their result in FTEMP instead of rt1
1402 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1403 rt=get_reg(i_regmap,FTEMP);
1405 rt=get_reg(i_regmap,dops[i].rt1);
1408 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1409 void *restore_jump = NULL, *handler_jump = NULL;
// look for a host register that is not in reglist
1411 for (r = 0; r < HOST_CCREG; r++) {
1412 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1417 if(rt>=0&&dops[i].rt1!=0)
1424 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12]; the entry is then doubled with a
// flag-setting add.
// NOTE(review): presumably the entry's top bit marks "needs a handler" and
// ends up in the flags - confirm
1426 emit_readdword(&mem_rtab,temp);
1427 emit_shrimm(rs,12,temp2);
1428 emit_readdword_dualindexedx8(temp,temp2,temp2);
1429 emit_adds64(temp2,temp2,temp2);
// direct access is only emitted when the result is actually wanted
1432 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1434 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1435 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1436 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1437 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1438 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1444 emit_jmp(0); // jump to reg restore
1447 emit_jmp(stubs[n].retaddr); // return address
1448 set_jump_target(handler_jump, out);
// pick the handler matching the access width
1453 if(type==LOADB_STUB||type==LOADBU_STUB)
1454 handler=jump_handler_read8;
1455 if(type==LOADH_STUB||type==LOADHU_STUB)
1456 handler=jump_handler_read16;
1457 if(type==LOADW_STUB)
1458 handler=jump_handler_read32;
// arguments: register 0 = address, register 1 = table entry,
// register 2 = cycle count adjusted by stubs[n].d
1460 pass_args64(rs,temp2);
1461 int cc=get_reg(i_regmap,CCREG);
1463 emit_loadreg(CCREG,2);
1464 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1465 emit_far_call(handler);
1466 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended into rt
1467 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1468 loadstore_extend(type,0,rt);
1471 set_jump_target(restore_jump, out);
1472 restore_regs(reglist);
1473 emit_jmp(stubs[n].retaddr);
// Emit a memory read for an address (addr) that is known at compile time.
// If get_direct_memhandler() reports no handler, the load is emitted inline
// against the host address; otherwise a call to a read handler is emitted
// and its result (register 0) is extended into the target register.
1476 static void inline_readstub(enum stub_type type, int i, u_int addr,
1477 const signed char regmap[], int target, int adj, u_int reglist)
// NOTE(review): rs and rt are both looked up from 'target' here
1479 int rs=get_reg(regmap,target);
1480 int rt=get_reg(regmap,target);
1481 if(rs<0) rs=get_reg_temp(regmap);
1484 uintptr_t host_addr = 0;
1486 int cc=get_reg(regmap,CCREG);
1487 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1489 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// no handler: the access can be done directly
1490 if (handler == NULL) {
1491 if(rt<0||dops[i].rt1==0)
// rebase rs from the guest address to the host address when they differ
1493 if (addr != host_addr)
1494 emit_movimm_from64(addr, rs, host_addr, rs);
1496 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1497 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1498 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1499 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1500 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1505 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// generic handlers, selected by access width
1507 if(type==LOADB_STUB||type==LOADBU_STUB)
1508 handler=jump_handler_read8;
1509 if(type==LOADH_STUB||type==LOADHU_STUB)
1510 handler=jump_handler_read16;
1511 if(type==LOADW_STUB)
1512 handler=jump_handler_read32;
1515 // call a memhandler
1516 if(rt>=0&&dops[i].rt1!=0)
// register 0 = guest address, register 2 = cycle count adjusted by adj
1520 emit_movimm(addr,0);
1524 emit_loadreg(CCREG,2);
1525 emit_addimm(cc<0?2:cc,adj,2);
// register 1 = the mem_rtab entry for this page, shifted left by one,
// materialized with adrp plus the low 12 bits
1527 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1528 emit_adrp((void *)l1, 1);
1529 emit_addimm64(1, l1 & 0xfff, 1);
1532 emit_far_call(do_memhandler_pre);
1534 emit_far_call(handler);
1536 // (no cycle reload after read)
1537 if(rt>=0&&dops[i].rt1!=0)
1538 loadstore_extend(type, 0, rt);
1539 restore_regs(reglist);
// Emit the out-of-line slow path for the memory write recorded in stubs[n].
// Mirrors do_readstub(): the address is looked up in mem_wtab and the store
// is either done directly through the table entry or passed to one of the
// jump_handler_write* routines, which return the updated cycle count.
1542 static void do_writestub(int n)
1544 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1545 set_jump_target(stubs[n].addr, out);
1546 enum stub_type type=stubs[n].type;
1549 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1550 u_int reglist=stubs[n].e;
1551 signed char *i_regmap=i_regs->regmap;
// coprocessor stores take their data from FTEMP, others from rs2
1553 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1554 rt=get_reg(i_regmap,r=FTEMP);
1556 rt=get_reg(i_regmap,r=dops[i].rs2);
1560 int rtmp,temp=-1,temp2,regs_saved=0;
1561 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally covers the address and data registers
1562 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a host register that is not in reglist
1563 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1564 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1572 for(rtmp=0;rtmp<=3;rtmp++)
1573 if(rtmp!=rs&&rtmp!=rt)
1576 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1579 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled with a flag-setting add
// NOTE(review): presumably the entry's top bit marks "needs a handler" -
// confirm
1582 emit_readdword(&mem_wtab,temp);
1583 emit_shrimm(rs,12,temp2);
1584 emit_readdword_dualindexedx8(temp,temp2,temp2);
1585 emit_adds64(temp2,temp2,temp2);
1589 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1590 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1591 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1596 emit_jmp(0); // jump to reg restore
1599 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1600 set_jump_target(handler_jump, out);
1606 case STOREB_STUB: handler=jump_handler_write8; break;
1607 case STOREH_STUB: handler=jump_handler_write16; break;
1608 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry is passed in register 3
1614 emit_mov64(temp2,3);
1615 host_tempreg_release();
1617 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count adjusted by stubs[n].d
1619 emit_loadreg(CCREG,2);
1620 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1621 // returns new cycle_count
1622 emit_far_call(handler);
// undo the adjustment on the count returned in register 0
1623 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1625 emit_storereg(CCREG,2);
1627 set_jump_target(restore_jump, out);
1628 restore_regs(reglist);
1629 emit_jmp(stubs[n].retaddr);
// Emit a memory write for an address (addr) that is known at compile time.
// If get_direct_memhandler() reports no handler, the store is emitted inline
// against the host address; otherwise the write handler is called, bracketed
// by do_memhandler_pre/do_memhandler_post, and the cycle count is updated
// from its return value.
1632 static void inline_writestub(enum stub_type type, int i, u_int addr,
1633 const signed char regmap[], int target, int adj, u_int reglist)
1635 int rs = get_reg_temp(regmap);
1636 int rt = get_reg(regmap,target);
1639 uintptr_t host_addr = 0;
1640 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
// no handler: the access can be done directly
1641 if (handler == NULL) {
// rebase rs from the guest address to the host address when they differ
1642 if (addr != host_addr)
1643 emit_movimm_from64(addr, rs, host_addr, rs);
1645 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1646 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1647 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1653 // call a memhandler
1655 emit_writeword(rs, &address); // some handlers still need it
// the value to store goes to register 0, truncated to the access width
1656 loadstore_extend(type, rt, 0);
1658 cc = cc_use = get_reg(regmap, CCREG);
// register 2 = cycle count adjusted by adj
1660 emit_loadreg(CCREG, (cc_use = 2));
1661 emit_addimm(cc_use, adj, 2);
1663 emit_far_call(do_memhandler_pre);
1664 emit_far_call(handler);
1665 emit_far_call(do_memhandler_post);
// undo the adjustment on the value left in register 0
1666 emit_addimm(0, -adj, cc_use);
1668 emit_storereg(CCREG, cc_use);
1669 restore_regs(reglist);
// Compare size bytes of the current source against the copy taken at compile
// time.  Called from generated code (see do_dirty_stub_base); memcmp() yields
// 0 when the two regions are identical.
1672 static int verify_code_arm64(const void *source, const void *copy, u_int size)
1674 int ret = memcmp(source, copy, size);
1675 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emits the common head of a "dirty" entry stub: load the source and copy
// pointers from literals (patched in later by set_loadlp), put the length in
// register 2 and call verify_code_arm64; then load vaddr into register 0 and
// call get_addr.  The exact layout is checked by assert_dirty_stub().
1680 static void do_dirty_stub_base(u_int vaddr, u_int source_len)
1682 assert(source_len <= MAXBLOCK*4);
1683 emit_loadlp_ofs(0, 0); // ldr x0, source
1684 emit_loadlp_ofs(0, 1); // ldr x1, copy
1685 emit_movz(source_len, 2);
1686 emit_far_call(verify_code_arm64);
// vaddr is assembled in register 0 as two 16-bit halves
1689 emit_movz(vaddr & 0xffff, 0);
1690 emit_movk_lsl16(vaddr >> 16, 0);
1691 emit_far_call(get_addr);
// the branch recorded in jmp lands here, past the get_addr sequence
1693 set_jump_target(jmp, out);
1696 static void assert_dirty_stub(const u_int *ptr)
1698 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1699 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1700 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
1701 assert( ptr[8] == 0xd61f0000); // br x0
1704 static void set_loadlp(u_int *loadl, void *lit)
1706 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1707 assert((*loadl & ~0x1f) == 0x58000000);
1708 assert((ofs & 3) == 0);
1709 assert(ofs < 0x100000);
1710 *loadl |= (ofs >> 2) << 5;
1713 static void do_dirty_stub_emit_literals(u_int *loadlps)
1715 set_loadlp(&loadlps[0], out);
1716 output_w64((uintptr_t)source);
1717 set_loadlp(&loadlps[1], out);
1718 output_w64((uintptr_t)copy);
// Emit the dirty-entry stub for instruction i of the block being compiled:
// the common head from do_dirty_stub_base(), a jump to the compiled code for
// instruction i, and the literals the head refers to.
1721 static void *do_dirty_stub(int i, u_int source_len)
1723 assem_debug("do_dirty_stub %x\n",start+i*4);
// the first two emitted words are the literal loads patched later
1724 u_int *loadlps = (void *)out;
1725 do_dirty_stub_base(start + i*4, source_len);
1729 entry = instr_addr[i];
1730 emit_jmp(instr_addr[i]);
1731 do_dirty_stub_emit_literals(loadlps);
1735 static void do_dirty_stub_ds(u_int source_len)
1737 u_int *loadlps = (void *)out;
1738 do_dirty_stub_base(start + 1, source_len);
1739 void *lit_jumpover = out;
1740 emit_jmp(out + 8*2);
1741 do_dirty_stub_emit_literals(loadlps);
1742 set_jump_target(lit_jumpover, out);
// Return the 64-bit literal that the "ldr (literal)" instruction at i loads.
1745 static uint64_t get_from_ldr_literal(const u_int *i)
// must be the 64-bit ldr (literal) encoding
1748 assert((i[0] & 0xff000000) == 0x58000000);
// ofs is added to a u_int pointer, so it counts 32-bit words
1751 return *(uint64_t *)(i + ofs);
1754 static uint64_t get_from_movz(const u_int *i)
1756 assert((i[0] & 0x7fe00000) == 0x52800000);
1757 return (i[0] >> 5) & 0xffff;
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code
// addr must point at a stub emitted by do_dirty_stub_base(); this is asserted.
1762 static void *get_clean_addr(u_int *addr)
1764 assert_dirty_stub(addr);
// Re-run, on the host side, the check a dirty stub performs: decode the
// source pointer, copy pointer and length from the stub at ptr and compare
// the two regions.  Returns non-zero when they are identical.
1768 static int verify_dirty(const u_int *ptr)
1770 const void *source, *copy;
1772 assert_dirty_stub(ptr);
1773 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1774 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1775 len = get_from_movz(&ptr[2]); // movz w2, #source_len
1776 return !memcmp(source, copy, len);
// Tell whether addr is a clean entry point.  Code that starts with a
// "ldr (literal)" instruction is taken to be a dirty stub.
1779 static int isclean(void *addr)
1781 const u_int *ptr = addr;
// top byte 0x58 identifies the 64-bit ldr (literal) encoding
1782 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1783 assert_dirty_stub(ptr);
// get source that block at addr was compiled from (host pointers)
// addr must point at a dirty stub; *start receives the source pointer stored
// in its first literal and *end is *start plus the length held in its movz.
1790 static void get_bounds(void *addr, u_char **start, u_char **end)
1792 const u_int *ptr = addr;
1793 assert_dirty_stub(ptr);
1794 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1795 *end = *start + get_from_movz(&ptr[2]); // movz w2, #source_len
// Emit the code that precedes a call to a GTE (cop2) handler: save the
// registers in reglist, run the cop2 stall check and set register 0 to the
// address of the cop2 data registers.
1800 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// reglist is passed as-is; the caller has already masked it
1802 save_load_regs_all(1, reglist);
1803 cop2_do_stall_check(op, i, i_regs, 0);
// NOTE(review): presumably a profiling hook that is only built when
// performance counters are enabled - confirm
1806 emit_far_call(pcnt_gte_start);
1808 // pointer to cop2 regs
// computed as FP plus the offset of psxRegs.CP2D.r[0] within dynarec_local
1809 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Counterpart of c2op_prologue(): emitted after the GTE handler call to
// reload the registers saved there.
1812 static void c2op_epilogue(u_int op,u_int reglist)
// NOTE(review): presumably the matching profiling hook - confirm
1816 emit_far_call(pcnt_gte_end);
1818 save_load_regs_all(0, reglist);
// Assemble a GTE (cop2) operation for instruction i: when a handler exists
// for the opcode, store the instruction word to psxRegs.code and call the
// handler, choosing the flag-less variant when the flags are not needed.
1821 static void c2op_assemble(int i, const struct regstat *i_regs)
// the GTE operation number is the low 6 bits of the instruction
1823 u_int c2op=source[i]&0x3f;
1824 u_int hr,reglist_full=0,reglist;
1825 int need_flags,need_ir;
// collect every host register that currently holds a mapping
1826 for(hr=0;hr<HOST_REGS;hr++) {
1827 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
// only caller-saved registers need preserving across the call
1829 reglist=reglist_full&CALLER_SAVE_REGS;
1831 if (gte_handlers[c2op]!=NULL) {
// bit 63 of gte_unneeded marks the flags as unneeded
1832 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1833 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1834 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1835 source[i],gte_unneeded[i+1],need_flags,need_ir);
1836 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1838 //int shift = (source[i] >> 19) & 1;
1839 //int lm = (source[i] >> 10) & 1;
1843 c2op_prologue(c2op, i, i_regs, reglist);
1844 emit_movimm(source[i],1); // opcode
1845 emit_writeword(1,&psxRegs.code);
1846 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1849 c2op_epilogue(c2op,reglist);
// Emit the value transformation described by the two commented-out lines
// below, from register sl into register temp.  The function ends in
// assert(0): this path is marked as untested.
1853 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1855 //value = value & 0x7ffff000;
1856 //if (value & 0x7f87e000) value |= 0x80000000;
// the two masks below intersect to 0x7f87e000, the mask in the comment above
1857 emit_andimm(sl, 0x7fffe000, temp);
1858 emit_testimm(temp, 0xff87ffff);
// NOTE(review): the condition tested above is consumed by the cmovne below,
// so the instructions in between must leave the flags untouched - confirm
1859 emit_andimm(sl, 0x7ffff000, temp);
1860 host_tempreg_acquire();
1861 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1862 emit_cmovne_reg(HOST_TEMPREG, temp);
1863 host_tempreg_release();
1864 assert(0); // testing needed
1867 static void do_mfc2_31_one(u_int copr,signed char temp)
1869 emit_readshword(®_cop2d[copr],temp);
1870 emit_bicsar_imm(temp,31,temp);
1871 emit_cmpimm(temp,0xf80);
1872 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1873 emit_andimm(temp,0xf80,temp);
1876 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1879 host_tempreg_acquire();
1880 temp = HOST_TEMPREG;
1882 do_mfc2_31_one(9,temp);
1883 emit_shrimm(temp,7,tl);
1884 do_mfc2_31_one(10,temp);
1885 emit_orrshr_imm(temp,2,tl);
1886 do_mfc2_31_one(11,temp);
1887 emit_orrshl_imm(temp,3,tl);
1888 emit_writeword(tl,®_cop2d[29]);
1890 if (temp == HOST_TEMPREG)
1891 host_tempreg_release();
// Assemble MULT/MULTU/DIV/DIVU for instruction i.  Results go to the host
// registers mapped to HIREG and LOREG.  Cases where a source is guest
// register 0 are handled separately further down.
1894 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
// both operands are real registers (neither is guest register 0)
1900 if(dops[i].rs1&&dops[i].rs2)
1902 switch(dops[i].opcode2)
1907 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1908 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1909 signed char hi=get_reg(i_regs->regmap,HIREG);
1910 signed char lo=get_reg(i_regs->regmap,LOREG);
// the full 64-bit product is formed in hi first
1916 if(dops[i].opcode2==0x18) // MULT
1917 emit_smull(m1,m2,hi);
1919 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the product in hi
1922 emit_shrimm64(hi,32,hi);
1928 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1929 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1930 signed char quotient=get_reg(i_regs->regmap,LOREG);
1931 signed char remainder=get_reg(i_regs->regmap,HIREG);
1932 assert(numerator>=0);
1933 assert(denominator>=0);
1934 assert(quotient>=0);
1935 assert(remainder>=0);
1937 if (dops[i].opcode2 == 0x1A) // DIV
1938 emit_sdiv(numerator,denominator,quotient);
1940 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1941 emit_msub(quotient,denominator,numerator,remainder);
1943 // div 0 quotient (remainder is already correct)
1944 host_tempreg_acquire();
// the replacement quotient is prepared in HOST_TEMPREG: a value derived from
// the numerator's sign for DIV, all ones otherwise
1945 if (dops[i].opcode2 == 0x1A) // DIV
1946 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1948 emit_movimm(~0,HOST_TEMPREG);
// ... and selected only when the denominator is zero
1949 emit_test(denominator,denominator);
1950 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1951 host_tempreg_release();
// from here on at least one source operand is guest register 0
1960 signed char hr=get_reg(i_regs->regmap,HIREG);
1961 signed char lr=get_reg(i_regs->regmap,LOREG);
1962 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1965 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1966 assert(numerator >= 0);
// HI receives the numerator unchanged
1968 emit_mov(numerator,hr);
1970 if (dops[i].opcode2 == 0x1A) // DIV
1971 emit_sub_asrimm(0,numerator,31,lr);
1977 if (hr >= 0) emit_zeroreg(hr);
1978 if (lr >= 0) emit_movimm(~0,lr);
1983 // Multiply by zero is zero.
1984 if (hr >= 0) emit_zeroreg(hr);
1985 if (lr >= 0) emit_zeroreg(lr);
1989 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the guest address held in host register rs; the visible
// part calls get_addr_ht to resolve it.
1991 static void do_jump_vaddr(u_int rs)
1995 emit_far_call(get_addr_ht);
1999 static void do_preload_rhash(u_int r) {
2000 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2001 // register. On ARM the hash can be done with a single instruction (below)
2004 static void do_preload_rhtbl(u_int ht) {
2005 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2008 static void do_rhash(u_int rs,u_int rh) {
2009 emit_andimm(rs, 0xf8, rh);
2012 static void do_miniht_load(int ht, u_int rh) {
2013 emit_add64(ht, rh, ht);
2014 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash table lookup.  The visible part binds an
// earlier branch (jaddr) to this point and loads the 64-bit word at
// [ht + 8] into ht.
2017 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2023 set_jump_target(jaddr, out);
2024 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// 64-bit ldr with unsigned offset; the immediate is in 8-byte units
2025 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// parsed by set_jump_target?
// Emit code that records return_address in the mini hash table: the guest
// address is built in rt and stored to slot [0] of the selected entry (32
// bits), and the value in temp is stored to slot [1] (64 bits).
2030 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// rt = return_address, upper half first
2031 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2032 emit_movk(return_address&0xffff,rt);
2033 add_to_linker(out,return_address,1);
// entry index is (return_address & 0xFF) >> 3
2035 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2036 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make newly written code in [start, end) visible to instruction fetch:
// clean the data cache and invalidate the instruction cache line by line,
// with the required barriers.  Either step is skipped when CTR_EL0 reports
// it as unnecessary.
2039 static void clear_cache_arm64(char *start, char *end)
2041 // Don't rely on GCC's __clear_cache implementation, as it caches
2042 // icache/dcache cache line sizes, that can vary between cores on
2043 // big.LITTLE architectures.
2044 uint64_t addr, ctr_el0;
// running minimum over all calls; starts above any real line size
2045 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2046 size_t isize, dsize;
2048 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// line sizes are taken from bits [3:0] (icache) and [19:16] (dcache) as
// 4 << field, i.e. a log2 count of 4-byte words
2049 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2050 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2052 // use the global minimum cache line size
2053 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2054 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2056 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2057 not required for instruction to data coherence. */
// bit 28 is the IDC bit referred to above
2058 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a cache line boundary
2059 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2060 for (; addr < (uint64_t)end; addr += dsize)
2061 // use "civac" instead of "cvau", as this is the suggested workaround for
2062 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2063 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2065 __asm__ volatile("dsb ish" : : : "memory");
2067 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2068 Unification is not required for instruction to data coherence. */
// bit 29 is the DIC bit referred to above
2069 if ((ctr_el0 & (1 << 29)) == 0x0) {
2070 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2071 for (; addr < (uint64_t)end; addr += isize)
2072 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2074 __asm__ volatile("dsb ish" : : : "memory");
// final instruction synchronization barrier
2077 __asm__ volatile("isb" : : : "memory");
// CPU-architecture-specific initialization
// Fills every slot of ndrc->tramp.ops with a two-instruction trampoline:
// a literal load into x17 followed by "br x17".  The load offset is the
// distance between the tramp.f and tramp.ops arrays.
// NOTE(review): presumably each slot thereby reads the tramp.f entry with
// the same index - confirm the element sizes match.
2081 static void arch_init(void)
2083 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2084 struct tramp_insns *ops = ndrc->tramp.ops;
// the literal offset is encoded in words, so it must be 4-byte aligned
2086 assert(!(diff & 3));
// the writes to the code area are bracketed by start/end_tcache_write
2087 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2088 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2089 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2090 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2092 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2095 // vim:shiftwidth=2:expandtab