1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define unused __attribute__((unused))
28 void do_memhandler_pre();
29 void do_memhandler_post();
/*
 * Linker helper: retarget the PC-relative instruction at `addr` so that it
 * refers to `target`.
 *
 * Three instruction kinds are recognised by their opcode bits -- B,
 * B.cond / CBZ / CBNZ, and ADR. Only the immediate field is rewritten; every
 * other bit of the instruction is preserved. Any other opcode aborts.
 *
 * addr:   address of the instruction to patch
 * target: address the instruction should refer to afterwards
 */
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // The block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block
    // (see add_jump_out). A workaround would be a trampoline jump via a
    // stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep all of bits 4:0: they hold the full 5-bit Rt of cbz/cbnz
    // (and the condition of b.cond); only imm19 in bits 23:5 is replaced.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an ADR; the address it computes is
// returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  u_int insn = *ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // Decode through an unsigned value: shifting a signed int into or past
  // its sign bit is undefined behaviour.
  int immhi = (int)(insn << 8) >> 13;      // bits 23:5, sign-extended
  int immlo = (int)((insn >> 29) & 0x3);   // bits 30:29
  int offset = immhi * 4 + immlo;
  return ptr + offset / 4;
}
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  u_int insn = *i_ptr;
  // The immediates are sign-extended via an unsigned left shift followed by
  // a signed right shift; left-shifting the signed value directly would be
  // undefined behaviour.
  if ((insn&0xfc000000) == 0x14000000)  // b
    return i_ptr + ((int)(insn<<6)>>6);
  if ((insn&0xff000000) == 0x54000000     // b.cond
      || (insn&0x7e000000) == 0x34000000) // cbz/cbnz
    return i_ptr + ((int)(insn<<8)>>13);
  assert(0); // not a branch this code knows how to decode
  return NULL;
}
85 // Allocate a specific ARM register.
86 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
106 // Alloc cycle count into dedicated register
107 static void alloc_cc(struct regstat *cur,int i)
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
117 static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124 static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
/*
 * Condition codes. The enumerators take the values 0..15 in declaration
 * order; the emitters below place them directly into instruction words.
 */
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC,
  COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT,
  COND_GT, COND_LE, COND_AW, COND_NV
};
136 static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141 static void output_w32(u_int word)
143 *((u_int *)out) = word;
147 static void output_w64(uint64_t dword)
149 *((uint64_t *)out) = dword;
/* Pack register numbers: rm into bits 20:16, rd into bits 4:0. */
static u_int rm_rd(u_int rm, u_int rd)
{
  /* both must fit their 5-bit fields */
  assert(rm < 32);
  assert(rd < 32);
  return rd | (rm << 16);
}
/* Pack register numbers: rn into bits 9:5, rd into bits 4:0. */
static u_int rn_rd(u_int rn, u_int rd)
{
  /* both must fit their 5-bit fields */
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5);
}
/* Pack register numbers: rm into bits 20:16, rn into 9:5, rd into 4:0. */
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  /* each must fit its 5-bit field (31 is accepted: callers pass WZR) */
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (rm << 16);
}
177 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180 return rm_rn_rd(rm, rn, rd) | (ra << 10);
/*
 * Pack the operand fields of a register-pair load/store: imm7 into bits
 * 21:15, rt2 into 14:10, rn into 9:5 and rt into 4:0.
 */
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  /* every operand must fit its field */
  assert(imm7 < 0x80);
  assert(rt2 < 32);
  assert(rn < 32);
  assert(rt < 32);
  return rt | (rn << 5) | (rt2 << 10) | (imm7 << 15);
}
192 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
/* Pack a 16-bit immediate into bits 20:5 and rd into bits 4:0. */
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 32);
  return rd | (imm16 << 5);
}
/* Pack a 12-bit immediate into bits 21:10, rn into 9:5 and rd into 4:0. */
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  /* 31 is accepted: callers pass WZR as rd */
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (imm12 << 10);
}
/* Pack a 9-bit immediate into bits 20:12, rn into 9:5 and rd into 4:0. */
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (imm9 << 12);
}
/* Pack a 19-bit immediate into bits 23:5 and rt into bits 4:0. */
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 32);
  return rt | (imm19 << 5);
}
/*
 * Pack the operand fields of a bitfield / logical-immediate instruction:
 * n into bit 22, immr into bits 21:16, imms into 15:10, rn into 9:5 and
 * rd into 4:0.
 */
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  /* every operand must fit its field */
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (imms << 10) | (immr << 16) | (n << 22);
}
238 static u_int genjmp(const u_char *addr)
240 intptr_t offset = addr - out;
241 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
242 if (offset < -134217728 || offset > 134217727) {
243 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
247 return ((u_int)offset >> 2) & 0x03ffffff;
250 static u_int genjmpcc(const u_char *addr)
252 intptr_t offset = addr - out;
253 if ((uintptr_t)addr < 3) return 0;
254 if (offset < -1048576 || offset > 1048572) {
255 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
259 return ((u_int)offset >> 2) & 0x7ffff;
/*
 * Returns 1 when `value` is a non-empty run of ones starting at bit 0
 * (0x1, 0x3, 0x7, ... 0xffffffff), otherwise 0.
 */
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  /* adding 1 to a low mask carries out of the whole run of ones */
  return (value & (value + 1)) == 0;
}
267 // This function returns true if the argument contains a
268 // non-empty sequence of ones (possibly rotated) with the remainder zero.
269 static uint32_t is_rotated_mask(u_int value)
271 if (value == 0 || value == ~0)
273 if (is_mask((value - 1) | value))
275 return is_mask((~value - 1) | ~value);
278 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
280 int lzeros, tzeros, ones;
282 if (is_mask((value - 1) | value)) {
283 lzeros = __builtin_clz(value);
284 tzeros = __builtin_ctz(value);
285 ones = 32 - lzeros - tzeros;
286 *immr = (32 - tzeros) & 31;
291 if (is_mask((value - 1) | value)) {
292 lzeros = __builtin_clz(value);
293 tzeros = __builtin_ctz(value);
294 ones = 32 - lzeros - tzeros;
302 static void emit_mov(u_int rs, u_int rt)
304 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
305 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308 static void emit_mov64(u_int rs, u_int rt)
310 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
311 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
314 static void emit_add(u_int rs1, u_int rs2, u_int rt)
316 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
317 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
320 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
322 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
323 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
326 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
328 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
329 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
331 #define emit_adds_ptr emit_adds64
333 static void emit_neg(u_int rs, u_int rt)
335 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
339 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
348 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351 static void emit_movz(u_int imm, u_int rt)
353 assem_debug("movz %s,#%#x\n", regname[rt], imm);
354 output_w32(0x52800000 | imm16_rd(imm, rt));
357 static void emit_movz_lsl16(u_int imm, u_int rt)
359 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
360 output_w32(0x52a00000 | imm16_rd(imm, rt));
363 static void emit_movn(u_int imm, u_int rt)
365 assem_debug("movn %s,#%#x\n", regname[rt], imm);
366 output_w32(0x12800000 | imm16_rd(imm, rt));
369 static void emit_movn_lsl16(u_int imm,u_int rt)
371 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x12a00000 | imm16_rd(imm, rt));
375 static void emit_movk(u_int imm,u_int rt)
377 assem_debug("movk %s,#%#x\n", regname[rt], imm);
378 output_w32(0x72800000 | imm16_rd(imm, rt));
381 static void emit_movk_lsl16(u_int imm,u_int rt)
384 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
385 output_w32(0x72a00000 | imm16_rd(imm, rt));
388 static void emit_zeroreg(u_int rt)
393 static void emit_movimm(u_int imm, u_int rt)
397 else if ((~imm) < 65536)
399 else if ((imm&0xffff) == 0)
400 emit_movz_lsl16(imm >> 16, rt);
401 else if (((~imm)&0xffff) == 0)
402 emit_movn_lsl16(~imm >> 16, rt);
403 else if (is_rotated_mask(imm)) {
405 gen_logical_imm(imm, &immr, &imms);
406 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
407 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
410 emit_movz(imm & 0xffff, rt);
411 emit_movk_lsl16(imm >> 16, rt);
415 static void emit_readword(void *addr, u_int rt)
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 3) && offset <= 16380) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
420 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
426 static void emit_readdword(void *addr, u_int rt)
428 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
429 if (!(offset & 7) && offset <= 32760) {
430 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
431 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
436 #define emit_readptr emit_readdword
438 static void emit_readshword(void *addr, u_int rt)
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
449 static void emit_loadreg(u_int r, u_int hr)
456 void *addr = &psxRegs.GPR.r[r];
458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
462 case INVCP: addr = &invc_ptr; is64 = 1; break;
463 case ROREG: addr = &ram_offset; is64 = 1; break;
464 default: assert(r < 34); break;
467 emit_readdword(addr, hr);
469 emit_readword(addr, hr);
473 static void emit_writeword(u_int rt, void *addr)
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 3) && offset <= 16380) {
477 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
478 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
484 static void emit_writedword(u_int rt, void *addr)
486 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
487 if (!(offset & 7) && offset <= 32760) {
488 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
489 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
495 static void emit_storereg(u_int r, u_int hr)
498 void *addr = &psxRegs.GPR.r[r];
500 //case HIREG: addr = &hi; break;
501 //case LOREG: addr = &lo; break;
502 case CCREG: addr = &cycle_count; break;
503 default: assert(r < 34); break;
505 emit_writeword(hr, addr);
508 static void emit_test(u_int rs, u_int rt)
510 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
511 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
514 static void emit_testimm(u_int rs, u_int imm)
517 assem_debug("tst %s,#%#x\n", regname[rs], imm);
518 assert(is_rotated_mask(imm)); // good enough for PCSX
519 gen_logical_imm(imm, &immr, &imms);
520 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
523 static void emit_not(u_int rs,u_int rt)
525 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
526 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
529 static void emit_and(u_int rs1,u_int rs2,u_int rt)
531 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
532 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
535 static void emit_or(u_int rs1,u_int rs2,u_int rt)
537 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
538 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
541 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
543 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
544 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
547 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
549 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
550 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
553 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
555 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
556 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
559 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
561 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
562 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
565 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
567 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
568 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
571 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
573 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
574 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
577 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
579 unused const char *st = s ? "s" : "";
580 s = s ? 0x20000000 : 0;
581 is64 = is64 ? 0x80000000 : 0;
583 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
584 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
586 else if (-imm < 4096) {
587 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
588 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
590 else if (imm < 16777216) {
591 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
592 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
593 if ((imm & 0xfff) || s) {
594 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
595 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
598 else if (-imm < 16777216) {
599 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
600 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
603 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
610 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
612 emit_addimm_s(0, 0, rs, imm, rt);
615 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
617 emit_addimm_s(0, 1, rs, imm, rt);
620 static void emit_addimm_and_set_flags(int imm, u_int rt)
622 emit_addimm_s(1, 0, rt, imm, rt);
625 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
627 const char *names[] = { "and", "orr", "eor", "ands" };
628 const char *name = names[op];
631 if (is_rotated_mask(imm)) {
632 gen_logical_imm(imm, &immr, &imms);
633 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
634 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
637 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
638 host_tempreg_acquire();
639 emit_movimm(imm, HOST_TEMPREG);
640 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
641 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
642 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
643 host_tempreg_release();
648 static void emit_andimm(u_int rs, u_int imm, u_int rt)
653 emit_logicop_imm(0, rs, imm, rt);
656 static void emit_orimm(u_int rs, u_int imm, u_int rt)
663 emit_logicop_imm(1, rs, imm, rt);
666 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
673 emit_logicop_imm(2, rs, imm, rt);
676 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
678 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
679 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
682 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
684 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
685 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
688 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
690 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
694 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
696 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
700 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
702 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
706 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
708 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
712 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
714 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
718 static void emit_signextend16(u_int rs, u_int rt)
720 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
724 static void emit_shl(u_int rs,u_int rshift,u_int rt)
726 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
727 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
730 static void emit_shr(u_int rs,u_int rshift,u_int rt)
732 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
733 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
736 static void emit_sar(u_int rs,u_int rshift,u_int rt)
738 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
742 static void emit_cmpimm(u_int rs, u_int imm)
745 assem_debug("cmp %s,%#x\n", regname[rs], imm);
746 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
748 else if (-imm < 4096) {
749 assem_debug("cmn %s,%#x\n", regname[rs], imm);
750 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
752 else if (imm < 16777216 && !(imm & 0xfff)) {
753 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
754 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
757 host_tempreg_acquire();
758 emit_movimm(imm, HOST_TEMPREG);
759 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
760 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
761 host_tempreg_release();
765 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
767 assert(imm == 0 || imm == 1);
768 assert(cond0 < 0x10);
769 assert(cond1 < 0x10);
771 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
772 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
774 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
775 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
779 static void emit_cmovne_imm(u_int imm,u_int rt)
781 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
784 static void emit_cmovl_imm(u_int imm,u_int rt)
786 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
789 static void emit_cmovb_imm(int imm,u_int rt)
791 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
794 static void emit_cmoveq_reg(u_int rs,u_int rt)
796 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
797 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
800 static void emit_cmovne_reg(u_int rs,u_int rt)
802 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
803 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
806 static void emit_cmovl_reg(u_int rs,u_int rt)
808 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
812 static void emit_cmovb_reg(u_int rs,u_int rt)
814 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
818 static void emit_cmovs_reg(u_int rs,u_int rt)
820 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
824 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
826 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
827 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
830 static void emit_slti32(u_int rs,int imm,u_int rt)
832 if(rs!=rt) emit_zeroreg(rt);
834 if(rs==rt) emit_movimm(0,rt);
835 emit_cmovl_imm(1,rt);
838 static void emit_sltiu32(u_int rs,int imm,u_int rt)
840 if(rs!=rt) emit_zeroreg(rt);
842 if(rs==rt) emit_movimm(0,rt);
843 emit_cmovb_imm(1,rt);
846 static void emit_cmp(u_int rs,u_int rt)
848 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
849 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
852 static void emit_set_gz32(u_int rs, u_int rt)
854 //assem_debug("set_gz32\n");
857 emit_cmovl_imm(0,rt);
860 static void emit_set_nz32(u_int rs, u_int rt)
862 //assem_debug("set_nz32\n");
863 if(rs!=rt) emit_mov(rs,rt);
865 emit_cmovne_imm(1,rt);
868 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
870 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
871 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
873 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
874 emit_cmovl_imm(1,rt);
877 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
879 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
880 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
882 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
883 emit_cmovb_imm(1,rt);
886 static int can_jump_or_call(const void *a)
888 intptr_t diff = (u_char *)a - out;
889 return (-134217728 <= diff && diff <= 134217727);
892 static void emit_call(const void *a)
894 intptr_t diff = (u_char *)a - out;
895 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
897 if (-134217728 <= diff && diff <= 134217727)
898 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
903 static void emit_jmp(const void *a)
905 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
906 u_int offset = genjmp(a);
907 output_w32(0x14000000 | offset);
910 static void emit_jne(const void *a)
912 assem_debug("bne %p\n", a);
913 u_int offset = genjmpcc(a);
914 output_w32(0x54000000 | (offset << 5) | COND_NE);
917 static void emit_jeq(const void *a)
919 assem_debug("beq %p\n", a);
920 u_int offset = genjmpcc(a);
921 output_w32(0x54000000 | (offset << 5) | COND_EQ);
924 static void emit_js(const void *a)
926 assem_debug("bmi %p\n", a);
927 u_int offset = genjmpcc(a);
928 output_w32(0x54000000 | (offset << 5) | COND_MI);
931 static void emit_jns(const void *a)
933 assem_debug("bpl %p\n", a);
934 u_int offset = genjmpcc(a);
935 output_w32(0x54000000 | (offset << 5) | COND_PL);
938 static void emit_jl(const void *a)
940 assem_debug("blt %p\n", a);
941 u_int offset = genjmpcc(a);
942 output_w32(0x54000000 | (offset << 5) | COND_LT);
945 static void emit_jge(const void *a)
947 assem_debug("bge %p\n", a);
948 u_int offset = genjmpcc(a);
949 output_w32(0x54000000 | (offset << 5) | COND_GE);
952 static void emit_jno(const void *a)
954 assem_debug("bvc %p\n", a);
955 u_int offset = genjmpcc(a);
956 output_w32(0x54000000 | (offset << 5) | COND_VC);
959 static void emit_jc(const void *a)
961 assem_debug("bcs %p\n", a);
962 u_int offset = genjmpcc(a);
963 output_w32(0x54000000 | (offset << 5) | COND_CS);
966 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
968 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
969 u_int offset = genjmpcc(a);
970 is64 = is64 ? 0x80000000 : 0;
971 isnz = isnz ? 0x01000000 : 0;
972 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
975 static void emit_cbz(const void *a, u_int r)
980 static void emit_jmpreg(u_int r)
982 assem_debug("br %s\n", regname64[r]);
983 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
986 static void emit_retreg(u_int r)
988 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
989 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
992 static void emit_ret(void)
997 static void emit_adr(void *addr, u_int rt)
999 intptr_t offset = (u_char *)addr - out;
1000 assert(-1048576 <= offset && offset < 1048576);
1002 assem_debug("adr x%d,#%#lx\n", rt, offset);
1003 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1006 static void emit_adrp(void *addr, u_int rt)
1008 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1009 assert(-4294967296l <= offset && offset < 4294967296l);
1012 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1013 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1016 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1018 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1019 assert(-256 <= offset && offset < 256);
1020 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1023 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1025 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1026 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1029 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1031 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1035 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1037 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1041 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1043 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1046 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1048 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1050 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1051 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1054 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1060 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1062 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1066 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1068 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1072 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1074 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1075 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1078 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1080 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1081 assert(-256 <= offset && offset < 256);
1082 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1085 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1087 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1088 assert(-256 <= offset && offset < 256);
1089 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1092 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1094 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1095 assert(-256 <= offset && offset < 256);
1096 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1099 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1101 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1102 assert(-256 <= offset && offset < 256);
1103 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1106 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1108 if (!(offset & 3) && (u_int)offset <= 16380) {
1109 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1110 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1112 else if (-256 <= offset && offset < 256) {
1113 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1114 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1120 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1122 if (!(offset & 1) && (u_int)offset <= 8190) {
1123 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1124 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1126 else if (-256 <= offset && offset < 256) {
1127 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1128 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1134 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1136 if ((u_int)offset < 4096) {
1137 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1138 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1140 else if (-256 <= offset && offset < 256) {
1141 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1142 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1148 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1150 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1151 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1154 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1156 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1157 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1160 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1162 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1163 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1166 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1168 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1172 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1174 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1178 static void emit_clz(u_int rs, u_int rt)
1180 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1181 output_w32(0x5ac01000 | rn_rd(rs, rt));
1184 // special case for checking invalid_code
1185 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1187 host_tempreg_acquire();
1188 emit_shrimm(r, 12, HOST_TEMPREG);
1189 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1190 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1191 emit_cmpimm(HOST_TEMPREG, imm);
1192 host_tempreg_release();
1195 // special for loadlr_assemble, rs2 is destroyed
1196 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1198 emit_shl(rs2, shift, rs2);
1199 emit_bic(rs1, rs2, rt);
1202 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1204 emit_shr(rs2, shift, rs2);
1205 emit_bic(rs1, rs2, rt);
1208 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1210 output_w32(0x58000000 | imm19_rt(ofs, rt));
1213 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1215 u_int op = 0xb9000000;
1216 unused const char *ldst = is_st ? "st" : "ld";
1217 unused char rp = is64 ? 'x' : 'w';
1218 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1219 is64 = is64 ? 1 : 0;
1220 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1221 ofs = (ofs >> (2+is64));
1222 if (!is_st) op |= 0x00400000;
1223 if (is64) op |= 0x40000000;
1224 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1227 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1229 u_int op = 0x29000000;
1230 unused const char *ldst = is_st ? "st" : "ld";
1231 unused char rp = is64 ? 'x' : 'w';
1232 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1233 is64 = is64 ? 1 : 0;
1234 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1235 ofs = (ofs >> (2+is64));
1236 assert(-64 <= ofs && ofs <= 63);
1238 if (!is_st) op |= 0x00400000;
1239 if (is64) op |= 0x80000000;
1240 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1243 static void save_load_regs_all(int is_store, u_int reglist)
1247 for (r = 0; reglist; r++, reglist >>= 1) {
1251 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1257 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1260 assert(ofs <= SSP_CALLER_REGS);
1263 // Save registers before function call
1264 static void save_regs(u_int reglist)
1266 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1267 save_load_regs_all(1, reglist);
1270 // Restore registers after function call
1271 static void restore_regs(u_int reglist)
1273 reglist &= CALLER_SAVE_REGS;
1274 save_load_regs_all(0, reglist);
1277 /* Stubs/epilogue */
1279 static void literal_pool(int n)
1284 static void literal_pool_jumpover(int n)
1288 // parsed by get_pointer, find_extjump_insn
1289 static void emit_extjump2(u_char *addr, u_int target, void *linker)
1291 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1293 emit_movz(target & 0xffff, 0);
1294 emit_movk_lsl16(target >> 16, 0);
1296 // addr is in the current recompiled block (max 256k)
1297 // offset shouldn't exceed +/-1MB
1299 emit_far_jump(linker);
1302 static void check_extjump2(void *src)
1305 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1309 // put rt_val into rt, potentially making use of rs with value rs_val
1310 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1312 int diff = rt_val - rs_val;
1313 if ((-4096 < diff && diff < 4096)
1314 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1315 emit_addimm(rs, diff, rt);
1316 else if (rt_val == ~rs_val)
1318 else if (is_rotated_mask(rs_val ^ rt_val))
1319 emit_xorimm(rs, rs_val ^ rt_val, rt);
1321 emit_movimm(rt_val, rt);
1324 // return 1 if the above function can do its job cheaply
1325 static int is_similar_value(u_int v1, u_int v2)
1328 return (-4096 < diff && diff < 4096)
1329 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1331 || is_rotated_mask(v1 ^ v2);
1334 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1336 if (rt_val < 0x100000000ull) {
1337 emit_movimm_from(rs_val, rs, rt_val, rt);
1340 // just move the whole thing. At least on Linux all addresses
1341 // seem to be 48bit, so 3 insns - not great not terrible
1342 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1343 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1344 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1345 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1346 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1347 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1349 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1350 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1355 static void pass_args64(u_int a0, u_int a1)
1359 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1361 else if(a0!=0&&a1==0) {
1363 if (a0>=0) emit_mov64(a0,0);
1366 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1367 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1371 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1374 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1376 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1377 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1379 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1381 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1386 #include "pcsxmem.h"
1387 //#include "pcsxmem_inline.c"
1389 static void do_readstub(int n)
1391 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1392 set_jump_target(stubs[n].addr, out);
1393 enum stub_type type = stubs[n].type;
1395 int rs = stubs[n].b;
1396 const struct regstat *i_regs = (void *)stubs[n].c;
1397 u_int reglist = stubs[n].e;
1398 const signed char *i_regmap = i_regs->regmap;
1400 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1401 rt=get_reg(i_regmap,FTEMP);
1403 rt=get_reg(i_regmap,dops[i].rt1);
1406 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1407 void *restore_jump = NULL, *handler_jump = NULL;
1409 for (r = 0; r < HOST_CCREG; r++) {
1410 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1415 if(rt>=0&&dops[i].rt1!=0)
1422 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1424 emit_readdword(&mem_rtab,temp);
1425 emit_shrimm(rs,12,temp2);
1426 emit_readdword_dualindexedx8(temp,temp2,temp2);
1427 emit_adds64(temp2,temp2,temp2);
1430 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1432 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1433 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1434 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1435 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1436 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1442 emit_jmp(0); // jump to reg restore
1445 emit_jmp(stubs[n].retaddr); // return address
1446 set_jump_target(handler_jump, out);
1451 if(type==LOADB_STUB||type==LOADBU_STUB)
1452 handler=jump_handler_read8;
1453 if(type==LOADH_STUB||type==LOADHU_STUB)
1454 handler=jump_handler_read16;
1455 if(type==LOADW_STUB)
1456 handler=jump_handler_read32;
1458 pass_args64(rs,temp2);
1459 int cc=get_reg(i_regmap,CCREG);
1461 emit_loadreg(CCREG,2);
1462 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1463 emit_far_call(handler);
1464 // (no cycle reload after read)
1465 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1466 loadstore_extend(type,0,rt);
1469 set_jump_target(restore_jump, out);
1470 restore_regs(reglist);
1471 emit_jmp(stubs[n].retaddr);
1474 static void inline_readstub(enum stub_type type, int i, u_int addr,
1475 const signed char regmap[], int target, int adj, u_int reglist)
1477 int rs=get_reg(regmap,target);
1478 int rt=get_reg(regmap,target);
1479 if(rs<0) rs=get_reg(regmap,-1);
1482 uintptr_t host_addr = 0;
1484 int cc=get_reg(regmap,CCREG);
1485 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1487 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1488 if (handler == NULL) {
1489 if(rt<0||dops[i].rt1==0)
1491 if (addr != host_addr)
1492 emit_movimm_from64(addr, rs, host_addr, rs);
1494 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1495 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1496 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1497 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1498 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1503 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1505 if(type==LOADB_STUB||type==LOADBU_STUB)
1506 handler=jump_handler_read8;
1507 if(type==LOADH_STUB||type==LOADHU_STUB)
1508 handler=jump_handler_read16;
1509 if(type==LOADW_STUB)
1510 handler=jump_handler_read32;
1513 // call a memhandler
1514 if(rt>=0&&dops[i].rt1!=0)
1518 emit_movimm(addr,0);
1522 emit_loadreg(CCREG,2);
1523 emit_addimm(cc<0?2:cc,adj,2);
1525 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1526 emit_adrp((void *)l1, 1);
1527 emit_addimm64(1, l1 & 0xfff, 1);
1530 emit_far_call(do_memhandler_pre);
1532 emit_far_call(handler);
1534 // (no cycle reload after read)
1535 if(rt>=0&&dops[i].rt1!=0)
1536 loadstore_extend(type, 0, rt);
1537 restore_regs(reglist);
1540 static void do_writestub(int n)
1542 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1543 set_jump_target(stubs[n].addr, out);
1544 enum stub_type type=stubs[n].type;
1547 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1548 u_int reglist=stubs[n].e;
1549 signed char *i_regmap=i_regs->regmap;
1551 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1552 rt=get_reg(i_regmap,r=FTEMP);
1554 rt=get_reg(i_regmap,r=dops[i].rs2);
1558 int rtmp,temp=-1,temp2,regs_saved=0;
1559 void *restore_jump = NULL, *handler_jump = NULL;
1560 int reglist2=reglist|(1<<rs)|(1<<rt);
1561 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1562 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1570 for(rtmp=0;rtmp<=3;rtmp++)
1571 if(rtmp!=rs&&rtmp!=rt)
1574 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1577 host_tempreg_acquire();
1580 emit_readdword(&mem_wtab,temp);
1581 emit_shrimm(rs,12,temp2);
1582 emit_readdword_dualindexedx8(temp,temp2,temp2);
1583 emit_adds64(temp2,temp2,temp2);
1587 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1588 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1589 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1594 emit_jmp(0); // jump to reg restore
1597 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1598 set_jump_target(handler_jump, out);
1604 case STOREB_STUB: handler=jump_handler_write8; break;
1605 case STOREH_STUB: handler=jump_handler_write16; break;
1606 case STOREW_STUB: handler=jump_handler_write32; break;
1612 emit_mov64(temp2,3);
1613 host_tempreg_release();
1615 int cc=get_reg(i_regmap,CCREG);
1617 emit_loadreg(CCREG,2);
1618 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1619 // returns new cycle_count
1620 emit_far_call(handler);
1621 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1623 emit_storereg(CCREG,2);
1625 set_jump_target(restore_jump, out);
1626 restore_regs(reglist);
1627 emit_jmp(stubs[n].retaddr);
1630 static void inline_writestub(enum stub_type type, int i, u_int addr,
1631 const signed char regmap[], int target, int adj, u_int reglist)
1633 int rs = get_reg(regmap,-1);
1634 int rt = get_reg(regmap,target);
1637 uintptr_t host_addr = 0;
1638 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1639 if (handler == NULL) {
1640 if (addr != host_addr)
1641 emit_movimm_from64(addr, rs, host_addr, rs);
1643 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1644 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1645 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1651 // call a memhandler
1653 emit_writeword(rs, &address); // some handlers still need it
1654 loadstore_extend(type, rt, 0);
1656 cc = cc_use = get_reg(regmap, CCREG);
1658 emit_loadreg(CCREG, (cc_use = 2));
1659 emit_addimm(cc_use, adj, 2);
1661 emit_far_call(do_memhandler_pre);
1662 emit_far_call(handler);
1663 emit_far_call(do_memhandler_post);
1664 emit_addimm(0, -adj, cc_use);
1666 emit_storereg(CCREG, cc_use);
1667 restore_regs(reglist);
// Compare `size` bytes at `source` with the saved bytes at `copy`.
// Returns memcmp()'s result: 0 when the two ranges are identical,
// non-zero otherwise.
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  const int diff = memcmp(source, copy, size);

  //printf("%s %p,%#x = %d\n", __func__, source, size, diff);
  return diff;
}
1677 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emits the common head of a dirty-check stub for the block at vaddr:
// two literal loads, the source length, a call to verify_code_arm64 and a
// call to get_addr with vaddr in register 0.  The literal loads are emitted
// with offset 0; do_dirty_stub_emit_literals() patches them via set_loadlp().
// NOTE(review): some statements of this function are not visible in this
// listing (e.g. where `jmp` is set up), so this description is partial.
1678 static void do_dirty_stub_base(u_int vaddr, u_int source_len)
// source_len goes into a single movz below; the bound keeps it small
1680 assert(source_len <= MAXBLOCK*4);
1681 emit_loadlp_ofs(0, 0); // ldr x0, source
1682 emit_loadlp_ofs(0, 1); // ldr x1, copy
// movz w2, #source_len (third argument of verify_code_arm64)
1683 emit_movz(source_len, 2);
1684 emit_far_call(verify_code_arm64);
// w0 = vaddr, built from its low and high 16-bit halves
1687 emit_movz(vaddr & 0xffff, 0);
1688 emit_movk_lsl16(vaddr >> 16, 0);
1689 emit_far_call(get_addr);
// the earlier branch recorded in `jmp` lands here
1691 set_jump_target(jmp, out);
// Sanity-check that ptr points at a dirty stub as laid out by
// do_dirty_stub_base(): the first three words and the ninth word must match
// the expected instruction patterns (immediate fields are masked out).
static void assert_dirty_stub(const u_int *ptr)
{
  const u_int *const insn = ptr;

  assert((insn[0] & 0xff00001f) == 0x58000000); // ldr x0, source
  assert((insn[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
  assert((insn[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
  assert( insn[8]               == 0xd61f0000); // br x0
}
// Patch a literal load previously emitted with offset 0 (see
// emit_loadlp_ofs) so that it refers to the literal stored at `lit`.
//   loadl - the instruction word to patch; only the register field may be set
//   lit   - address of the literal; must lie at or after loadl, be 4-byte
//           aligned relative to it and be less than 1MB away
static void set_loadlp(u_int *loadl, void *lit)
{
  const uintptr_t distance = (u_char *)lit - (u_char *)loadl;

  assert((*loadl & ~0x1f) == 0x58000000);
  assert((distance & 3) == 0);
  assert(distance < 0x100000);

  // the word offset goes into the immediate field starting at bit 5
  *loadl |= (distance >> 2) << 5;
}
1711 static void do_dirty_stub_emit_literals(u_int *loadlps)
1713 set_loadlp(&loadlps[0], out);
1714 output_w64((uintptr_t)source);
1715 set_loadlp(&loadlps[1], out);
1716 output_w64((uintptr_t)copy);
1719 static void *do_dirty_stub(int i, u_int source_len)
1721 assem_debug("do_dirty_stub %x\n",start+i*4);
1722 u_int *loadlps = (void *)out;
1723 do_dirty_stub_base(start + i*4, source_len);
1727 entry = instr_addr[i];
1728 emit_jmp(instr_addr[i]);
1729 do_dirty_stub_emit_literals(loadlps);
1733 static void do_dirty_stub_ds(u_int source_len)
1735 u_int *loadlps = (void *)out;
1736 do_dirty_stub_base(start + 1, source_len);
1737 void *lit_jumpover = out;
1738 emit_jmp(out + 8*2);
1739 do_dirty_stub_emit_literals(loadlps);
1740 set_jump_target(lit_jumpover, out);
1743 static uint64_t get_from_ldr_literal(const u_int *i)
1746 assert((i[0] & 0xff000000) == 0x58000000);
1749 return *(uint64_t *)(i + ofs);
// Extract the 16-bit immediate from the movz instruction at i[0].
// The top bit of the opcode is ignored by the check.
static uint64_t get_from_movz(const u_int *i)
{
  const u_int insn = i[0];

  assert((insn & 0x7fe00000) == 0x52800000);
  return (insn >> 5) & 0xffff;
}
1758 // Find the "clean" entry point from a "dirty" entry point
1759 // by skipping past the call to verify_code
1760 static void *get_clean_addr(u_int *addr)
1762 assert_dirty_stub(addr);
1766 static int verify_dirty(const u_int *ptr)
1768 const void *source, *copy;
1770 assert_dirty_stub(ptr);
1771 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1772 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1773 len = get_from_movz(&ptr[2]); // movz w3, #source_len
1774 return !memcmp(source, copy, len);
1777 static int isclean(void *addr)
1779 const u_int *ptr = addr;
1780 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1781 assert_dirty_stub(ptr);
1787 // get source that block at addr was compiled from (host pointers)
1788 static void get_bounds(void *addr, u_char **start, u_char **end)
1790 const u_int *ptr = addr;
1791 assert_dirty_stub(ptr);
1792 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1793 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
1798 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1800 save_load_regs_all(1, reglist);
1801 cop2_do_stall_check(op, i, i_regs, 0);
1804 emit_far_call(pcnt_gte_start);
1806 // pointer to cop2 regs
1807 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1810 static void c2op_epilogue(u_int op,u_int reglist)
1814 emit_far_call(pcnt_gte_end);
1816 save_load_regs_all(0, reglist);
1819 static void c2op_assemble(int i, const struct regstat *i_regs)
1821 u_int c2op=source[i]&0x3f;
1822 u_int hr,reglist_full=0,reglist;
1823 int need_flags,need_ir;
1824 for(hr=0;hr<HOST_REGS;hr++) {
1825 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1827 reglist=reglist_full&CALLER_SAVE_REGS;
1829 if (gte_handlers[c2op]!=NULL) {
1830 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1831 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1832 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1833 source[i],gte_unneeded[i+1],need_flags,need_ir);
1834 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1836 //int shift = (source[i] >> 19) & 1;
1837 //int lm = (source[i] >> 10) & 1;
1841 c2op_prologue(c2op, i, i_regs, reglist);
1842 emit_movimm(source[i],1); // opcode
1843 emit_writeword(1,&psxRegs.code);
1844 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1847 c2op_epilogue(c2op,reglist);
1851 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1853 //value = value & 0x7ffff000;
1854 //if (value & 0x7f87e000) value |= 0x80000000;
1855 emit_andimm(sl, 0x7fffe000, temp);
1856 emit_testimm(temp, 0xff87ffff);
1857 emit_andimm(sl, 0x7ffff000, temp);
1858 host_tempreg_acquire();
1859 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1860 emit_cmovne_reg(HOST_TEMPREG, temp);
1861 host_tempreg_release();
1862 assert(0); // testing needed
1865 static void do_mfc2_31_one(u_int copr,signed char temp)
1867 emit_readshword(®_cop2d[copr],temp);
1868 emit_bicsar_imm(temp,31,temp);
1869 emit_cmpimm(temp,0xf80);
1870 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1871 emit_andimm(temp,0xf80,temp);
1874 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1877 host_tempreg_acquire();
1878 temp = HOST_TEMPREG;
1880 do_mfc2_31_one(9,temp);
1881 emit_shrimm(temp,7,tl);
1882 do_mfc2_31_one(10,temp);
1883 emit_orrshr_imm(temp,2,tl);
1884 do_mfc2_31_one(11,temp);
1885 emit_orrshl_imm(temp,3,tl);
1886 emit_writeword(tl,®_cop2d[29]);
1888 if (temp == HOST_TEMPREG)
1889 host_tempreg_release();
1892 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1898 if(dops[i].rs1&&dops[i].rs2)
1900 switch(dops[i].opcode2)
1905 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1906 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1907 signed char hi=get_reg(i_regs->regmap,HIREG);
1908 signed char lo=get_reg(i_regs->regmap,LOREG);
1914 if(dops[i].opcode2==0x18) // MULT
1915 emit_smull(m1,m2,hi);
1917 emit_umull(m1,m2,hi);
1920 emit_shrimm64(hi,32,hi);
1926 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1927 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1928 signed char quotient=get_reg(i_regs->regmap,LOREG);
1929 signed char remainder=get_reg(i_regs->regmap,HIREG);
1930 assert(numerator>=0);
1931 assert(denominator>=0);
1932 assert(quotient>=0);
1933 assert(remainder>=0);
1935 if (dops[i].opcode2 == 0x1A) // DIV
1936 emit_sdiv(numerator,denominator,quotient);
1938 emit_udiv(numerator,denominator,quotient);
1939 emit_msub(quotient,denominator,numerator,remainder);
1941 // div 0 quotient (remainder is already correct)
1942 host_tempreg_acquire();
1943 if (dops[i].opcode2 == 0x1A) // DIV
1944 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1946 emit_movimm(~0,HOST_TEMPREG);
1947 emit_test(denominator,denominator);
1948 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1949 host_tempreg_release();
1958 signed char hr=get_reg(i_regs->regmap,HIREG);
1959 signed char lr=get_reg(i_regs->regmap,LOREG);
1960 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1963 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1964 assert(numerator >= 0);
1966 emit_mov(numerator,hr);
1968 if (dops[i].opcode2 == 0x1A) // DIV
1969 emit_sub_asrimm(0,numerator,31,lr);
1975 if (hr >= 0) emit_zeroreg(hr);
1976 if (lr >= 0) emit_movimm(~0,lr);
1981 // Multiply by zero is zero.
1982 if (hr >= 0) emit_zeroreg(hr);
1983 if (lr >= 0) emit_zeroreg(lr);
1987 #define multdiv_assemble multdiv_assemble_arm64
1989 static void do_jump_vaddr(u_int rs)
1993 emit_far_call(get_addr_ht);
static void do_preload_rhash(u_int r) {
  // Intentionally empty on ARM: x86 preloads the constant 0xf8 into the
  // register here, whereas on ARM the hash is computed with a single
  // instruction (see do_rhash below).
  (void)r;
}
2002 static void do_preload_rhtbl(u_int ht) {
2003 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2006 static void do_rhash(u_int rs,u_int rh) {
2007 emit_andimm(rs, 0xf8, rh);
2010 static void do_miniht_load(int ht, u_int rh) {
2011 emit_add64(ht, rh, ht);
2012 emit_ldst(0, 0, rh, ht, 0);
2015 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2021 set_jump_target(jaddr, out);
2022 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2023 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2027 // parsed by set_jump_target?
2028 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2029 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2030 emit_movk(return_address&0xffff,rt);
2031 add_to_linker(out,return_address,1);
2033 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2034 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make newly written code in [start, end) visible to instruction fetch:
// cleans the data cache lines and invalidates the instruction cache lines
// covering the range (each step skipped when CTR_EL0 says it is unnecessary),
// with the barriers needed in between.
// NOTE(review): the brace-only lines of this function are not visible in this
// listing, so whether each "dsb ish" sits inside or after its `if` cannot be
// confirmed from here.
2037 static void clear_cache_arm64(char *start, char *end)
2039 // Don't rely on GCC's __clear_cache implementation, as it caches
2040 // icache/dcache cache line sizes, that can vary between cores on
2041 // big.LITTLE architectures.
2042 uint64_t addr, ctr_el0;
// running minimum of the line sizes seen so far, kept across calls
// NOTE(review): updated without synchronization - confirm callers are serialized
2043 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2044 size_t isize, dsize;
// CTR_EL0 bits [3:0] and [19:16] hold log2 of the smallest instruction/data
// cache line in 4-byte words, hence "4 << field" gives the size in bytes
2046 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2047 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2048 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2050 // use the global minimum cache line size
2051 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2052 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2054 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2055 not required for instruction to data coherence. */
2056 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the cache line containing `start`
2057 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2058 for (; addr < (uint64_t)end; addr += dsize)
2059 // use "civac" instead of "cvau", as this is the suggested workaround for
2060 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2061 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
// wait for the data cache maintenance to complete
2063 __asm__ volatile("dsb ish" : : : "memory");
2065 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2066 Unification is not required for instruction to data coherence. */
2067 if ((ctr_el0 & (1 << 29)) == 0x0) {
// start from the cache line containing `start`
2068 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2069 for (; addr < (uint64_t)end; addr += isize)
2070 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
// wait for the instruction cache invalidation to complete
2072 __asm__ volatile("dsb ish" : : : "memory");
// discard anything this core has already fetched
2075 __asm__ volatile("isb" : : : "memory");
2078 // CPU-architecture-specific initialization
2079 static void arch_init(void)
2081 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2082 struct tramp_insns *ops = ndrc->tramp.ops;
2084 assert(!(diff & 3));
2085 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2086 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2087 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2088 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2090 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2093 // vim:shiftwidth=2:expandtab