1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define CALLER_SAVE_REGS 0x0007ffff
28 #define unused __attribute__((unused))
30 void do_memhandler_pre();
31 void do_memhandler_post();
// Patch the PC-relative instruction at addr so that it refers to target.
// Handles b (26-bit word offset), b.cond / cbz / cbnz (19-bit word offset)
// and adr (21-bit byte offset); any other instruction aborts.
static void set_jump_target(void *addr, void *target)
{
  uint32_t *insn = addr;
  intptr_t offset = (unsigned char *)target - (unsigned char *)addr;

  if ((*insn & 0xFC000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *insn = (*insn & 0xFC000000) | ((uint32_t)(offset >> 2) & 0x3ffffff);
  }
  else if ((*insn & 0xff000000) == 0x54000000 // b.cond
        || (*insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // The block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out).
    // A workaround would be a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep bits 0-4: they hold the condition for b.cond but the whole
    // register field for cbz/cbnz, so bit 4 must survive the patch.
    *insn = (*insn & 0xFF00001F) | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else if ((*insn & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *insn = (*insn & 0x9F00001F)
          | ((uint32_t)(offset & 0x3) << 29)
          | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is: the third word of the stub must be an adr,
// and the address that adr computes is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // Rebuild the 21-bit byte offset (immhi:immlo) in unsigned arithmetic,
  // then sign-extend it without shifting a signed value.
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  int32_t offset = (int32_t)(imm21 ^ 0x100000) - 0x100000;
  return ptr + offset / 4;
}
71 // find where external branch is linked to using addr of its stub:
72 // get address that the stub loads (dyna_linker arg1),
73 // treat it as a pointer to branch insn,
74 // return addr where that branch jumps to
75 static void *get_pointer(void *stub)
77 int *i_ptr = find_extjump_insn(stub);
78 if ((*i_ptr&0xfc000000) == 0x14000000) // b
79 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
80 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
81 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
82 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
87 // Allocate a specific ARM register.
88 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
108 // Alloc cycle count into dedicated register
109 static void alloc_cc(struct regstat *cur,int i)
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
119 static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
126 static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition codes, numbered 0..15 in the same order as the condname[] table.
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
138 static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
143 static void output_w32(u_int word)
145 *((u_int *)out) = word;
149 static void output_w64(uint64_t dword)
151 *((uint64_t *)out) = dword;
156 static u_int rm_rd(u_int rm, u_int rd)
160 return (rm << 16) | rd;
164 static u_int rn_rd(u_int rn, u_int rd)
168 return (rn << 5) | rd;
171 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
176 return (rm << 16) | (rn << 5) | rd;
179 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
185 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
191 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
194 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
200 static u_int imm16_rd(u_int imm16, u_int rd)
202 assert(imm16 < 0x10000);
204 return (imm16 << 5) | rd;
207 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
209 assert(imm12 < 0x1000);
212 return (imm12 << 10) | (rn << 5) | rd;
215 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
217 assert(imm9 < 0x200);
220 return (imm9 << 12) | (rn << 5) | rd;
223 static u_int imm19_rt(u_int imm19, u_int rt)
225 assert(imm19 < 0x80000);
227 return (imm19 << 5) | rt;
230 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
237 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
240 static u_int genjmp(const u_char *addr)
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
244 if (offset < -134217728 || offset > 134217727) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
249 return ((u_int)offset >> 2) & 0x03ffffff;
252 static u_int genjmpcc(const u_char *addr)
254 intptr_t offset = addr - out;
255 if ((uintptr_t)addr < 3) return 0;
256 if (offset < -1048576 || offset > 1048572) {
257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
261 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, ... 0xffffffff), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low-order run of ones carries out of the run,
  // leaving no bit in common with the original value
  return (value & (value + 1)) == 0;
}
269 // This function returns true if the argument contains a
270 // non-empty sequence of ones (possibly rotated) with the remainder zero.
271 static uint32_t is_rotated_mask(u_int value)
273 if (value == 0 || value == ~0)
275 if (is_mask((value - 1) | value))
277 return is_mask((~value - 1) | ~value);
280 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
282 int lzeros, tzeros, ones;
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
288 *immr = (32 - tzeros) & 31;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
304 static void emit_mov(u_int rs, u_int rt)
306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
310 static void emit_mov64(u_int rs, u_int rt)
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
316 static void emit_add(u_int rs1, u_int rs2, u_int rt)
318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
322 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
328 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
333 #define emit_adds_ptr emit_adds64
335 static void emit_neg(u_int rs, u_int rt)
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
341 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
343 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
344 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
347 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
349 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
350 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
353 static void emit_movz(u_int imm, u_int rt)
355 assem_debug("movz %s,#%#x\n", regname[rt], imm);
356 output_w32(0x52800000 | imm16_rd(imm, rt));
359 static void emit_movz_lsl16(u_int imm, u_int rt)
361 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
362 output_w32(0x52a00000 | imm16_rd(imm, rt));
365 static void emit_movn(u_int imm, u_int rt)
367 assem_debug("movn %s,#%#x\n", regname[rt], imm);
368 output_w32(0x12800000 | imm16_rd(imm, rt));
371 static void emit_movn_lsl16(u_int imm,u_int rt)
373 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x12a00000 | imm16_rd(imm, rt));
377 static void emit_movk(u_int imm,u_int rt)
379 assem_debug("movk %s,#%#x\n", regname[rt], imm);
380 output_w32(0x72800000 | imm16_rd(imm, rt));
383 static void emit_movk_lsl16(u_int imm,u_int rt)
386 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
387 output_w32(0x72a00000 | imm16_rd(imm, rt));
390 static void emit_zeroreg(u_int rt)
395 static void emit_movimm(u_int imm, u_int rt)
399 else if ((~imm) < 65536)
401 else if ((imm&0xffff) == 0)
402 emit_movz_lsl16(imm >> 16, rt);
403 else if (((~imm)&0xffff) == 0)
404 emit_movn_lsl16(~imm >> 16, rt);
405 else if (is_rotated_mask(imm)) {
407 gen_logical_imm(imm, &immr, &imms);
408 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
409 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
412 emit_movz(imm & 0xffff, rt);
413 emit_movk_lsl16(imm >> 16, rt);
417 static void emit_readword(void *addr, u_int rt)
419 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
420 if (!(offset & 3) && offset <= 16380) {
421 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
422 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
428 static void emit_readdword(void *addr, u_int rt)
430 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
431 if (!(offset & 7) && offset <= 32760) {
432 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
433 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
438 #define emit_readptr emit_readdword
440 static void emit_readshword(void *addr, u_int rt)
442 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
443 if (!(offset & 1) && offset <= 8190) {
444 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
445 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
451 static void emit_loadreg(u_int r, u_int hr)
458 void *addr = &psxRegs.GPR.r[r];
460 //case HIREG: addr = &hi; break;
461 //case LOREG: addr = &lo; break;
462 case CCREG: addr = &cycle_count; break;
463 case CSREG: addr = &Status; break;
464 case INVCP: addr = &invc_ptr; is64 = 1; break;
465 default: assert(r < 34); break;
468 emit_readdword(addr, hr);
470 emit_readword(addr, hr);
474 static void emit_writeword(u_int rt, void *addr)
476 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
477 if (!(offset & 3) && offset <= 16380) {
478 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
479 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
485 static void emit_writedword(u_int rt, void *addr)
487 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
488 if (!(offset & 7) && offset <= 32760) {
489 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
490 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
496 static void emit_storereg(u_int r, u_int hr)
499 void *addr = &psxRegs.GPR.r[r];
501 //case HIREG: addr = &hi; break;
502 //case LOREG: addr = &lo; break;
503 case CCREG: addr = &cycle_count; break;
504 default: assert(r < 34); break;
506 emit_writeword(hr, addr);
509 static void emit_test(u_int rs, u_int rt)
511 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
512 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
515 static void emit_testimm(u_int rs, u_int imm)
518 assem_debug("tst %s,#%#x\n", regname[rs], imm);
519 assert(is_rotated_mask(imm)); // good enough for PCSX
520 gen_logical_imm(imm, &immr, &imms);
521 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
524 static void emit_not(u_int rs,u_int rt)
526 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
527 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
530 static void emit_and(u_int rs1,u_int rs2,u_int rt)
532 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
533 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
536 static void emit_or(u_int rs1,u_int rs2,u_int rt)
538 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
539 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
542 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
544 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
545 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
548 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
550 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
551 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
554 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
556 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
557 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
560 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
562 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
563 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
566 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
568 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
569 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
572 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
574 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
575 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
578 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
580 unused const char *st = s ? "s" : "";
581 s = s ? 0x20000000 : 0;
582 is64 = is64 ? 0x80000000 : 0;
584 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
585 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
587 else if (-imm < 4096) {
588 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
589 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
591 else if (imm < 16777216) {
592 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
593 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
594 if ((imm & 0xfff) || s) {
595 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
596 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
599 else if (-imm < 16777216) {
600 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
601 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
602 if ((imm & 0xfff) || s) {
603 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
604 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
611 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
613 emit_addimm_s(0, 0, rs, imm, rt);
616 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
618 emit_addimm_s(0, 1, rs, imm, rt);
621 static void emit_addimm_and_set_flags(int imm, u_int rt)
623 emit_addimm_s(1, 0, rt, imm, rt);
626 static void emit_addimm_no_flags(u_int imm,u_int rt)
628 emit_addimm(rt,imm,rt);
631 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
633 const char *names[] = { "and", "orr", "eor", "ands" };
634 const char *name = names[op];
637 if (is_rotated_mask(imm)) {
638 gen_logical_imm(imm, &immr, &imms);
639 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
640 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
643 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
644 host_tempreg_acquire();
645 emit_movimm(imm, HOST_TEMPREG);
646 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
647 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
648 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
649 host_tempreg_release();
654 static void emit_andimm(u_int rs, u_int imm, u_int rt)
659 emit_logicop_imm(0, rs, imm, rt);
662 static void emit_orimm(u_int rs, u_int imm, u_int rt)
669 emit_logicop_imm(1, rs, imm, rt);
672 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
679 emit_logicop_imm(2, rs, imm, rt);
682 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
684 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
685 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
688 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
690 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
694 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
696 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
700 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
702 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
706 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
712 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
718 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
720 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
724 static void emit_signextend16(u_int rs, u_int rt)
726 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
727 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
730 static void emit_shl(u_int rs,u_int rshift,u_int rt)
732 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
733 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
736 static void emit_shr(u_int rs,u_int rshift,u_int rt)
738 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
742 static void emit_sar(u_int rs,u_int rshift,u_int rt)
744 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
748 static void emit_cmpimm(u_int rs, u_int imm)
751 assem_debug("cmp %s,%#x\n", regname[rs], imm);
752 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
754 else if (-imm < 4096) {
755 assem_debug("cmn %s,%#x\n", regname[rs], imm);
756 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
758 else if (imm < 16777216 && !(imm & 0xfff)) {
759 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
760 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
763 host_tempreg_acquire();
764 emit_movimm(imm, HOST_TEMPREG);
765 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
766 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
767 host_tempreg_release();
771 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
773 assert(imm == 0 || imm == 1);
774 assert(cond0 < 0x10);
775 assert(cond1 < 0x10);
777 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
778 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
780 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
781 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
785 static void emit_cmovne_imm(u_int imm,u_int rt)
787 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
790 static void emit_cmovl_imm(u_int imm,u_int rt)
792 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
795 static void emit_cmovb_imm(int imm,u_int rt)
797 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
800 static void emit_cmoveq_reg(u_int rs,u_int rt)
802 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
803 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
806 static void emit_cmovne_reg(u_int rs,u_int rt)
808 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
812 static void emit_cmovl_reg(u_int rs,u_int rt)
814 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
818 static void emit_cmovb_reg(u_int rs,u_int rt)
820 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
824 static void emit_cmovs_reg(u_int rs,u_int rt)
826 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
830 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
832 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
833 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
836 static void emit_slti32(u_int rs,int imm,u_int rt)
838 if(rs!=rt) emit_zeroreg(rt);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovl_imm(1,rt);
844 static void emit_sltiu32(u_int rs,int imm,u_int rt)
846 if(rs!=rt) emit_zeroreg(rt);
848 if(rs==rt) emit_movimm(0,rt);
849 emit_cmovb_imm(1,rt);
852 static void emit_cmp(u_int rs,u_int rt)
854 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
855 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
858 static void emit_set_gz32(u_int rs, u_int rt)
860 //assem_debug("set_gz32\n");
863 emit_cmovl_imm(0,rt);
866 static void emit_set_nz32(u_int rs, u_int rt)
868 //assem_debug("set_nz32\n");
869 if(rs!=rt) emit_mov(rs,rt);
871 emit_cmovne_imm(1,rt);
874 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
876 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovl_imm(1,rt);
883 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
885 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovb_imm(1,rt);
892 static int can_jump_or_call(const void *a)
894 intptr_t diff = (u_char *)a - out;
895 return (-134217728 <= diff && diff <= 134217727);
898 static void emit_call(const void *a)
900 intptr_t diff = (u_char *)a - out;
901 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
903 if (-134217728 <= diff && diff <= 134217727)
904 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
909 static void emit_jmp(const void *a)
911 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
912 u_int offset = genjmp(a);
913 output_w32(0x14000000 | offset);
916 static void emit_jne(const void *a)
918 assem_debug("bne %p\n", a);
919 u_int offset = genjmpcc(a);
920 output_w32(0x54000000 | (offset << 5) | COND_NE);
923 static void emit_jeq(const void *a)
925 assem_debug("beq %p\n", a);
926 u_int offset = genjmpcc(a);
927 output_w32(0x54000000 | (offset << 5) | COND_EQ);
930 static void emit_js(const void *a)
932 assem_debug("bmi %p\n", a);
933 u_int offset = genjmpcc(a);
934 output_w32(0x54000000 | (offset << 5) | COND_MI);
937 static void emit_jns(const void *a)
939 assem_debug("bpl %p\n", a);
940 u_int offset = genjmpcc(a);
941 output_w32(0x54000000 | (offset << 5) | COND_PL);
944 static void emit_jl(const void *a)
946 assem_debug("blt %p\n", a);
947 u_int offset = genjmpcc(a);
948 output_w32(0x54000000 | (offset << 5) | COND_LT);
951 static void emit_jge(const void *a)
953 assem_debug("bge %p\n", a);
954 u_int offset = genjmpcc(a);
955 output_w32(0x54000000 | (offset << 5) | COND_GE);
958 static void emit_jno(const void *a)
960 assem_debug("bvc %p\n", a);
961 u_int offset = genjmpcc(a);
962 output_w32(0x54000000 | (offset << 5) | COND_VC);
965 static void emit_jc(const void *a)
967 assem_debug("bcs %p\n", a);
968 u_int offset = genjmpcc(a);
969 output_w32(0x54000000 | (offset << 5) | COND_CS);
972 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
974 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
975 u_int offset = genjmpcc(a);
976 is64 = is64 ? 0x80000000 : 0;
977 isnz = isnz ? 0x01000000 : 0;
978 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
981 static void emit_cbz(const void *a, u_int r)
986 static void emit_jmpreg(u_int r)
988 assem_debug("br %s\n", regname64[r]);
989 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
992 static void emit_retreg(u_int r)
994 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
995 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
998 static void emit_ret(void)
1003 static void emit_adr(void *addr, u_int rt)
1005 intptr_t offset = (u_char *)addr - out;
1006 assert(-1048576 <= offset && offset < 1048576);
1008 assem_debug("adr x%d,#%#lx\n", rt, offset);
1009 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1012 static void emit_adrp(void *addr, u_int rt)
1014 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1015 assert(-4294967296l <= offset && offset < 4294967296l);
1018 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1019 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1022 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1024 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1025 assert(-256 <= offset && offset < 256);
1026 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1029 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1031 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1035 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1037 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1041 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1043 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1047 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1049 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1050 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1052 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1054 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1060 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1062 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1066 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1068 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1072 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1074 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1075 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1078 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1080 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1081 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1084 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1086 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1087 assert(-256 <= offset && offset < 256);
1088 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1091 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1093 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1094 assert(-256 <= offset && offset < 256);
1095 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1098 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1100 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1105 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1107 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1112 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1114 if (!(offset & 3) && (u_int)offset <= 16380) {
1115 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1116 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1118 else if (-256 <= offset && offset < 256) {
1119 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1120 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1126 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1128 if (!(offset & 1) && (u_int)offset <= 8190) {
1129 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1130 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1134 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1142 if ((u_int)offset < 4096) {
1143 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1154 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1156 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1157 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1160 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1162 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1163 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1166 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1168 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1169 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1172 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1174 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1178 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1180 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1184 static void emit_clz(u_int rs, u_int rt)
1186 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1187 output_w32(0x5ac01000 | rn_rd(rs, rt));
1190 // special case for checking invalid_code
1191 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1193 host_tempreg_acquire();
1194 emit_shrimm(r, 12, HOST_TEMPREG);
1195 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1196 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1197 emit_cmpimm(HOST_TEMPREG, imm);
1198 host_tempreg_release();
1201 // special for loadlr_assemble, rs2 is destroyed
1202 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1204 emit_shl(rs2, shift, rs2);
1205 emit_bic(rs1, rs2, rt);
1208 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1210 emit_shr(rs2, shift, rs2);
1211 emit_bic(rs1, rs2, rt);
1214 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1216 output_w32(0x58000000 | imm19_rt(ofs, rt));
1219 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1221 u_int op = 0xb9000000;
1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
1224 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1225 is64 = is64 ? 1 : 0;
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1227 ofs = (ofs >> (2+is64));
1228 if (!is_st) op |= 0x00400000;
1229 if (is64) op |= 0x40000000;
1230 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1233 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1235 u_int op = 0x29000000;
1236 unused const char *ldst = is_st ? "st" : "ld";
1237 unused char rp = is64 ? 'x' : 'w';
1238 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1239 is64 = is64 ? 1 : 0;
1240 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1241 ofs = (ofs >> (2+is64));
1242 assert(-64 <= ofs && ofs <= 63);
1244 if (!is_st) op |= 0x00400000;
1245 if (is64) op |= 0x80000000;
1246 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Emit stores (is_store != 0) or loads (is_store == 0) for every register
// whose bit is set in reglist, addressed relative to SP starting at
// SSP_CALLEE_REGS.
// NOTE(review): most of the body is missing from this copy of the file
// (declarations, the pairing logic and the ofs updates), so only the visible
// calls are described below.
1249 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask one bit at a time until no set bits remain
1253 for (r = 0; reglist; r++, reglist >>= 1) {
// two registers collected: transfer them with one 64-bit pair instruction
1257 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
// single 64-bit transfer of pair[0] (presumably a leftover unpaired register)
1263 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the bytes used must not run past SSP_CALLER_REGS
1266 assert(ofs <= SSP_CALLER_REGS);
1269 // Save registers before function call
1270 static void save_regs(u_int reglist)
1272 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1273 save_load_regs_all(1, reglist);
1276 // Restore registers after function call
1277 static void restore_regs(u_int reglist)
1279 reglist &= CALLER_SAVE_REGS;
1280 save_load_regs_all(0, reglist);
1283 /* Stubs/epilogue */
// NOTE(review): only the signature of literal_pool survives in this copy of
// the file; its body is missing and cannot be described from here.
1285 static void literal_pool(int n)
// NOTE(review): only the signature of literal_pool_jumpover survives in this
// copy of the file; its body is missing and cannot be described from here.
1290 static void literal_pool_jumpover(int n)
// Emit the stub for a jump that leaves the current block: register 0 is
// loaded with the 32-bit `target` (low half via movz, high half via movk)
// and control is transferred to `linker` with a far jump.
// `addr` is the branch instruction this stub belongs to.
// NOTE(review): some lines of the body are missing from this copy of the file.
1294 // parsed by get_pointer, find_extjump_insn
1295 static void emit_extjump2(u_char *addr, u_int target, void *linker)
// byte 3 of the instruction word holds the opcode bits tested here
// (assumes little-endian instruction storage - TODO confirm)
1297 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1299 emit_movz(target & 0xffff, 0);
1300 emit_movk_lsl16(target >> 16, 0);
1302 // addr is in the current recompiled block (max 256k)
1303 // offset shouldn't exceed +/-1MB
1305 emit_far_jump(linker);
// Sanity-check that `src` points at a stub of the shape emit_extjump2 emits.
// NOTE(review): only one check survives in this copy of the file; the
// declaration of ptr and any further checks are missing.
1308 static void check_extjump2(void *src)
// first word must be a 32-bit movz into register 0 (any immediate)
1311 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
// Strategy, in order of preference:
//   1. add the difference, if it fits the ranges accepted below;
//   2. a case for rt_val == ~rs_val (its emitted instruction is missing
//      from this copy of the file);
//   3. xor with an immediate, if the differing bits form a rotated mask;
//   4. otherwise load rt_val from scratch.
1315 // put rt_val into rt, potentially making use of rs with value rs_val
1316 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1318 int diff = rt_val - rs_val;
// small difference, or a multiple of 4096 below 16M in magnitude
1319 if ((-4096 < diff && diff < 4096)
1320 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1321 emit_addimm(rs, diff, rt);
1322 else if (rt_val == ~rs_val)
1324 else if (is_rotated_mask(rs_val ^ rt_val))
1325 emit_xorimm(rs, rs_val ^ rt_val, rt);
1327 emit_movimm(rt_val, rt);
// return 1 if the above function can do its job cheaply
// The visible conditions mirror the add-immediate and xor-immediate cases
// of emit_movimm_from.
// NOTE(review): the definition of diff and one term of the expression are
// missing from this copy of the file.
1331 static int is_similar_value(u_int v1, u_int v2)
1334 return (-4096 < diff && diff < 4096)
1335 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1337 || is_rotated_mask(v1 ^ v2);
// Move host registers a0 and a1 into argument registers 0 and 1 using
// 64-bit moves, picking an order that does not clobber a value before it
// has been copied.
// NOTE(review): several lines (including the first condition) are missing
// from this copy of the file.
// NOTE(review): a0 and a1 are u_int, so the ">= 0" tests below are always
// true as written - confirm whether a signed type was intended.
1341 static void pass_args64(u_int a0, u_int a1)
// three-move sequence that goes through register 2 as scratch
1345 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1347 else if(a0!=0&&a1==0) {
1349 if (a0>=0) emit_mov64(a0,0);
// moves are skipped when the value already sits in its argument register
1352 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1353 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt, narrowing or extending according to the access width and
// signedness implied by the stub type.
// NOTE(review): the switch header and some case labels are missing from
// this copy of the file; only the surviving cases are annotated.
1357 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// signed byte: sign-extend from bit 7
1360 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
// byte store: zero-extend from bit 7
1362 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
// signed halfword: sign-extend from bit 15
1363 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
// halfword store: zero-extend from bit 15
1365 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
// word store: plain move, skipped when source and destination coincide
1367 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1372 #include "pcsxmem.h"
1373 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for memory-read stub number n.
// The stub first looks up the mem_rtab entry for the address; it then either
// loads directly through that entry or calls a read handler picked by the
// access width, and finally jumps back to stubs[n].retaddr.
// NOTE(review): many lines of the body (declarations, conditional branches,
// closing braces) are missing from this copy of the file; the comments below
// describe only what the surviving lines show.
1375 static void do_readstub(int n)
1377 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// make the branch recorded for this stub land at the current output position
1378 set_jump_target(stubs[n].addr, out);
1379 enum stub_type type = stubs[n].type;
1381 int rs = stubs[n].b;
1382 const struct regstat *i_regs = (void *)stubs[n].c;
1383 u_int reglist = stubs[n].e;
1384 const signed char *i_regmap = i_regs->regmap;
// coprocessor loads and LOADLR deliver their result through FTEMP;
// everything else uses the instruction's rt1
1386 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1387 rt=get_reg(i_regmap,FTEMP);
1389 rt=get_reg(i_regmap,dops[i].rt1);
1392 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1393 void *restore_jump = NULL, *handler_jump = NULL;
// scan host registers below HOST_CCREG for one that is not in reglist
// (presumably to serve as `temp` without needing a save)
1395 for (r = 0; r < HOST_CCREG; r++) {
1396 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1401 if(rt>=0&&dops[i].rt1!=0)
1408 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then doubled with a flag-setting add
1410 emit_readdword(&mem_rtab,temp);
1411 emit_shrimm(rs,12,temp2);
1412 emit_readdword_dualindexedx8(temp,temp2,temp2);
1413 emit_adds64(temp2,temp2,temp2);
// direct access: load from temp2 + rs with the width/signedness of the stub
1416 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1418 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1419 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1420 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1421 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1422 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1428 emit_jmp(0); // jump to reg restore
1431 emit_jmp(stubs[n].retaddr); // return address
1432 set_jump_target(handler_jump, out);
// handler path: choose the read handler by access width
1437 if(type==LOADB_STUB||type==LOADBU_STUB)
1438 handler=jump_handler_read8;
1439 if(type==LOADH_STUB||type==LOADHU_STUB)
1440 handler=jump_handler_read16;
1441 if(type==LOADW_STUB)
1442 handler=jump_handler_read32;
// arguments: address register and table entry, plus the adjusted cycle
// count in register 2
1444 pass_args64(rs,temp2);
1445 int cc=get_reg(i_regmap,CCREG);
1447 emit_loadreg(CCREG,2);
1448 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1449 emit_far_call(handler);
1450 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended into rt
1451 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1452 loadstore_extend(type,0,rt);
1455 set_jump_target(restore_jump, out);
1456 restore_regs(reglist);
1457 emit_jmp(stubs[n].retaddr);
// Emit an inline read for an address that is known at compile time (addr).
// If get_direct_memhandler reports no handler, the value is loaded straight
// from host memory; otherwise a read handler is called with the address in
// register 0 and the adjusted cycle count in register 2.
// NOTE(review): many lines of the body are missing from this copy of the
// file; the comments below describe only what the surviving lines show.
1460 static void inline_readstub(enum stub_type type, int i, u_int addr,
1461 const signed char regmap[], int target, int adj, u_int reglist)
1463 int rs=get_reg(regmap,target);
1464 int rt=get_reg(regmap,target);
// fall back to the register mapped as -1 when target has no host register
1465 if(rs<0) rs=get_reg(regmap,-1);
1468 uintptr_t host_addr = 0;
1470 int cc=get_reg(regmap,CCREG);
1471 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1473 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// NULL handler: the address maps to plain memory at host_addr
1474 if (handler == NULL) {
1475 if(rt<0||dops[i].rt1==0)
1477 if (addr != host_addr) {
// only host addresses below 4 GiB are handled here
1478 if (host_addr >= 0x100000000ull)
1479 abort(); // ROREG not implemented
// rs currently holds addr; turn it into host_addr as cheaply as possible
1480 emit_movimm_from(addr, rs, host_addr, rs);
1483 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1484 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1485 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1486 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1487 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1492 is_dynamic=pcsxmem_is_handler_dynamic(addr);
// pick the generic read handler by access width
1494 if(type==LOADB_STUB||type==LOADBU_STUB)
1495 handler=jump_handler_read8;
1496 if(type==LOADH_STUB||type==LOADHU_STUB)
1497 handler=jump_handler_read16;
1498 if(type==LOADW_STUB)
1499 handler=jump_handler_read32;
1502 // call a memhandler
1503 if(rt>=0&&dops[i].rt1!=0)
1507 emit_movimm(addr,0);
1511 emit_loadreg(CCREG,2);
1512 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
// register 1 = the mem_rtab entry for this page shifted left by one,
// materialised as an adrp page address plus its low 12 bits
1514 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1515 emit_adrp((void *)l1, 1);
1516 emit_addimm64(1, l1 & 0xfff, 1);
1519 emit_far_call(do_memhandler_pre);
1521 emit_far_call(handler);
1523 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended into rt
1524 if(rt>=0&&dops[i].rt1!=0)
1525 loadstore_extend(type, 0, rt);
1526 restore_regs(reglist);
// Emit the out-of-line slow path for memory-write stub number n.
// The stub looks up the mem_wtab entry for the address; it then either
// stores directly through that entry or calls a write handler picked by the
// access width, updates the cycle counter from the handler's return value
// and jumps back to stubs[n].retaddr.
// NOTE(review): many lines of the body are missing from this copy of the
// file; the comments below describe only what the surviving lines show.
1529 static void do_writestub(int n)
1531 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
// make the branch recorded for this stub land at the current output position
1532 set_jump_target(stubs[n].addr, out);
1533 enum stub_type type=stubs[n].type;
1536 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1537 u_int reglist=stubs[n].e;
1538 signed char *i_regmap=i_regs->regmap;
// coprocessor stores take their data from FTEMP, others from rs2
1540 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1541 rt=get_reg(i_regmap,r=FTEMP);
1543 rt=get_reg(i_regmap,r=dops[i].rs2);
1547 int rtmp,temp=-1,temp2,regs_saved=0;
1548 void *restore_jump = NULL, *handler_jump = NULL;
// reglist plus the address and data registers
1549 int reglist2=reglist|(1<<rs)|(1<<rt);
// scan host registers below HOST_CCREG for one that is not in reglist
1550 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1551 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
// otherwise consider registers 0..3 that are neither rs nor rt
1559 for(rtmp=0;rtmp<=3;rtmp++)
1560 if(rtmp!=rs&&rtmp!=rt)
1563 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1566 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled with a flag-setting add
1569 emit_readdword(&mem_wtab,temp);
1570 emit_shrimm(rs,12,temp2);
1571 emit_readdword_dualindexedx8(temp,temp2,temp2);
1572 emit_adds64(temp2,temp2,temp2);
// direct access: store rt to temp2 + rs with the width of the stub
1576 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1577 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1578 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1583 emit_jmp(0); // jump to reg restore
1586 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1587 set_jump_target(handler_jump, out);
1589 // TODO FIXME: regalloc should prefer callee-saved regs
// handler path: choose the write handler by access width
1594 case STOREB_STUB: handler=jump_handler_write8; break;
1595 case STOREH_STUB: handler=jump_handler_write16; break;
1596 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry travels in register 3
1602 emit_mov64(temp2,3);
1603 host_tempreg_release();
1605 int cc=get_reg(i_regmap,CCREG);
// adjusted cycle count goes in register 2
1607 emit_loadreg(CCREG,2);
1608 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1609 // returns new cycle_count
1610 emit_far_call(handler);
// undo the adjustment on the count returned in register 0
1611 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1613 emit_storereg(CCREG,2);
1615 set_jump_target(restore_jump, out);
1616 restore_regs(reglist);
1617 emit_jmp(stubs[n].retaddr);
// Emit an inline write for an address that is known at compile time (addr).
// If get_direct_memhandler reports no handler, the value is stored straight
// to host memory; otherwise the handler is called between
// do_memhandler_pre and do_memhandler_post, and the cycle counter is updated
// from the value left in register 0.
// NOTE(review): several lines of the body are missing from this copy of the
// file; the comments below describe only what the surviving lines show.
1620 static void inline_writestub(enum stub_type type, int i, u_int addr,
1621 const signed char regmap[], int target, int adj, u_int reglist)
// rs: register mapped as -1 (holds the address); rt: register holding target
1623 int rs = get_reg(regmap,-1);
1624 int rt = get_reg(regmap,target);
1627 uintptr_t host_addr = 0;
1628 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
// NULL handler: the address maps to plain memory at host_addr
1629 if (handler == NULL) {
1630 if (addr != host_addr) {
// only host addresses below 4 GiB are handled here
1631 if (host_addr >= 0x100000000ull)
1632 abort(); // ROREG not implemented
// rs currently holds addr; turn it into host_addr as cheaply as possible
1633 emit_movimm_from(addr, rs, host_addr, rs);
1636 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1637 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1638 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1644 // call a memhandler
1646 emit_writeword(rs, &address); // some handlers still need it
// value to write goes in register 0, narrowed to the access width
1647 loadstore_extend(type, rt, 0);
1649 cc = cc_use = get_reg(regmap, CCREG);
// register 2 stands in for the cycle count when CCREG is reloaded here
1651 emit_loadreg(CCREG, (cc_use = 2));
1652 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
1654 emit_far_call(do_memhandler_pre);
1655 emit_far_call(handler);
1656 emit_far_call(do_memhandler_post);
// undo the adjustment on the count left in register 0
1657 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
1659 emit_storereg(CCREG, cc_use);
1660 restore_regs(reglist);
/*
 * Compare `size` bytes of the original guest code (`source`) with the copy
 * taken at compile time (`copy`).
 * Returns the memcmp result: 0 when the two ranges are identical, nonzero
 * when they differ.
 * NOTE(review): the return statement was missing from this copy of the file
 * and is reconstructed from the int return type and the otherwise unused
 * memcmp result.
 */
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  int ret = memcmp(source, copy, size);
  //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
  return ret;
}
// Emits the common head of a dirty-check stub: load the source and copy
// pointers from literals, pass the length, call verify_code_arm64, and on
// the path that does not skip ahead look the block up again via get_addr
// using vaddr.
// NOTE(review): the lines that create `jmp` and that consume the get_addr
// result are missing from this copy of the file.
1670 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1671 static void do_dirty_stub_base(u_int vaddr, u_int source_len)
// the length must fit the 16-bit movz immediate used below
1673 assert(source_len <= MAXBLOCK*4);
1674 emit_loadlp_ofs(0, 0); // ldr x0, source (offset filled in by set_loadlp)
1675 emit_loadlp_ofs(0, 1); // ldr x1, copy (offset filled in by set_loadlp)
1676 emit_movz(source_len, 2);
1677 emit_far_call(verify_code_arm64);
// w0 = vaddr, built from two 16-bit halves
1680 emit_movz(vaddr & 0xffff, 0);
1681 emit_movk_lsl16(vaddr >> 16, 0);
1682 emit_far_call(get_addr);
1684 set_jump_target(jmp, out);
/*
 * Assert that ptr points at a dirty-check stub of the expected shape:
 * two literal loads into x0/x1, a movz into w2, and "br x0" as the ninth
 * instruction word.
 */
static void assert_dirty_stub(const u_int *ptr)
{
  assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
  assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
  assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
  assert( ptr[8]               == 0xd61f0000); // br x0
}
/*
 * Patch a previously emitted "ldr (literal)" so that it loads from `lit`.
 *   loadl - the instruction word; its offset field must still be zero
 *   lit   - address of the literal; must lie after loadl, be 4-byte aligned
 *           relative to it, and be less than 1 MiB away
 */
static void set_loadlp(u_int *loadl, void *lit)
{
  uintptr_t ofs = (u_char *)lit - (u_char *)loadl;

  assert((*loadl & ~0x1f) == 0x58000000); // ldr literal, offset still 0
  assert((ofs & 3) == 0);
  assert(ofs < 0x100000);
  // the offset is stored in words, starting at bit 5
  *loadl |= (ofs >> 2) << 5;
}
1704 static void do_dirty_stub_emit_literals(u_int *loadlps)
1706 set_loadlp(&loadlps[0], out);
1707 output_w64((uintptr_t)source);
1708 set_loadlp(&loadlps[1], out);
1709 output_w64((uintptr_t)copy);
// Emit the dirty-check stub for instruction i of the current block: the
// common head (with vaddr = start + i*4), a jump to the compiled code for
// instruction i, and finally the two literals the head refers to.
// NOTE(review): the declaration of `entry`, the lines between the head and
// the jump, and the return statement are missing from this copy of the file.
1712 static void *do_dirty_stub(int i, u_int source_len)
1714 assem_debug("do_dirty_stub %x\n",start+i*4);
// remember where the two literal loads are so they can be patched below
1715 u_int *loadlps = (void *)out;
1716 do_dirty_stub_base(start + i*4, source_len);
1720 entry = instr_addr[i];
1721 emit_jmp(instr_addr[i]);
1722 do_dirty_stub_emit_literals(loadlps);
1726 static void do_dirty_stub_ds(u_int source_len)
1728 u_int *loadlps = (void *)out;
1729 do_dirty_stub_base(start + 1, source_len);
1730 void *lit_jumpover = out;
1731 emit_jmp(out + 8*2);
1732 do_dirty_stub_emit_literals(loadlps);
1733 set_jump_target(lit_jumpover, out);
// Return the 64-bit value that the "ldr (literal)" instruction at i loads.
// NOTE(review): the declaration of `ofs` and the lines that extract it from
// the instruction word are missing from this copy of the file.
1736 static uint64_t get_from_ldr_literal(const u_int *i)
// top byte 0x58 identifies a 64-bit ldr (literal)
1739 assert((i[0] & 0xff000000) == 0x58000000);
// ofs is applied in units of instruction words
1742 return *(uint64_t *)(i + ofs);
/*
 * Return the 16-bit immediate encoded in the "movz" instruction at i.
 * The size bit (bit 31) is ignored, so both the 32-bit and 64-bit forms
 * are accepted; the shift field must be zero.
 */
static uint64_t get_from_movz(const u_int *i)
{
  assert((i[0] & 0x7fe00000) == 0x52800000);
  // imm16 occupies bits 5..20
  return (i[0] >> 5) & 0xffff;
}
1751 // Find the "clean" entry point from a "dirty" entry point
1752 // by skipping past the call to verify_code
// NOTE(review): only the shape check survives in this copy of the file; the
// lines that compute and return the clean address are missing.
1753 static void *get_clean_addr(u_int *addr)
1755 assert_dirty_stub(addr);
// Re-check the dirty stub at ptr: read the source pointer, copy pointer and
// length back out of the emitted instructions and compare the two ranges.
// Returns nonzero when source and copy are identical.
// NOTE(review): the declaration of `len` is missing from this copy of the
// file.
1759 static int verify_dirty(const u_int *ptr)
1761 const void *source, *copy;
1763 assert_dirty_stub(ptr);
1764 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1765 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1766 len = get_from_movz(&ptr[2]); // movz w2, #source_len
1767 return !memcmp(source, copy, len);
// Tell whether addr is a clean entry point, judged by whether it starts
// with the literal load that opens a dirty stub.
// NOTE(review): the return statements are missing from this copy of the
// file, so the exact return values cannot be confirmed here.
1770 static int isclean(void *addr)
1772 const u_int *ptr = addr;
1773 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1774 assert_dirty_stub(ptr);
1780 // get source that block at addr was compiled from (host pointers)
// addr must point at a dirty stub; *start receives the source pointer held
// in the stub's first literal and *end is *start plus the stub's length.
1781 static void get_bounds(void *addr, u_char **start, u_char **end)
1783 const u_int *ptr = addr;
1784 assert_dirty_stub(ptr);
1785 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x0, source
1786 *end = *start + get_from_movz(&ptr[2]); // movz w2, #source_len
// Emit the code that runs before a GTE (cop2) operation handler is called:
// store the registers in reglist, do the cop2 stall check, and put a pointer
// to the cop2 data registers in register 0.
// NOTE(review): the lines around the pcnt_gte_start call are missing from
// this copy of the file (presumably a conditional-compilation guard).
1791 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// reglist is used as given; callers filter it beforehand
1793 save_load_regs_all(1, reglist);
1794 cop2_do_stall_check(op, i, i_regs, 0);
1797 emit_far_call(pcnt_gte_start);
1799 // pointer to cop2 regs
// computed as FP plus the offset of CP2D.r[0] from dynarec_local
1800 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that runs after a GTE (cop2) operation handler: reload the
// registers that c2op_prologue stored.
// NOTE(review): the lines around the pcnt_gte_end call are missing from this
// copy of the file (presumably a conditional-compilation guard).
1803 static void c2op_epilogue(u_int op,u_int reglist)
1807 emit_far_call(pcnt_gte_end);
1809 save_load_regs_all(0, reglist);
// Assemble GTE (cop2) operation i: when a handler exists for the opcode,
// emit prologue, store the opcode word to psxRegs.code, call the handler
// (the no-flags variant when flags are not needed) and emit the epilogue.
// NOTE(review): some lines of the body are missing from this copy of the
// file.
1812 static void c2op_assemble(int i, const struct regstat *i_regs)
// low 6 bits of the instruction select the GTE operation
1814 u_int c2op=source[i]&0x3f;
1815 u_int hr,reglist_full=0,reglist;
1816 int need_flags,need_ir;
// collect every host register that currently holds a guest value
1817 for(hr=0;hr<HOST_REGS;hr++) {
1818 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1820 reglist=reglist_full&CALLER_SAVE_REGS;
1822 if (gte_handlers[c2op]!=NULL) {
// bit 63 of gte_unneeded marks the flags as unneeded; bits 0xe00 cover
// what this code calls "ir"
1823 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1824 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1825 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1826 source[i],gte_unneeded[i+1],need_flags,need_ir);
1827 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1829 //int shift = (source[i] >> 19) & 1;
1830 //int lm = (source[i] >> 10) & 1;
1834 c2op_prologue(c2op, i, i_regs, reglist);
1835 emit_movimm(source[i],1); // opcode
1836 emit_writeword(1,&psxRegs.code);
1837 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1840 c2op_epilogue(c2op,reglist);
1844 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1846 //value = value & 0x7ffff000;
1847 //if (value & 0x7f87e000) value |= 0x80000000;
1848 emit_andimm(sl, 0x7fffe000, temp);
1849 emit_testimm(temp, 0xff87ffff);
1850 emit_andimm(sl, 0x7ffff000, temp);
1851 host_tempreg_acquire();
1852 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1853 emit_cmovne_reg(HOST_TEMPREG, temp);
1854 host_tempreg_release();
1855 assert(0); // testing needed
1858 static void do_mfc2_31_one(u_int copr,signed char temp)
1860 emit_readshword(®_cop2d[copr],temp);
1861 emit_bicsar_imm(temp,31,temp);
1862 emit_cmpimm(temp,0xf80);
1863 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1864 emit_andimm(temp,0xf80,temp);
1867 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1870 host_tempreg_acquire();
1871 temp = HOST_TEMPREG;
1873 do_mfc2_31_one(9,temp);
1874 emit_shrimm(temp,7,tl);
1875 do_mfc2_31_one(10,temp);
1876 emit_orrshr_imm(temp,2,tl);
1877 do_mfc2_31_one(11,temp);
1878 emit_orrshl_imm(temp,3,tl);
1879 emit_writeword(tl,®_cop2d[29]);
1881 if (temp == HOST_TEMPREG)
1882 host_tempreg_release();
// Assemble a MULT/MULTU/DIV/DIVU instruction (selected by dops[i].opcode2).
// The first part handles the case where both source registers are non-zero
// guest registers; the tail handles the cases where one of them is r0.
// NOTE(review): many lines of the body (case labels, asserts, else branches,
// closing braces) are missing from this copy of the file; the comments below
// describe only what the surviving lines show.
1885 static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1891 if(dops[i].rs1&&dops[i].rs2)
1893 switch(dops[i].opcode2)
// multiply: the 64-bit product is formed in the register mapped to HI
1898 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1899 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1900 signed char hi=get_reg(i_regs->regmap,HIREG);
1901 signed char lo=get_reg(i_regs->regmap,LOREG);
1907 if(dops[i].opcode2==0x18) // MULT
1908 emit_smull(m1,m2,hi);
1910 emit_umull(m1,m2,hi);
// leave the upper 32 bits of the product in hi
1913 emit_shrimm64(hi,32,hi);
// divide: quotient goes to LO, remainder to HI
1919 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1920 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1921 signed char quotient=get_reg(i_regs->regmap,LOREG);
1922 signed char remainder=get_reg(i_regs->regmap,HIREG);
1923 assert(numerator>=0);
1924 assert(denominator>=0);
1925 assert(quotient>=0);
1926 assert(remainder>=0);
1928 if (dops[i].opcode2 == 0x1A) // DIV
1929 emit_sdiv(numerator,denominator,quotient);
1931 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1932 emit_msub(quotient,denominator,numerator,remainder);
1934 // div 0 quotient (remainder is already correct)
1935 host_tempreg_acquire();
1936 if (dops[i].opcode2 == 0x1A) // DIV
1937 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1939 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient with the value prepared above when denominator == 0
1940 emit_test(denominator,denominator);
1941 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1942 host_tempreg_release();
// from here on: at least one source operand is guest register 0
1951 signed char hr=get_reg(i_regs->regmap,HIREG);
1952 signed char lr=get_reg(i_regs->regmap,LOREG);
1953 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1956 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1957 assert(numerator >= 0);
// HI receives the numerator unchanged
1959 emit_mov(numerator,hr);
1961 if (dops[i].opcode2 == 0x1A) // DIV
1962 emit_sub_asrimm(0,numerator,31,lr);
1968 if (hr >= 0) emit_zeroreg(hr);
1969 if (lr >= 0) emit_movimm(~0,lr);
1974 // Multiply by zero is zero.
1975 if (hr >= 0) emit_zeroreg(hr);
1976 if (lr >= 0) emit_zeroreg(lr);
1980 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the compiled code for the guest address held in host
// register rs, resolved through get_addr_ht.
// NOTE(review): the lines before and after the call are missing from this
// copy of the file, so argument setup and the final jump are not visible.
1982 static void do_jump_vaddr(u_int rs)
1986 emit_far_call(get_addr_ht);
/* Intentionally emits nothing on this architecture; see the note inside. */
static void do_preload_rhash(u_int r) {
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction (below)
}
1995 static void do_preload_rhtbl(u_int ht) {
1996 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1999 static void do_rhash(u_int rs,u_int rh) {
2000 emit_andimm(rs, 0xf8, rh);
2003 static void do_miniht_load(int ht, u_int rh) {
2004 emit_add64(ht, rh, ht);
2005 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash-table lookup: on the path reached through
// `jaddr`, load the 64-bit value at offset 8 of the entry addressed by ht
// back into ht.
// NOTE(review): the comparison, the creation of `jaddr`, the miss path and
// the final jump are missing from this copy of the file.
2008 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2014 set_jump_target(jaddr, out);
2015 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// 64-bit ldr with unsigned offset; the immediate counts in 8-byte units
2016 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that records return_address in the mini hash table: rt is loaded
// with the 32-bit guest return address and stored in slot [0] of the entry,
// and the 64-bit value in temp is stored in slot [1].
// The entry index is (return_address & 0xFF) >> 3.
// NOTE(review): the line that sets up `temp` is missing from this copy of
// the file.
2020 // parsed by set_jump_target?
2021 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// high half first (movz with shift), then the low half (movk)
2022 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2023 emit_movk(return_address&0xffff,rt);
2024 add_to_linker(out,return_address,1);
2026 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2027 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make the code written to [start, end) visible to instruction fetch:
// clean/invalidate the data cache lines, invalidate the instruction cache
// lines, and finish with barriers.  Either cache pass is skipped when
// CTR_EL0 reports that it is not required.
// NOTE(review): closing braces are missing from this copy of the file, so
// whether each "dsb ish" sits inside or after its `if` cannot be told here.
2030 static void clear_cache_arm64(char *start, char *end)
2032 // Don't rely on GCC's __clear_cache implementation, as it caches
2033 // icache/dcache cache line sizes, that can vary between cores on
2034 // big.LITTLE architectures.
2035 uint64_t addr, ctr_el0;
// smallest line sizes seen so far across all calls
2036 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2037 size_t isize, dsize;
2039 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// bits 3:0 and 19:16 hold log2 of the line size in 4-byte words
2040 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2041 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2043 // use the global minimum cache line size
2044 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2045 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2047 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2048 not required for instruction to data coherence. */
2049 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a line boundary and walk line by line
2050 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2051 for (; addr < (uint64_t)end; addr += dsize)
2052 // use "civac" instead of "cvau", as this is the suggested workaround for
2053 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2054 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2056 __asm__ volatile("dsb ish" : : : "memory");
2058 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2059 Unification is not required for instruction to data coherence. */
2060 if ((ctr_el0 & (1 << 29)) == 0x0) {
2061 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2062 for (; addr < (uint64_t)end; addr += isize)
2063 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2065 __asm__ volatile("dsb ish" : : : "memory");
2068 __asm__ volatile("isb" : : : "memory");
2071 // CPU-architecture-specific initialization
// Fills every trampoline slot in ndrc->tramp.ops with the same two
// instructions: a literal load into x17 and an indirect branch through x17.
// The load offset is the distance from tramp.ops to tramp.f, so slot i
// reads its target from the matching position in tramp.f.
// NOTE(review): the declaration of `i` is missing from this copy of the file.
2072 static void arch_init(void)
2074 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2075 struct tramp_insns *ops = ndrc->tramp.ops;
// the literal-load offset is encoded in words
2077 assert(!(diff & 3));
// the writes are bracketed by start_/end_tcache_write over the ops array
2078 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2079 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2080 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2081 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2083 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2086 // vim:shiftwidth=2:expandtab