drc: rework cycle counting
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define unused __attribute__((unused))
27
d1e4ebd9 28void do_memhandler_pre();
29void do_memhandler_post();
be516ebe 30
31/* Linker */
// Patch the pc-relative instruction at addr so that it refers to target.
// Recognized instructions are b, b.cond, cbz/cbnz and adr; anything else
// aborts. The offset is asserted to fit the instruction's immediate field.
static void set_jump_target(void *addr, void *target)
{
  uint32_t *insn_ptr = addr;
  uint32_t insn = *insn_ptr;
  intptr_t offset = (unsigned char *)target - (unsigned char *)addr;

  if ((insn & 0xFC000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *insn_ptr = (insn & 0xFC000000) | ((offset >> 2) & 0x3ffffff);
  }
  else if ((insn & 0xff000000) == 0x54000000 // b.cond
        || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Keep all of bits 4:0: that is the condition for b.cond, but the
    // complete 5-bit Rt for cbz/cbnz (a 4-bit mask would break w16 and up).
    *insn_ptr = (insn & 0xFF00001F) | (((offset >> 2) & 0x7ffff) << 5);
  }
  else if ((insn & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *insn_ptr = (insn & 0x9F00001F) | (offset & 0x3) << 29 | ((offset >> 2) & 0x7ffff) << 5;
  }
  else
    abort(); // should not happen
}
58
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr; the returned pointer is that
// word's address plus the adr's encoded offset, in whole words.
static void *find_extjump_insn(void *stub)
{
  uint32_t *adr_ptr = (uint32_t *)stub + 2;
  uint32_t insn = *adr_ptr;
  int32_t immhi, offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi is bits 23:5; sign-extend its 19 bits arithmetically instead of
  // shifting a possibly negative signed value
  immhi = (int32_t)((insn >> 5) & 0x7ffff);
  immhi = (immhi ^ 0x40000) - 0x40000;
  // immlo is bits 30:29
  offset = immhi * 4 + (int32_t)((insn >> 29) & 0x3);
  return adr_ptr + offset / 4;
}
68
// find where an external branch is linked to, using the addr of its stub:
// get address that the stub loads (dyna_linker arg1),
// treat it as a pointer to branch insn,
// return addr where that branch jumps to
// (asserts if the instruction is not b, b.cond or cbz/cbnz)
static void *get_pointer(void *stub)
{
  uint32_t *insn_ptr = find_extjump_insn(stub);
  uint32_t insn = *insn_ptr;
  int32_t words;

  if ((insn & 0xfc000000) == 0x14000000) { // b: imm26 in bits 25:0
    words = (int32_t)(insn & 0x3ffffff);
    words = (words ^ 0x2000000) - 0x2000000; // sign-extend 26 bits
    return insn_ptr + words;
  }
  if ((insn & 0xff000000) == 0x54000000 // b.cond
      || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz: imm19 in bits 23:5
    words = (int32_t)((insn >> 5) & 0x7ffff);
    words = (words ^ 0x40000) - 0x40000; // sign-extend 19 bits
    return insn_ptr + words;
  }
  assert(0);
  return NULL;
}
84
be516ebe 85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
d1e4ebd9 118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition codes as placed into the 4-bit condition field of emitted
// instructions; the order matches condname[].
enum {
  COND_EQ = 0,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 139};
140
be516ebe 141static void output_w32(u_int word)
142{
143 *((u_int *)out) = word;
144 out += 4;
145}
146
d1e4ebd9 147static void output_w64(uint64_t dword)
148{
149 *((uint64_t *)out) = dword;
150 out+=8;
151}
152
153/*
687b4580 154static u_int rm_rd(u_int rm, u_int rd)
155{
156 assert(rm < 31);
157 assert(rd < 31);
158 return (rm << 16) | rd;
159}
d1e4ebd9 160*/
687b4580 161
3968e69e 162static u_int rn_rd(u_int rn, u_int rd)
163{
164 assert(rn < 31);
165 assert(rd < 31);
166 return (rn << 5) | rd;
167}
168
be516ebe 169static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
170{
d1e4ebd9 171 assert(rm < 32);
172 assert(rn < 32);
173 assert(rd < 32);
be516ebe 174 return (rm << 16) | (rn << 5) | rd;
175}
176
3968e69e 177static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
178{
179 assert(ra < 32);
180 return rm_rn_rd(rm, rn, rd) | (ra << 10);
181}
182
d1e4ebd9 183static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
184{
185 assert(imm7 < 0x80);
186 assert(rt2 < 31);
187 assert(rn < 32);
188 assert(rt < 31);
189 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
190}
191
687b4580 192static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
193{
194 assert(imm6 <= 63);
195 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
196}
197
be516ebe 198static u_int imm16_rd(u_int imm16, u_int rd)
199{
200 assert(imm16 < 0x10000);
201 assert(rd < 31);
202 return (imm16 << 5) | rd;
203}
204
687b4580 205static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
206{
207 assert(imm12 < 0x1000);
d1e4ebd9 208 assert(rn < 32);
209 assert(rd < 32);
210 return (imm12 << 10) | (rn << 5) | rd;
211}
212
213static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
214{
215 assert(imm9 < 0x200);
687b4580 216 assert(rn < 31);
217 assert(rd < 31);
d1e4ebd9 218 return (imm9 << 12) | (rn << 5) | rd;
687b4580 219}
220
d1e4ebd9 221static u_int imm19_rt(u_int imm19, u_int rt)
222{
223 assert(imm19 < 0x80000);
224 assert(rt < 31);
225 return (imm19 << 5) | rt;
226}
227
228static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
229{
230 assert(n < 2);
231 assert(immr < 0x40);
232 assert(imms < 0x40);
233 assert(rn < 32);
234 assert(rd < 32);
235 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
236}
237
238static u_int genjmp(const u_char *addr)
be516ebe 239{
240 intptr_t offset = addr - out;
d1e4ebd9 241 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 242 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 243 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
244 abort();
be516ebe 245 return 0;
246 }
d1e4ebd9 247 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 248}
249
d1e4ebd9 250static u_int genjmpcc(const u_char *addr)
be516ebe 251{
252 intptr_t offset = addr - out;
d1e4ebd9 253 if ((uintptr_t)addr < 3) return 0;
be516ebe 254 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 255 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
256 abort();
257 return 0;
258 }
259 return ((u_int)offset >> 2) & 0x7ffff;
260}
261
262static uint32_t is_mask(u_int value)
263{
264 return value && ((value + 1) & value) == 0;
265}
266
267// This function returns true if the argument contains a
268// non-empty sequence of ones (possibly rotated) with the remainder zero.
269static uint32_t is_rotated_mask(u_int value)
270{
3968e69e 271 if (value == 0 || value == ~0)
be516ebe 272 return 0;
d1e4ebd9 273 if (is_mask((value - 1) | value))
274 return 1;
275 return is_mask((~value - 1) | ~value);
276}
277
278static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
279{
280 int lzeros, tzeros, ones;
281 assert(value != 0);
282 if (is_mask((value - 1) | value)) {
283 lzeros = __builtin_clz(value);
284 tzeros = __builtin_ctz(value);
285 ones = 32 - lzeros - tzeros;
286 *immr = (32 - tzeros) & 31;
287 *imms = ones - 1;
288 return;
be516ebe 289 }
d1e4ebd9 290 value = ~value;
291 if (is_mask((value - 1) | value)) {
292 lzeros = __builtin_clz(value);
293 tzeros = __builtin_ctz(value);
294 ones = 32 - lzeros - tzeros;
3968e69e 295 *immr = lzeros;
d1e4ebd9 296 *imms = 31 - ones;
297 return;
298 }
3968e69e 299 abort();
be516ebe 300}
301
302static void emit_mov(u_int rs, u_int rt)
303{
687b4580 304 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 305 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
306}
307
308static void emit_mov64(u_int rs, u_int rt)
309{
310 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
311 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 312}
313
687b4580 314static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 315{
d1e4ebd9 316 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
317 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 318}
319
d1e4ebd9 320static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 321{
d1e4ebd9 322 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
323 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 324}
325
d1e4ebd9 326static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 327{
3968e69e 328 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 329 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
330}
39b71d9a 331#define emit_adds_ptr emit_adds64
d1e4ebd9 332
333static void emit_neg(u_int rs, u_int rt)
334{
335 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 337}
338
687b4580 339static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 340{
d1e4ebd9 341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 343}
344
3968e69e 345static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
346{
347 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
348 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
349}
350
d1e4ebd9 351static void emit_movz(u_int imm, u_int rt)
be516ebe 352{
d1e4ebd9 353 assem_debug("movz %s,#%#x\n", regname[rt], imm);
354 output_w32(0x52800000 | imm16_rd(imm, rt));
355}
356
357static void emit_movz_lsl16(u_int imm, u_int rt)
358{
359 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
360 output_w32(0x52a00000 | imm16_rd(imm, rt));
361}
362
363static void emit_movn(u_int imm, u_int rt)
364{
365 assem_debug("movn %s,#%#x\n", regname[rt], imm);
366 output_w32(0x12800000 | imm16_rd(imm, rt));
367}
368
369static void emit_movn_lsl16(u_int imm,u_int rt)
370{
371 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x12a00000 | imm16_rd(imm, rt));
373}
374
375static void emit_movk(u_int imm,u_int rt)
376{
377 assem_debug("movk %s,#%#x\n", regname[rt], imm);
378 output_w32(0x72800000 | imm16_rd(imm, rt));
379}
380
381static void emit_movk_lsl16(u_int imm,u_int rt)
382{
383 assert(imm<65536);
3968e69e 384 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 385 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 386}
387
388static void emit_zeroreg(u_int rt)
389{
d1e4ebd9 390 emit_movz(0, rt);
be516ebe 391}
392
be516ebe 393static void emit_movimm(u_int imm, u_int rt)
394{
d1e4ebd9 395 if (imm < 65536)
396 emit_movz(imm, rt);
397 else if ((~imm) < 65536)
398 emit_movn(~imm, rt);
399 else if ((imm&0xffff) == 0)
400 emit_movz_lsl16(imm >> 16, rt);
401 else if (((~imm)&0xffff) == 0)
402 emit_movn_lsl16(~imm >> 16, rt);
403 else if (is_rotated_mask(imm)) {
404 u_int immr, imms;
405 gen_logical_imm(imm, &immr, &imms);
406 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
407 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
408 }
be516ebe 409 else {
d1e4ebd9 410 emit_movz(imm & 0xffff, rt);
411 emit_movk_lsl16(imm >> 16, rt);
be516ebe 412 }
413}
414
687b4580 415static void emit_readword(void *addr, u_int rt)
416{
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 3) && offset <= 16380) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
420 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
421 }
422 else
3968e69e 423 abort();
687b4580 424}
425
d1e4ebd9 426static void emit_readdword(void *addr, u_int rt)
427{
428 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
429 if (!(offset & 7) && offset <= 32760) {
430 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
431 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
432 }
3968e69e 433 else
434 abort();
435}
39b71d9a 436#define emit_readptr emit_readdword
3968e69e 437
438static void emit_readshword(void *addr, u_int rt)
439{
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
444 }
d1e4ebd9 445 else
446 assert(0);
447}
448
be516ebe 449static void emit_loadreg(u_int r, u_int hr)
450{
d1e4ebd9 451 int is64 = 0;
be516ebe 452 assert(r < 64);
453 if (r == 0)
454 emit_zeroreg(hr);
455 else {
7c3a5182 456 void *addr = &psxRegs.GPR.r[r];
be516ebe 457 switch (r) {
7c3a5182 458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
be516ebe 460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
d1e4ebd9 462 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 463 case ROREG: addr = &ram_offset; is64 = 1; break;
7c3a5182 464 default: assert(r < 34); break;
be516ebe 465 }
d1e4ebd9 466 if (is64)
467 emit_readdword(addr, hr);
468 else
469 emit_readword(addr, hr);
be516ebe 470 }
471}
472
687b4580 473static void emit_writeword(u_int rt, void *addr)
474{
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 3) && offset <= 16380) {
477 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
478 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
479 }
480 else
481 assert(0);
482}
483
d1e4ebd9 484static void emit_writedword(u_int rt, void *addr)
485{
486 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
487 if (!(offset & 7) && offset <= 32760) {
488 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 489 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 490 }
491 else
3968e69e 492 abort();
d1e4ebd9 493}
494
687b4580 495static void emit_storereg(u_int r, u_int hr)
be516ebe 496{
497 assert(r < 64);
7c3a5182 498 void *addr = &psxRegs.GPR.r[r];
be516ebe 499 switch (r) {
7c3a5182 500 //case HIREG: addr = &hi; break;
501 //case LOREG: addr = &lo; break;
be516ebe 502 case CCREG: addr = &cycle_count; break;
7c3a5182 503 default: assert(r < 34); break;
be516ebe 504 }
687b4580 505 emit_writeword(hr, addr);
be516ebe 506}
507
508static void emit_test(u_int rs, u_int rt)
509{
d1e4ebd9 510 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
511 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 512}
513
d1e4ebd9 514static void emit_testimm(u_int rs, u_int imm)
be516ebe 515{
d1e4ebd9 516 u_int immr, imms;
687b4580 517 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 518 assert(is_rotated_mask(imm)); // good enough for PCSX
519 gen_logical_imm(imm, &immr, &imms);
3968e69e 520 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 521}
522
523static void emit_not(u_int rs,u_int rt)
524{
525 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 526 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 527}
528
be516ebe 529static void emit_and(u_int rs1,u_int rs2,u_int rt)
530{
531 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 532 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 533}
534
535static void emit_or(u_int rs1,u_int rs2,u_int rt)
536{
537 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 538 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 539}
540
3968e69e 541static void emit_bic(u_int rs1,u_int rs2,u_int rt)
542{
543 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
544 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
545}
546
be516ebe 547static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
548{
be516ebe 549 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 550 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 551}
552
553static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
554{
be516ebe 555 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 556 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 557}
558
3968e69e 559static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
560{
561 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
562 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
563}
564
be516ebe 565static void emit_xor(u_int rs1,u_int rs2,u_int rt)
566{
567 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 568 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 569}
570
3968e69e 571static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
572{
573 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
574 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
575}
576
d1e4ebd9 577static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 578{
d1e4ebd9 579 unused const char *st = s ? "s" : "";
580 s = s ? 0x20000000 : 0;
581 is64 = is64 ? 0x80000000 : 0;
687b4580 582 if (imm < 4096) {
d1e4ebd9 583 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
584 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 585 }
586 else if (-imm < 4096) {
3968e69e 587 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 588 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
589 }
590 else if (imm < 16777216) {
591 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
592 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
593 if ((imm & 0xfff) || s) {
594 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 595 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 596 }
597 }
598 else if (-imm < 16777216) {
599 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
600 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
603 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
604 }
687b4580 605 }
606 else
3968e69e 607 abort();
be516ebe 608}
609
d1e4ebd9 610static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
611{
612 emit_addimm_s(0, 0, rs, imm, rt);
613}
614
615static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
616{
617 emit_addimm_s(0, 1, rs, imm, rt);
618}
619
be516ebe 620static void emit_addimm_and_set_flags(int imm, u_int rt)
621{
d1e4ebd9 622 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 623}
624
d1e4ebd9 625static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 626{
d1e4ebd9 627 const char *names[] = { "and", "orr", "eor", "ands" };
628 const char *name = names[op];
629 u_int immr, imms;
630 op = op << 29;
631 if (is_rotated_mask(imm)) {
632 gen_logical_imm(imm, &immr, &imms);
633 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
634 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
635 }
636 else {
637 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
638 host_tempreg_acquire();
639 emit_movimm(imm, HOST_TEMPREG);
640 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
641 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
642 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
643 host_tempreg_release();
644 }
645 (void)name;
be516ebe 646}
647
d1e4ebd9 648static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 649{
d1e4ebd9 650 if (imm == 0)
651 emit_zeroreg(rt);
652 else
653 emit_logicop_imm(0, rs, imm, rt);
be516ebe 654}
655
d1e4ebd9 656static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 657{
d1e4ebd9 658 if (imm == 0) {
659 if (rs != rt)
660 emit_mov(rs, rt);
661 }
662 else
663 emit_logicop_imm(1, rs, imm, rt);
be516ebe 664}
665
d1e4ebd9 666static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 667{
d1e4ebd9 668 if (imm == 0) {
669 if (rs != rt)
670 emit_mov(rs, rt);
671 }
672 else
673 emit_logicop_imm(2, rs, imm, rt);
be516ebe 674}
675
d1e4ebd9 676static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 677{
d1e4ebd9 678 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
679 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 680}
681
d1e4ebd9 682static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 683{
d1e4ebd9 684 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
685 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 686}
687
688static void emit_shlimm(u_int rs,u_int imm,u_int rt)
689{
be516ebe 690 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 691 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 692}
693
3968e69e 694static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 695{
3968e69e 696 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 698}
699
3968e69e 700static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 701{
be516ebe 702 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 703 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 704}
705
706static void emit_sarimm(u_int rs,u_int imm,u_int rt)
707{
be516ebe 708 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 709 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 710}
711
712static void emit_rorimm(u_int rs,u_int imm,u_int rt)
713{
3968e69e 714 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 715 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 716}
717
718static void emit_signextend16(u_int rs, u_int rt)
719{
720 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 722}
723
d1e4ebd9 724static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 725{
3968e69e 726 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 727 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 728}
729
d1e4ebd9 730static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 731{
d1e4ebd9 732 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
733 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 734}
735
d1e4ebd9 736static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 737{
d1e4ebd9 738 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 740}
741
d1e4ebd9 742static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 743{
d1e4ebd9 744 if (imm < 4096) {
745 assem_debug("cmp %s,%#x\n", regname[rs], imm);
746 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
747 }
748 else if (-imm < 4096) {
749 assem_debug("cmn %s,%#x\n", regname[rs], imm);
750 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
751 }
752 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 753 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 754 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
755 }
756 else {
757 host_tempreg_acquire();
758 emit_movimm(imm, HOST_TEMPREG);
759 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
760 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
761 host_tempreg_release();
762 }
be516ebe 763}
764
d1e4ebd9 765static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 766{
d1e4ebd9 767 assert(imm == 0 || imm == 1);
768 assert(cond0 < 0x10);
769 assert(cond1 < 0x10);
770 if (imm) {
771 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
772 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
773 } else {
774 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
775 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
776 }
be516ebe 777}
778
d1e4ebd9 779static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 780{
d1e4ebd9 781 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 782}
783
d1e4ebd9 784static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 785{
d1e4ebd9 786 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 787}
788
789static void emit_cmovb_imm(int imm,u_int rt)
790{
d1e4ebd9 791 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 792}
793
3968e69e 794static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 795{
3968e69e 796 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
797 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 798}
799
800static void emit_cmovne_reg(u_int rs,u_int rt)
801{
d1e4ebd9 802 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
803 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 804}
805
806static void emit_cmovl_reg(u_int rs,u_int rt)
807{
d1e4ebd9 808 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 810}
811
e3c6bdb5 812static void emit_cmovb_reg(u_int rs,u_int rt)
813{
814 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
816}
817
be516ebe 818static void emit_cmovs_reg(u_int rs,u_int rt)
819{
d1e4ebd9 820 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 822}
823
3968e69e 824static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
825{
826 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
827 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
828}
829
be516ebe 830static void emit_slti32(u_int rs,int imm,u_int rt)
831{
832 if(rs!=rt) emit_zeroreg(rt);
833 emit_cmpimm(rs,imm);
834 if(rs==rt) emit_movimm(0,rt);
835 emit_cmovl_imm(1,rt);
836}
837
838static void emit_sltiu32(u_int rs,int imm,u_int rt)
839{
840 if(rs!=rt) emit_zeroreg(rt);
841 emit_cmpimm(rs,imm);
842 if(rs==rt) emit_movimm(0,rt);
843 emit_cmovb_imm(1,rt);
844}
845
846static void emit_cmp(u_int rs,u_int rt)
847{
848 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 849 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 850}
851
852static void emit_set_gz32(u_int rs, u_int rt)
853{
854 //assem_debug("set_gz32\n");
855 emit_cmpimm(rs,1);
856 emit_movimm(1,rt);
857 emit_cmovl_imm(0,rt);
858}
859
860static void emit_set_nz32(u_int rs, u_int rt)
861{
862 //assem_debug("set_nz32\n");
d1e4ebd9 863 if(rs!=rt) emit_mov(rs,rt);
864 emit_test(rs,rs);
865 emit_cmovne_imm(1,rt);
be516ebe 866}
867
868static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
869{
870 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
871 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
872 emit_cmp(rs1,rs2);
873 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
874 emit_cmovl_imm(1,rt);
875}
876
877static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
878{
879 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
880 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
881 emit_cmp(rs1,rs2);
882 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
883 emit_cmovb_imm(1,rt);
884}
885
2a014d73 886static int can_jump_or_call(const void *a)
887{
888 intptr_t diff = (u_char *)a - out;
889 return (-134217728 <= diff && diff <= 134217727);
890}
891
d1e4ebd9 892static void emit_call(const void *a)
be516ebe 893{
d1e4ebd9 894 intptr_t diff = (u_char *)a - out;
895 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 896 assert(!(diff & 3));
897 if (-134217728 <= diff && diff <= 134217727)
898 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
899 else
3968e69e 900 abort();
be516ebe 901}
902
d1e4ebd9 903static void emit_jmp(const void *a)
be516ebe 904{
d1e4ebd9 905 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
906 u_int offset = genjmp(a);
907 output_w32(0x14000000 | offset);
be516ebe 908}
909
d1e4ebd9 910static void emit_jne(const void *a)
be516ebe 911{
d1e4ebd9 912 assem_debug("bne %p\n", a);
913 u_int offset = genjmpcc(a);
914 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 915}
916
7c3a5182 917static void emit_jeq(const void *a)
be516ebe 918{
d1e4ebd9 919 assem_debug("beq %p\n", a);
920 u_int offset = genjmpcc(a);
921 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 922}
923
7c3a5182 924static void emit_js(const void *a)
be516ebe 925{
d1e4ebd9 926 assem_debug("bmi %p\n", a);
927 u_int offset = genjmpcc(a);
928 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 929}
930
7c3a5182 931static void emit_jns(const void *a)
be516ebe 932{
d1e4ebd9 933 assem_debug("bpl %p\n", a);
934 u_int offset = genjmpcc(a);
935 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 936}
937
7c3a5182 938static void emit_jl(const void *a)
be516ebe 939{
d1e4ebd9 940 assem_debug("blt %p\n", a);
941 u_int offset = genjmpcc(a);
942 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 943}
944
7c3a5182 945static void emit_jge(const void *a)
be516ebe 946{
d1e4ebd9 947 assem_debug("bge %p\n", a);
948 u_int offset = genjmpcc(a);
949 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 950}
951
7c3a5182 952static void emit_jno(const void *a)
be516ebe 953{
d1e4ebd9 954 assem_debug("bvc %p\n", a);
955 u_int offset = genjmpcc(a);
956 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 957}
958
7c3a5182 959static void emit_jc(const void *a)
be516ebe 960{
d1e4ebd9 961 assem_debug("bcs %p\n", a);
962 u_int offset = genjmpcc(a);
963 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 964}
965
3968e69e 966static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 967{
3968e69e 968 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 969 u_int offset = genjmpcc(a);
3968e69e 970 is64 = is64 ? 0x80000000 : 0;
971 isnz = isnz ? 0x01000000 : 0;
972 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
973}
974
975static void emit_cbz(const void *a, u_int r)
976{
977 emit_cb(0, 0, a, r);
be516ebe 978}
979
980static void emit_jmpreg(u_int r)
981{
3968e69e 982 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 983 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 984}
985
986static void emit_retreg(u_int r)
987{
d1e4ebd9 988 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 989 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
990}
991
992static void emit_ret(void)
993{
994 emit_retreg(LR);
995}
996
d1e4ebd9 997static void emit_adr(void *addr, u_int rt)
998{
999 intptr_t offset = (u_char *)addr - out;
1000 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1001 assert(rt < 31);
d1e4ebd9 1002 assem_debug("adr x%d,#%#lx\n", rt, offset);
1003 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1004}
1005
3968e69e 1006static void emit_adrp(void *addr, u_int rt)
1007{
1008 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1009 assert(-4294967296l <= offset && offset < 4294967296l);
1010 assert(rt < 31);
1011 offset >>= 12;
1012 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1013 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1014}
1015
be516ebe 1016static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1017{
d1e4ebd9 1018 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1019 assert(-256 <= offset && offset < 256);
1020 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1021}
1022
1023static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1024{
1025 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1026 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1027}
1028
1029static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1030{
1031 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1033}
1034
1035static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1036{
1037 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1039}
1040
1041static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1042{
1043 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1045}
39b71d9a 1046#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1047
1048static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1049{
1050 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1051 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1052}
1053
1054static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055{
1056 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1058}
1059
1060static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061{
1062 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1064}
1065
1066static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1067{
1068 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1070}
1071
1072static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1073{
1074 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1075 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1076}
1077
be516ebe 1078static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1079{
d1e4ebd9 1080 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1081 assert(-256 <= offset && offset < 256);
1082 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1083}
1084
1085static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1086{
d1e4ebd9 1087 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1088 assert(-256 <= offset && offset < 256);
1089 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1090}
1091
1092static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1093{
d1e4ebd9 1094 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1095 assert(-256 <= offset && offset < 256);
1096 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1097}
1098
1099static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1100{
d1e4ebd9 1101 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1102 assert(-256 <= offset && offset < 256);
1103 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1104}
1105
be516ebe 1106static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1107{
3968e69e 1108 if (!(offset & 3) && (u_int)offset <= 16380) {
1109 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1110 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1111 }
1112 else if (-256 <= offset && offset < 256) {
1113 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1114 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1115 }
687b4580 1116 else
1117 assert(0);
be516ebe 1118}
1119
1120static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1121{
3968e69e 1122 if (!(offset & 1) && (u_int)offset <= 8190) {
1123 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1124 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1125 }
1126 else if (-256 <= offset && offset < 256) {
1127 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1128 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1129 }
687b4580 1130 else
1131 assert(0);
be516ebe 1132}
1133
1134static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1135{
3968e69e 1136 if ((u_int)offset < 4096) {
1137 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1138 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1139 }
1140 else if (-256 <= offset && offset < 256) {
1141 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1142 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1143 }
687b4580 1144 else
1145 assert(0);
be516ebe 1146}
1147
3968e69e 1148static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1149{
3968e69e 1150 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1151 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1152}
1153
3968e69e 1154static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1155{
3968e69e 1156 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1157 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1158}
1159
1160static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1161{
1162 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1163 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1164}
1165
1166static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1167{
1168 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1170}
1171
3968e69e 1172static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1173{
1174 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1176}
1177
1178static void emit_clz(u_int rs, u_int rt)
be516ebe 1179{
1180 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1181 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1182}
1183
be516ebe 1184// special case for checking invalid_code
d1e4ebd9 1185static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1186{
d1e4ebd9 1187 host_tempreg_acquire();
1188 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1189 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1190 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1191 emit_cmpimm(HOST_TEMPREG, imm);
1192 host_tempreg_release();
be516ebe 1193}
1194
3968e69e 1195// special for loadlr_assemble, rs2 is destroyed
1196static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1197{
3968e69e 1198 emit_shl(rs2, shift, rs2);
1199 emit_bic(rs1, rs2, rt);
be516ebe 1200}
1201
3968e69e 1202static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1203{
3968e69e 1204 emit_shr(rs2, shift, rs2);
1205 emit_bic(rs1, rs2, rt);
be516ebe 1206}
1207
d1e4ebd9 1208static void emit_loadlp_ofs(u_int ofs, u_int rt)
1209{
1210 output_w32(0x58000000 | imm19_rt(ofs, rt));
1211}
1212
687b4580 1213static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1214{
687b4580 1215 u_int op = 0xb9000000;
d1e4ebd9 1216 unused const char *ldst = is_st ? "st" : "ld";
1217 unused char rp = is64 ? 'x' : 'w';
687b4580 1218 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1219 is64 = is64 ? 1 : 0;
1220 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1221 ofs = (ofs >> (2+is64));
687b4580 1222 if (!is_st) op |= 0x00400000;
1223 if (is64) op |= 0x40000000;
d1e4ebd9 1224 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1225}
1226
687b4580 1227static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1228{
687b4580 1229 u_int op = 0x29000000;
d1e4ebd9 1230 unused const char *ldst = is_st ? "st" : "ld";
1231 unused char rp = is64 ? 'x' : 'w';
687b4580 1232 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1233 is64 = is64 ? 1 : 0;
1234 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1235 ofs = (ofs >> (2+is64));
1236 assert(-64 <= ofs && ofs <= 63);
1237 ofs &= 0x7f;
1238 if (!is_st) op |= 0x00400000;
1239 if (is64) op |= 0x80000000;
d1e4ebd9 1240 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1241}
1242
1243static void save_load_regs_all(int is_store, u_int reglist)
1244{
1245 int ofs = 0, c = 0;
1246 u_int r, pair[2];
1247 for (r = 0; reglist; r++, reglist >>= 1) {
1248 if (reglist & 1)
1249 pair[c++] = r;
1250 if (c == 2) {
1251 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1252 ofs += 8 * 2;
1253 c = 0;
1254 }
1255 }
1256 if (c) {
1257 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1258 ofs += 8;
1259 }
1260 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1261}
1262
1263// Save registers before function call
1264static void save_regs(u_int reglist)
1265{
1266 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1267 save_load_regs_all(1, reglist);
be516ebe 1268}
1269
1270// Restore registers after function call
1271static void restore_regs(u_int reglist)
1272{
1273 reglist &= CALLER_SAVE_REGS;
687b4580 1274 save_load_regs_all(0, reglist);
be516ebe 1275}
1276
1277/* Stubs/epilogue */
1278
1279static void literal_pool(int n)
1280{
1281 (void)literals;
1282}
1283
// Literal pool jump-over hook: intentionally empty on this backend.
static void literal_pool_jumpover(int n)
{
}
1287
d1e4ebd9 1288// parsed by get_pointer, find_extjump_insn
1289static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1290{
d1e4ebd9 1291 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1292
d1e4ebd9 1293 emit_movz(target & 0xffff, 0);
1294 emit_movk_lsl16(target >> 16, 0);
1295
1296 // addr is in the current recompiled block (max 256k)
1297 // offset shouldn't exceed +/-1MB
1298 emit_adr(addr, 1);
2a014d73 1299 emit_far_jump(linker);
be516ebe 1300}
1301
d1e4ebd9 1302static void check_extjump2(void *src)
be516ebe 1303{
d1e4ebd9 1304 u_int *ptr = src;
1305 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1306 (void)ptr;
be516ebe 1307}
1308
1309// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1310static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1311{
d1e4ebd9 1312 int diff = rt_val - rs_val;
3968e69e 1313 if ((-4096 < diff && diff < 4096)
1314 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1315 emit_addimm(rs, diff, rt);
3968e69e 1316 else if (rt_val == ~rs_val)
1317 emit_not(rs, rt);
d1e4ebd9 1318 else if (is_rotated_mask(rs_val ^ rt_val))
1319 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1320 else
d1e4ebd9 1321 emit_movimm(rt_val, rt);
be516ebe 1322}
1323
d1e4ebd9 1324// return 1 if the above function can do it's job cheaply
687b4580 1325static int is_similar_value(u_int v1, u_int v2)
be516ebe 1326{
687b4580 1327 int diff = v1 - v2;
3968e69e 1328 return (-4096 < diff && diff < 4096)
1329 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1330 || v1 == ~v2
d1e4ebd9 1331 || is_rotated_mask(v1 ^ v2);
1332}
1333
37387d8b 1334static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1335{
1336 if (rt_val < 0x100000000ull) {
1337 emit_movimm_from(rs_val, rs, rt_val, rt);
1338 return;
1339 }
1340 // just move the whole thing. At least on Linux all addresses
1341 // seem to be 48bit, so 3 insns - not great not terrible
1342 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1343 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1344 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1345 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1346 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1347 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1348 if (rt_val >> 48) {
1349 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1350 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1351 }
1352}
1353
1354// trashes x2
d1e4ebd9 1355static void pass_args64(u_int a0, u_int a1)
1356{
1357 if(a0==1&&a1==0) {
1358 // must swap
1359 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1360 }
1361 else if(a0!=0&&a1==0) {
1362 emit_mov64(a1,1);
1363 if (a0>=0) emit_mov64(a0,0);
1364 }
1365 else {
1366 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1367 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1368 }
be516ebe 1369}
1370
d1e4ebd9 1371static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1372{
1373 switch(type) {
1374 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1375 case LOADBU_STUB:
1376 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1377 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1378 case LOADHU_STUB:
1379 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1380 case LOADW_STUB:
1381 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1382 default: assert(0);
d1e4ebd9 1383 }
1384}
1385
1386#include "pcsxmem.h"
be516ebe 1387//#include "pcsxmem_inline.c"
1388
1389static void do_readstub(int n)
1390{
1391 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1392 set_jump_target(stubs[n].addr, out);
1393 enum stub_type type = stubs[n].type;
1394 int i = stubs[n].a;
1395 int rs = stubs[n].b;
1396 const struct regstat *i_regs = (void *)stubs[n].c;
1397 u_int reglist = stubs[n].e;
1398 const signed char *i_regmap = i_regs->regmap;
1399 int rt;
cf95b4f0 1400 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1401 rt=get_reg(i_regmap,FTEMP);
1402 }else{
cf95b4f0 1403 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1404 }
1405 assert(rs>=0);
1406 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1407 void *restore_jump = NULL, *handler_jump = NULL;
1408 reglist|=(1<<rs);
1409 for (r = 0; r < HOST_CCREG; r++) {
1410 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1411 temp = r;
1412 break;
1413 }
1414 }
cf95b4f0 1415 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1416 reglist&=~(1<<rt);
1417 if(temp==-1) {
1418 save_regs(reglist);
1419 regs_saved=1;
1420 temp=(rs==0)?2:0;
1421 }
1422 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1423 temp2=1;
1424 emit_readdword(&mem_rtab,temp);
1425 emit_shrimm(rs,12,temp2);
1426 emit_readdword_dualindexedx8(temp,temp2,temp2);
1427 emit_adds64(temp2,temp2,temp2);
1428 handler_jump=out;
1429 emit_jc(0);
cf95b4f0 1430 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1431 switch(type) {
1432 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1433 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1434 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1435 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1436 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1437 default: assert(0);
d1e4ebd9 1438 }
1439 }
1440 if(regs_saved) {
1441 restore_jump=out;
1442 emit_jmp(0); // jump to reg restore
1443 }
1444 else
1445 emit_jmp(stubs[n].retaddr); // return address
1446 set_jump_target(handler_jump, out);
1447
1448 if(!regs_saved)
1449 save_regs(reglist);
1450 void *handler=NULL;
1451 if(type==LOADB_STUB||type==LOADBU_STUB)
1452 handler=jump_handler_read8;
1453 if(type==LOADH_STUB||type==LOADHU_STUB)
1454 handler=jump_handler_read16;
1455 if(type==LOADW_STUB)
1456 handler=jump_handler_read32;
1457 assert(handler);
1458 pass_args64(rs,temp2);
1459 int cc=get_reg(i_regmap,CCREG);
1460 if(cc<0)
1461 emit_loadreg(CCREG,2);
2330734f 1462 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1463 emit_far_call(handler);
d1e4ebd9 1464 // (no cycle reload after read)
cf95b4f0 1465 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1466 loadstore_extend(type,0,rt);
1467 }
1468 if(restore_jump)
1469 set_jump_target(restore_jump, out);
1470 restore_regs(reglist);
1471 emit_jmp(stubs[n].retaddr);
be516ebe 1472}
1473
81dbbf4c 1474static void inline_readstub(enum stub_type type, int i, u_int addr,
1475 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1476{
d1e4ebd9 1477 int rs=get_reg(regmap,target);
1478 int rt=get_reg(regmap,target);
1479 if(rs<0) rs=get_reg(regmap,-1);
1480 assert(rs>=0);
1481 u_int is_dynamic=0;
1482 uintptr_t host_addr = 0;
1483 void *handler;
1484 int cc=get_reg(regmap,CCREG);
2330734f 1485 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
d1e4ebd9 1486 // return;
1487 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1488 if (handler == NULL) {
cf95b4f0 1489 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1490 return;
37387d8b 1491 if (addr != host_addr)
1492 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1493 switch(type) {
1494 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1495 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1496 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1497 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1498 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1499 default: assert(0);
1500 }
1501 return;
1502 }
37387d8b 1503 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1504 if (is_dynamic) {
d1e4ebd9 1505 if(type==LOADB_STUB||type==LOADBU_STUB)
1506 handler=jump_handler_read8;
1507 if(type==LOADH_STUB||type==LOADHU_STUB)
1508 handler=jump_handler_read16;
1509 if(type==LOADW_STUB)
1510 handler=jump_handler_read32;
1511 }
1512
1513 // call a memhandler
cf95b4f0 1514 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1515 reglist&=~(1<<rt);
1516 save_regs(reglist);
1517 if(target==0)
1518 emit_movimm(addr,0);
1519 else if(rs!=0)
1520 emit_mov(rs,0);
1521 if(cc<0)
1522 emit_loadreg(CCREG,2);
2330734f 1523 emit_addimm(cc<0?2:cc,adj,2);
3968e69e 1524 if(is_dynamic) {
1525 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1526 emit_adrp((void *)l1, 1);
1527 emit_addimm64(1, l1 & 0xfff, 1);
1528 }
d1e4ebd9 1529 else
2a014d73 1530 emit_far_call(do_memhandler_pre);
d1e4ebd9 1531
2a014d73 1532 emit_far_call(handler);
d1e4ebd9 1533
1534 // (no cycle reload after read)
cf95b4f0 1535 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1536 loadstore_extend(type, 0, rt);
1537 restore_regs(reglist);
be516ebe 1538}
1539
1540static void do_writestub(int n)
1541{
1542 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1543 set_jump_target(stubs[n].addr, out);
1544 enum stub_type type=stubs[n].type;
1545 int i=stubs[n].a;
1546 int rs=stubs[n].b;
1547 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1548 u_int reglist=stubs[n].e;
1549 signed char *i_regmap=i_regs->regmap;
1550 int rt,r;
cf95b4f0 1551 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1552 rt=get_reg(i_regmap,r=FTEMP);
1553 }else{
cf95b4f0 1554 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1555 }
1556 assert(rs>=0);
1557 assert(rt>=0);
1558 int rtmp,temp=-1,temp2,regs_saved=0;
1559 void *restore_jump = NULL, *handler_jump = NULL;
1560 int reglist2=reglist|(1<<rs)|(1<<rt);
1561 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1562 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1563 temp = rtmp;
1564 break;
1565 }
1566 }
1567 if(temp==-1) {
1568 save_regs(reglist);
1569 regs_saved=1;
1570 for(rtmp=0;rtmp<=3;rtmp++)
1571 if(rtmp!=rs&&rtmp!=rt)
1572 {temp=rtmp;break;}
1573 }
1574 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1575 temp2=3;
1576 else {
1577 host_tempreg_acquire();
1578 temp2=HOST_TEMPREG;
1579 }
1580 emit_readdword(&mem_wtab,temp);
1581 emit_shrimm(rs,12,temp2);
1582 emit_readdword_dualindexedx8(temp,temp2,temp2);
1583 emit_adds64(temp2,temp2,temp2);
1584 handler_jump=out;
1585 emit_jc(0);
1586 switch(type) {
1587 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1588 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1589 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1590 default: assert(0);
1591 }
1592 if(regs_saved) {
1593 restore_jump=out;
1594 emit_jmp(0); // jump to reg restore
1595 }
1596 else
1597 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1598 set_jump_target(handler_jump, out);
1599
d1e4ebd9 1600 if(!regs_saved)
1601 save_regs(reglist);
1602 void *handler=NULL;
1603 switch(type) {
1604 case STOREB_STUB: handler=jump_handler_write8; break;
1605 case STOREH_STUB: handler=jump_handler_write16; break;
1606 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1607 default: assert(0);
d1e4ebd9 1608 }
1609 assert(handler);
1610 pass_args(rs,rt);
1611 if(temp2!=3) {
1612 emit_mov64(temp2,3);
1613 host_tempreg_release();
1614 }
1615 int cc=get_reg(i_regmap,CCREG);
1616 if(cc<0)
1617 emit_loadreg(CCREG,2);
2330734f 1618 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1619 // returns new cycle_count
2a014d73 1620 emit_far_call(handler);
2330734f 1621 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1622 if(cc<0)
1623 emit_storereg(CCREG,2);
1624 if(restore_jump)
1625 set_jump_target(restore_jump, out);
1626 restore_regs(reglist);
1627 emit_jmp(stubs[n].retaddr);
be516ebe 1628}
1629
81dbbf4c 1630static void inline_writestub(enum stub_type type, int i, u_int addr,
1631 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1632{
687b4580 1633 int rs = get_reg(regmap,-1);
1634 int rt = get_reg(regmap,target);
1635 assert(rs >= 0);
1636 assert(rt >= 0);
1637 uintptr_t host_addr = 0;
1638 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1639 if (handler == NULL) {
37387d8b 1640 if (addr != host_addr)
1641 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1642 switch (type) {
687b4580 1643 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1644 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1645 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1646 default: assert(0);
1647 }
1648 return;
1649 }
1650
1651 // call a memhandler
1652 save_regs(reglist);
687b4580 1653 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1654 loadstore_extend(type, rt, 0);
1655 int cc, cc_use;
1656 cc = cc_use = get_reg(regmap, CCREG);
1657 if (cc < 0)
1658 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1659 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1660
2a014d73 1661 emit_far_call(do_memhandler_pre);
1662 emit_far_call(handler);
1663 emit_far_call(do_memhandler_post);
2330734f 1664 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1665 if (cc < 0)
1666 emit_storereg(CCREG, cc_use);
687b4580 1667 restore_regs(reglist);
be516ebe 1668}
1669
// Compare size bytes of source against copy.
// Returns 0 when they are identical, nonzero (memcmp's result) otherwise.
// Called from the code emitted by do_dirty_stub_base.
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  return memcmp(source, copy, size);
}
1676
1677// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1678static void do_dirty_stub_base(u_int vaddr, u_int source_len)
3968e69e 1679{
3d680478 1680 assert(source_len <= MAXBLOCK*4);
3968e69e 1681 emit_loadlp_ofs(0, 0); // ldr x1, source
1682 emit_loadlp_ofs(0, 1); // ldr x2, copy
3d680478 1683 emit_movz(source_len, 2);
2a014d73 1684 emit_far_call(verify_code_arm64);
3968e69e 1685 void *jmp = out;
1686 emit_cbz(0, 0);
1687 emit_movz(vaddr & 0xffff, 0);
1688 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1689 emit_far_call(get_addr);
3968e69e 1690 emit_jmpreg(0);
1691 set_jump_target(jmp, out);
1692}
1693
1694static void assert_dirty_stub(const u_int *ptr)
1695{
1696 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1697 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
3d680478 1698 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
3968e69e 1699 assert( ptr[8] == 0xd61f0000); // br x0
be516ebe 1700}
1701
d1e4ebd9 1702static void set_loadlp(u_int *loadl, void *lit)
be516ebe 1703{
d1e4ebd9 1704 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1705 assert((*loadl & ~0x1f) == 0x58000000);
1706 assert((ofs & 3) == 0);
1707 assert(ofs < 0x100000);
1708 *loadl |= (ofs >> 2) << 5;
1709}
1710
d1e4ebd9 1711static void do_dirty_stub_emit_literals(u_int *loadlps)
1712{
1713 set_loadlp(&loadlps[0], out);
1714 output_w64((uintptr_t)source);
1715 set_loadlp(&loadlps[1], out);
1716 output_w64((uintptr_t)copy);
be516ebe 1717}
1718
3d680478 1719static void *do_dirty_stub(int i, u_int source_len)
be516ebe 1720{
1721 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1722 u_int *loadlps = (void *)out;
3d680478 1723 do_dirty_stub_base(start + i*4, source_len);
d1e4ebd9 1724 void *entry = out;
be516ebe 1725 load_regs_entry(i);
d1e4ebd9 1726 if (entry == out)
1727 entry = instr_addr[i];
1728 emit_jmp(instr_addr[i]);
1729 do_dirty_stub_emit_literals(loadlps);
1730 return entry;
be516ebe 1731}
1732
3d680478 1733static void do_dirty_stub_ds(u_int source_len)
be516ebe 1734{
d1e4ebd9 1735 u_int *loadlps = (void *)out;
3d680478 1736 do_dirty_stub_base(start + 1, source_len);
3968e69e 1737 void *lit_jumpover = out;
d1e4ebd9 1738 emit_jmp(out + 8*2);
1739 do_dirty_stub_emit_literals(loadlps);
3968e69e 1740 set_jump_target(lit_jumpover, out);
be516ebe 1741}
1742
3968e69e 1743static uint64_t get_from_ldr_literal(const u_int *i)
1744{
1745 signed int ofs;
1746 assert((i[0] & 0xff000000) == 0x58000000);
1747 ofs = i[0] << 8;
1748 ofs >>= 5+8;
1749 return *(uint64_t *)(i + ofs);
1750}
be516ebe 1751
3968e69e 1752static uint64_t get_from_movz(const u_int *i)
1753{
1754 assert((i[0] & 0x7fe00000) == 0x52800000);
1755 return (i[0] >> 5) & 0xffff;
1756}
be516ebe 1757
3968e69e 1758// Find the "clean" entry point from a "dirty" entry point
1759// by skipping past the call to verify_code
1760static void *get_clean_addr(u_int *addr)
be516ebe 1761{
3968e69e 1762 assert_dirty_stub(addr);
1763 return addr + 9;
be516ebe 1764}
be516ebe 1765
3968e69e 1766static int verify_dirty(const u_int *ptr)
be516ebe 1767{
3968e69e 1768 const void *source, *copy;
1769 u_int len;
1770 assert_dirty_stub(ptr);
1771 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1772 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
3d680478 1773 len = get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1774 return !memcmp(source, copy, len);
1775}
1776
1777static int isclean(void *addr)
1778{
1779 const u_int *ptr = addr;
1780 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1781 assert_dirty_stub(ptr);
1782 return 0;
1783 }
1784 return 1;
1785}
1786
1787// get source that block at addr was compiled from (host pointers)
1788static void get_bounds(void *addr, u_char **start, u_char **end)
1789{
1790 const u_int *ptr = addr;
1791 assert_dirty_stub(ptr);
1792 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
3d680478 1793 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1794}
1795
1796/* Special assem */
1797
81dbbf4c 1798static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1799{
1800 save_load_regs_all(1, reglist);
32631e6a 1801 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1802#ifdef PCNT
1803 emit_movimm(op, 0);
2a014d73 1804 emit_far_call(pcnt_gte_start);
3968e69e 1805#endif
1806 // pointer to cop2 regs
1807 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1808}
1809
1810static void c2op_epilogue(u_int op,u_int reglist)
1811{
1812#ifdef PCNT
1813 emit_movimm(op, 0);
2a014d73 1814 emit_far_call(pcnt_gte_end);
3968e69e 1815#endif
1816 save_load_regs_all(0, reglist);
be516ebe 1817}
1818
81dbbf4c 1819static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1820{
3968e69e 1821 u_int c2op=source[i]&0x3f;
1822 u_int hr,reglist_full=0,reglist;
1823 int need_flags,need_ir;
1824 for(hr=0;hr<HOST_REGS;hr++) {
1825 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1826 }
1827 reglist=reglist_full&CALLER_SAVE_REGS;
1828
1829 if (gte_handlers[c2op]!=NULL) {
1830 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1831 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1832 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1833 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1834 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1835 need_flags=0;
1836 //int shift = (source[i] >> 19) & 1;
1837 //int lm = (source[i] >> 10) & 1;
1838 switch(c2op) {
1839 default:
1840 (void)need_ir;
81dbbf4c 1841 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1842 emit_movimm(source[i],1); // opcode
1843 emit_writeword(1,&psxRegs.code);
2a014d73 1844 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1845 break;
1846 }
1847 c2op_epilogue(c2op,reglist);
1848 }
1849}
1850
1851static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1852{
1853 //value = value & 0x7ffff000;
1854 //if (value & 0x7f87e000) value |= 0x80000000;
1855 emit_andimm(sl, 0x7fffe000, temp);
1856 emit_testimm(temp, 0xff87ffff);
1857 emit_andimm(sl, 0x7ffff000, temp);
1858 host_tempreg_acquire();
1859 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1860 emit_cmovne_reg(HOST_TEMPREG, temp);
1861 host_tempreg_release();
1862 assert(0); // testing needed
1863}
1864
1865static void do_mfc2_31_one(u_int copr,signed char temp)
1866{
1867 emit_readshword(&reg_cop2d[copr],temp);
1868 emit_bicsar_imm(temp,31,temp);
1869 emit_cmpimm(temp,0xf80);
1870 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1871 emit_andimm(temp,0xf80,temp);
1872}
1873
1874static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1875{
1876 if (temp < 0) {
1877 host_tempreg_acquire();
1878 temp = HOST_TEMPREG;
1879 }
1880 do_mfc2_31_one(9,temp);
1881 emit_shrimm(temp,7,tl);
1882 do_mfc2_31_one(10,temp);
1883 emit_orrshr_imm(temp,2,tl);
1884 do_mfc2_31_one(11,temp);
1885 emit_orrshl_imm(temp,3,tl);
1886 emit_writeword(tl,&reg_cop2d[29]);
1887
1888 if (temp == HOST_TEMPREG)
1889 host_tempreg_release();
be516ebe 1890}
1891
2330734f 1892static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1893{
3968e69e 1894 // case 0x18: MULT
1895 // case 0x19: MULTU
1896 // case 0x1A: DIV
1897 // case 0x1B: DIVU
cf95b4f0 1898 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1899 {
cf95b4f0 1900 switch(dops[i].opcode2)
3968e69e 1901 {
1902 case 0x18: // MULT
1903 case 0x19: // MULTU
1904 {
cf95b4f0 1905 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1906 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1907 signed char hi=get_reg(i_regs->regmap,HIREG);
1908 signed char lo=get_reg(i_regs->regmap,LOREG);
1909 assert(m1>=0);
1910 assert(m2>=0);
1911 assert(hi>=0);
1912 assert(lo>=0);
1913
cf95b4f0 1914 if(dops[i].opcode2==0x18) // MULT
3968e69e 1915 emit_smull(m1,m2,hi);
1916 else // MULTU
1917 emit_umull(m1,m2,hi);
1918
1919 emit_mov(hi,lo);
1920 emit_shrimm64(hi,32,hi);
1921 break;
1922 }
1923 case 0x1A: // DIV
1924 case 0x1B: // DIVU
1925 {
cf95b4f0 1926 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1927 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1928 signed char quotient=get_reg(i_regs->regmap,LOREG);
1929 signed char remainder=get_reg(i_regs->regmap,HIREG);
1930 assert(numerator>=0);
1931 assert(denominator>=0);
1932 assert(quotient>=0);
1933 assert(remainder>=0);
1934
cf95b4f0 1935 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1936 emit_sdiv(numerator,denominator,quotient);
1937 else // DIVU
1938 emit_udiv(numerator,denominator,quotient);
1939 emit_msub(quotient,denominator,numerator,remainder);
1940
1941 // div 0 quotient (remainder is already correct)
1942 host_tempreg_acquire();
cf95b4f0 1943 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1944 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1945 else
1946 emit_movimm(~0,HOST_TEMPREG);
1947 emit_test(denominator,denominator);
1948 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1949 host_tempreg_release();
1950 break;
1951 }
1952 default:
1953 assert(0);
1954 }
1955 }
1956 else
1957 {
1958 signed char hr=get_reg(i_regs->regmap,HIREG);
1959 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1960 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1961 {
cf95b4f0 1962 if (dops[i].rs1) {
1963 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1964 assert(numerator >= 0);
1965 if (hr >= 0)
1966 emit_mov(numerator,hr);
1967 if (lr >= 0) {
cf95b4f0 1968 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1969 emit_sub_asrimm(0,numerator,31,lr);
1970 else
1971 emit_movimm(~0,lr);
1972 }
1973 }
1974 else {
1975 if (hr >= 0) emit_zeroreg(hr);
1976 if (lr >= 0) emit_movimm(~0,lr);
1977 }
1978 }
1979 else
1980 {
1981 // Multiply by zero is zero.
1982 if (hr >= 0) emit_zeroreg(hr);
1983 if (lr >= 0) emit_zeroreg(lr);
1984 }
1985 }
be516ebe 1986}
1987#define multdiv_assemble multdiv_assemble_arm64
1988
d1e4ebd9 1989static void do_jump_vaddr(u_int rs)
1990{
1991 if (rs != 0)
1992 emit_mov(rs, 0);
2a014d73 1993 emit_far_call(get_addr_ht);
d1e4ebd9 1994 emit_jmpreg(0);
1995}
1996
be516ebe 1997static void do_preload_rhash(u_int r) {
1998 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1999 // register. On ARM the hash can be done with a single instruction (below)
2000}
2001
2002static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 2003 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 2004}
2005
2006static void do_rhash(u_int rs,u_int rh) {
2007 emit_andimm(rs, 0xf8, rh);
2008}
2009
d1e4ebd9 2010static void do_miniht_load(int ht, u_int rh) {
2011 emit_add64(ht, rh, ht);
2012 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2013}
2014
d1e4ebd9 2015static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2016 emit_cmp(rh, rs);
2017 void *jaddr = out;
2018 emit_jeq(0);
2019 do_jump_vaddr(rs);
2020
2021 set_jump_target(jaddr, out);
2022 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2023 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2024 emit_jmpreg(ht);
be516ebe 2025}
2026
d1e4ebd9 2027// parsed by set_jump_target?
be516ebe 2028static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2029 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2030 emit_movk(return_address&0xffff,rt);
2031 add_to_linker(out,return_address,1);
2032 emit_adr(out,temp);
2033 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2034 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2035}
2036
/*
 * Make the code written to [start, end) visible to instruction fetch.
 *
 * GCC's __clear_cache is not used because it caches the icache/dcache
 * line sizes, and those can differ between cores on big.LITTLE systems.
 * Instead CTR_EL0 is read on every call and the smallest line sizes seen
 * so far are remembered and used.
 */
static void clear_cache_arm64(char *start, char *end)
{
  // running minimum of the line sizes over all calls
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t limit = (uint64_t)end;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (isize < icache_line_size)
    icache_line_size = isize;
  isize = icache_line_size;
  if (dsize < dcache_line_size)
    dcache_line_size = dsize;
  dsize = dcache_line_size;

  /* With CTR_EL0.IDC (bit 28) set, a data cache clean to the Point of
     Unification is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << 28))) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < limit) {
      // "civac" rather than "cvau": this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* With CTR_EL0.DIC (bit 29) set, instruction cache invalidation to the
     Point of Unification is not required for data to instruction
     coherence. */
  if (!(ctr_el0 & (1 << 29))) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < limit) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2077
2078// CPU-architecture-specific initialization
2a014d73 2079static void arch_init(void)
2080{
2081 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2082 struct tramp_insns *ops = ndrc->tramp.ops;
2083 size_t i;
2084 assert(!(diff & 3));
2085 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2086 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2087 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2088 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2089 }
2090 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2091}
2092
2093// vim:shiftwidth=2:expandtab