drc: get rid of RAM_FIXED, revive ROREG
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
d1e4ebd9 30void do_memhandler_pre();
31void do_memhandler_post();
be516ebe 32
33/* Linker */
// Patch the branch-like instruction at addr so that it refers to target.
// Handles b (imm26), b.cond / cbz / cbnz (imm19) and adr (immhi:immlo);
// any other instruction at addr is treated as a fatal error.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Keep the low 5 bits: that is the full Rt field of cbz/cbnz
    // (a 4-bit mask would turn w16..w30 into w0..w14); for b.cond
    // bit 4 is part of the opcode and is simply carried over.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
60
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// locate the instruction that jumps to it: the third word of the stub must
// be an adr, and the address that adr computes is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode on an unsigned copy so that no signed value is shifted left
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000u) == 0x10000000u); // adr
  int32_t immhi = (int32_t)(insn << 8) >> 13; // sign-extended bits 23:5
  int offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// locate the branch instruction through find_extjump_insn(),
// decode its displacement (b, b.cond or cbz/cbnz),
// and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  // decode on an unsigned copy so that no signed value is shifted left
  uint32_t insn = (uint32_t)*i_ptr;
  if ((insn&0xfc000000u) == 0x14000000u) // b
    return i_ptr + ((int32_t)(insn << 6) >> 6);
  if ((insn&0xff000000u) == 0x54000000u // b.cond
      || (insn&0x7e000000u) == 0x34000000u) // cbz/cbnz
    return i_ptr + ((int32_t)(insn << 8) >> 13);
  assert(0);
  return NULL;
}
86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes, in the order used for the 4-bit condition field
// of the conditional instructions emitted below.
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
d1e4ebd9 149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
687b4580 156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
d1e4ebd9 162*/
687b4580 163
// Pack rn into bits 9:5 and rd into bits 4:0; both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  return fields;
}
170
// Pack rm into bits 20:16, rn into bits 9:5 and rd into bits 4:0.
// Register number 31 is accepted for all three fields.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
178
3968e69e 179static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180{
181 assert(ra < 32);
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
183}
184
// Pack imm7 (bits 21:15), rt2 (bits 14:10), rn (bits 9:5) and rt (bits 4:0).
// rt and rt2 must be below 31; rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  u_int fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
193
687b4580 194static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195{
196 assert(imm6 <= 63);
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
198}
199
// Pack a 16-bit immediate (bits 20:5) and rd (bits 4:0); rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  u_int fields = rd;
  fields |= imm16 << 5;
  return fields;
}
206
// Pack a 12-bit immediate (bits 21:10), rn (bits 9:5) and rd (bits 4:0).
// Register number 31 is accepted for rn and rd.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
214
// Pack a 9-bit immediate (bits 20:12), rn (bits 9:5) and the transfer
// register (bits 4:0); both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
222
// Pack a 19-bit immediate (bits 23:5) and rt (bits 4:0); rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  u_int fields = rt;
  fields |= imm19 << 5;
  return fields;
}
229
// Pack the bitfield/logical-immediate operands: n (bit 22), immr (bits 21:16),
// imms (bits 15:10), rn (bits 9:5) and rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
239
240static u_int genjmp(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 244 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
be516ebe 247 return 0;
248 }
d1e4ebd9 249 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 250}
251
d1e4ebd9 252static u_int genjmpcc(const u_char *addr)
be516ebe 253{
254 intptr_t offset = addr - out;
d1e4ebd9 255 if ((uintptr_t)addr < 3) return 0;
be516ebe 256 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
// Return 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two or wraps to zero), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
268
269// This function returns true if the argument contains a
270// non-empty sequence of ones (possibly rotated) with the remainder zero.
271static uint32_t is_rotated_mask(u_int value)
272{
3968e69e 273 if (value == 0 || value == ~0)
be516ebe 274 return 0;
d1e4ebd9 275 if (is_mask((value - 1) | value))
276 return 1;
277 return is_mask((~value - 1) | ~value);
278}
279
280static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
281{
282 int lzeros, tzeros, ones;
283 assert(value != 0);
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
288 *immr = (32 - tzeros) & 31;
289 *imms = ones - 1;
290 return;
be516ebe 291 }
d1e4ebd9 292 value = ~value;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
3968e69e 297 *immr = lzeros;
d1e4ebd9 298 *imms = 31 - ones;
299 return;
300 }
3968e69e 301 abort();
be516ebe 302}
303
304static void emit_mov(u_int rs, u_int rt)
305{
687b4580 306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 314}
315
687b4580 316static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
d1e4ebd9 324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 326}
327
d1e4ebd9 328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
3968e69e 330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
39b71d9a 333#define emit_adds_ptr emit_adds64
d1e4ebd9 334
335static void emit_neg(u_int rs, u_int rt)
336{
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 339}
340
687b4580 341static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 342{
d1e4ebd9 343 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 344 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 345}
346
3968e69e 347static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
348{
349 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
350 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351}
352
d1e4ebd9 353static void emit_movz(u_int imm, u_int rt)
be516ebe 354{
d1e4ebd9 355 assem_debug("movz %s,#%#x\n", regname[rt], imm);
356 output_w32(0x52800000 | imm16_rd(imm, rt));
357}
358
359static void emit_movz_lsl16(u_int imm, u_int rt)
360{
361 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
362 output_w32(0x52a00000 | imm16_rd(imm, rt));
363}
364
365static void emit_movn(u_int imm, u_int rt)
366{
367 assem_debug("movn %s,#%#x\n", regname[rt], imm);
368 output_w32(0x12800000 | imm16_rd(imm, rt));
369}
370
371static void emit_movn_lsl16(u_int imm,u_int rt)
372{
373 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x12a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_movk(u_int imm,u_int rt)
378{
379 assem_debug("movk %s,#%#x\n", regname[rt], imm);
380 output_w32(0x72800000 | imm16_rd(imm, rt));
381}
382
383static void emit_movk_lsl16(u_int imm,u_int rt)
384{
385 assert(imm<65536);
3968e69e 386 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 387 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 388}
389
390static void emit_zeroreg(u_int rt)
391{
d1e4ebd9 392 emit_movz(0, rt);
be516ebe 393}
394
be516ebe 395static void emit_movimm(u_int imm, u_int rt)
396{
d1e4ebd9 397 if (imm < 65536)
398 emit_movz(imm, rt);
399 else if ((~imm) < 65536)
400 emit_movn(~imm, rt);
401 else if ((imm&0xffff) == 0)
402 emit_movz_lsl16(imm >> 16, rt);
403 else if (((~imm)&0xffff) == 0)
404 emit_movn_lsl16(~imm >> 16, rt);
405 else if (is_rotated_mask(imm)) {
406 u_int immr, imms;
407 gen_logical_imm(imm, &immr, &imms);
408 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
409 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
410 }
be516ebe 411 else {
d1e4ebd9 412 emit_movz(imm & 0xffff, rt);
413 emit_movk_lsl16(imm >> 16, rt);
be516ebe 414 }
415}
416
687b4580 417static void emit_readword(void *addr, u_int rt)
418{
419 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
420 if (!(offset & 3) && offset <= 16380) {
421 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
422 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
423 }
424 else
3968e69e 425 abort();
687b4580 426}
427
d1e4ebd9 428static void emit_readdword(void *addr, u_int rt)
429{
430 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
431 if (!(offset & 7) && offset <= 32760) {
432 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
433 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
434 }
3968e69e 435 else
436 abort();
437}
39b71d9a 438#define emit_readptr emit_readdword
3968e69e 439
440static void emit_readshword(void *addr, u_int rt)
441{
442 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
443 if (!(offset & 1) && offset <= 8190) {
444 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
445 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
446 }
d1e4ebd9 447 else
448 assert(0);
449}
450
be516ebe 451static void emit_loadreg(u_int r, u_int hr)
452{
d1e4ebd9 453 int is64 = 0;
be516ebe 454 assert(r < 64);
455 if (r == 0)
456 emit_zeroreg(hr);
457 else {
7c3a5182 458 void *addr = &psxRegs.GPR.r[r];
be516ebe 459 switch (r) {
7c3a5182 460 //case HIREG: addr = &hi; break;
461 //case LOREG: addr = &lo; break;
be516ebe 462 case CCREG: addr = &cycle_count; break;
463 case CSREG: addr = &Status; break;
d1e4ebd9 464 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 465 case ROREG: addr = &ram_offset; is64 = 1; break;
7c3a5182 466 default: assert(r < 34); break;
be516ebe 467 }
d1e4ebd9 468 if (is64)
469 emit_readdword(addr, hr);
470 else
471 emit_readword(addr, hr);
be516ebe 472 }
473}
474
687b4580 475static void emit_writeword(u_int rt, void *addr)
476{
477 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
478 if (!(offset & 3) && offset <= 16380) {
479 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
480 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
481 }
482 else
483 assert(0);
484}
485
d1e4ebd9 486static void emit_writedword(u_int rt, void *addr)
487{
488 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
489 if (!(offset & 7) && offset <= 32760) {
490 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 491 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 492 }
493 else
3968e69e 494 abort();
d1e4ebd9 495}
496
687b4580 497static void emit_storereg(u_int r, u_int hr)
be516ebe 498{
499 assert(r < 64);
7c3a5182 500 void *addr = &psxRegs.GPR.r[r];
be516ebe 501 switch (r) {
7c3a5182 502 //case HIREG: addr = &hi; break;
503 //case LOREG: addr = &lo; break;
be516ebe 504 case CCREG: addr = &cycle_count; break;
7c3a5182 505 default: assert(r < 34); break;
be516ebe 506 }
687b4580 507 emit_writeword(hr, addr);
be516ebe 508}
509
510static void emit_test(u_int rs, u_int rt)
511{
d1e4ebd9 512 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
513 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 514}
515
d1e4ebd9 516static void emit_testimm(u_int rs, u_int imm)
be516ebe 517{
d1e4ebd9 518 u_int immr, imms;
687b4580 519 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 520 assert(is_rotated_mask(imm)); // good enough for PCSX
521 gen_logical_imm(imm, &immr, &imms);
3968e69e 522 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 523}
524
525static void emit_not(u_int rs,u_int rt)
526{
527 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 528 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 529}
530
be516ebe 531static void emit_and(u_int rs1,u_int rs2,u_int rt)
532{
533 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 534 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 535}
536
537static void emit_or(u_int rs1,u_int rs2,u_int rt)
538{
539 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 540 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 541}
542
3968e69e 543static void emit_bic(u_int rs1,u_int rs2,u_int rt)
544{
545 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
546 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
547}
548
be516ebe 549static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
550{
be516ebe 551 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 552 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 553}
554
555static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
556{
be516ebe 557 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 558 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 559}
560
3968e69e 561static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
562{
563 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
564 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
565}
566
be516ebe 567static void emit_xor(u_int rs1,u_int rs2,u_int rt)
568{
569 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 570 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 571}
572
3968e69e 573static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
574{
575 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
576 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
577}
578
d1e4ebd9 579static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 580{
d1e4ebd9 581 unused const char *st = s ? "s" : "";
582 s = s ? 0x20000000 : 0;
583 is64 = is64 ? 0x80000000 : 0;
687b4580 584 if (imm < 4096) {
d1e4ebd9 585 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
586 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 587 }
588 else if (-imm < 4096) {
3968e69e 589 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 590 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
591 }
592 else if (imm < 16777216) {
593 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
594 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
595 if ((imm & 0xfff) || s) {
596 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 597 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 598 }
599 }
600 else if (-imm < 16777216) {
601 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
602 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
603 if ((imm & 0xfff) || s) {
604 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
605 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
606 }
687b4580 607 }
608 else
3968e69e 609 abort();
be516ebe 610}
611
d1e4ebd9 612static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
613{
614 emit_addimm_s(0, 0, rs, imm, rt);
615}
616
617static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
618{
619 emit_addimm_s(0, 1, rs, imm, rt);
620}
621
be516ebe 622static void emit_addimm_and_set_flags(int imm, u_int rt)
623{
d1e4ebd9 624 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 625}
626
d1e4ebd9 627static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 628{
d1e4ebd9 629 const char *names[] = { "and", "orr", "eor", "ands" };
630 const char *name = names[op];
631 u_int immr, imms;
632 op = op << 29;
633 if (is_rotated_mask(imm)) {
634 gen_logical_imm(imm, &immr, &imms);
635 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
636 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
637 }
638 else {
639 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
640 host_tempreg_acquire();
641 emit_movimm(imm, HOST_TEMPREG);
642 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
643 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
644 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
645 host_tempreg_release();
646 }
647 (void)name;
be516ebe 648}
649
d1e4ebd9 650static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 651{
d1e4ebd9 652 if (imm == 0)
653 emit_zeroreg(rt);
654 else
655 emit_logicop_imm(0, rs, imm, rt);
be516ebe 656}
657
d1e4ebd9 658static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 659{
d1e4ebd9 660 if (imm == 0) {
661 if (rs != rt)
662 emit_mov(rs, rt);
663 }
664 else
665 emit_logicop_imm(1, rs, imm, rt);
be516ebe 666}
667
d1e4ebd9 668static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 669{
d1e4ebd9 670 if (imm == 0) {
671 if (rs != rt)
672 emit_mov(rs, rt);
673 }
674 else
675 emit_logicop_imm(2, rs, imm, rt);
be516ebe 676}
677
d1e4ebd9 678static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 679{
d1e4ebd9 680 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
681 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 682}
683
d1e4ebd9 684static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 685{
d1e4ebd9 686 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
687 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 688}
689
690static void emit_shlimm(u_int rs,u_int imm,u_int rt)
691{
be516ebe 692 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 693 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 694}
695
3968e69e 696static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 697{
3968e69e 698 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 700}
701
3968e69e 702static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 703{
be516ebe 704 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 705 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 706}
707
708static void emit_sarimm(u_int rs,u_int imm,u_int rt)
709{
be516ebe 710 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 711 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 712}
713
714static void emit_rorimm(u_int rs,u_int imm,u_int rt)
715{
3968e69e 716 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 717 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 718}
719
720static void emit_signextend16(u_int rs, u_int rt)
721{
722 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 723 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 724}
725
d1e4ebd9 726static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 727{
3968e69e 728 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 729 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 730}
731
d1e4ebd9 732static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 733{
d1e4ebd9 734 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
735 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 736}
737
d1e4ebd9 738static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 739{
d1e4ebd9 740 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
741 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 742}
743
d1e4ebd9 744static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 745{
d1e4ebd9 746 if (imm < 4096) {
747 assem_debug("cmp %s,%#x\n", regname[rs], imm);
748 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
749 }
750 else if (-imm < 4096) {
751 assem_debug("cmn %s,%#x\n", regname[rs], imm);
752 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
753 }
754 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 755 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 756 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
757 }
758 else {
759 host_tempreg_acquire();
760 emit_movimm(imm, HOST_TEMPREG);
761 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
762 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
763 host_tempreg_release();
764 }
be516ebe 765}
766
d1e4ebd9 767static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 768{
d1e4ebd9 769 assert(imm == 0 || imm == 1);
770 assert(cond0 < 0x10);
771 assert(cond1 < 0x10);
772 if (imm) {
773 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
774 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
775 } else {
776 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
777 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
778 }
be516ebe 779}
780
d1e4ebd9 781static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 782{
d1e4ebd9 783 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 784}
785
d1e4ebd9 786static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 787{
d1e4ebd9 788 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 789}
790
791static void emit_cmovb_imm(int imm,u_int rt)
792{
d1e4ebd9 793 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 794}
795
3968e69e 796static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 797{
3968e69e 798 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
799 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 800}
801
802static void emit_cmovne_reg(u_int rs,u_int rt)
803{
d1e4ebd9 804 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
805 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 806}
807
808static void emit_cmovl_reg(u_int rs,u_int rt)
809{
d1e4ebd9 810 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
811 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 812}
813
e3c6bdb5 814static void emit_cmovb_reg(u_int rs,u_int rt)
815{
816 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
817 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
818}
819
be516ebe 820static void emit_cmovs_reg(u_int rs,u_int rt)
821{
d1e4ebd9 822 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
823 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 824}
825
3968e69e 826static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
827{
828 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
829 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
830}
831
be516ebe 832static void emit_slti32(u_int rs,int imm,u_int rt)
833{
834 if(rs!=rt) emit_zeroreg(rt);
835 emit_cmpimm(rs,imm);
836 if(rs==rt) emit_movimm(0,rt);
837 emit_cmovl_imm(1,rt);
838}
839
840static void emit_sltiu32(u_int rs,int imm,u_int rt)
841{
842 if(rs!=rt) emit_zeroreg(rt);
843 emit_cmpimm(rs,imm);
844 if(rs==rt) emit_movimm(0,rt);
845 emit_cmovb_imm(1,rt);
846}
847
848static void emit_cmp(u_int rs,u_int rt)
849{
850 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 851 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 852}
853
854static void emit_set_gz32(u_int rs, u_int rt)
855{
856 //assem_debug("set_gz32\n");
857 emit_cmpimm(rs,1);
858 emit_movimm(1,rt);
859 emit_cmovl_imm(0,rt);
860}
861
862static void emit_set_nz32(u_int rs, u_int rt)
863{
864 //assem_debug("set_nz32\n");
d1e4ebd9 865 if(rs!=rt) emit_mov(rs,rt);
866 emit_test(rs,rs);
867 emit_cmovne_imm(1,rt);
be516ebe 868}
869
870static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
871{
872 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
873 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
874 emit_cmp(rs1,rs2);
875 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
876 emit_cmovl_imm(1,rt);
877}
878
879static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
880{
881 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
882 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
883 emit_cmp(rs1,rs2);
884 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
885 emit_cmovb_imm(1,rt);
886}
887
2a014d73 888static int can_jump_or_call(const void *a)
889{
890 intptr_t diff = (u_char *)a - out;
891 return (-134217728 <= diff && diff <= 134217727);
892}
893
d1e4ebd9 894static void emit_call(const void *a)
be516ebe 895{
d1e4ebd9 896 intptr_t diff = (u_char *)a - out;
897 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 898 assert(!(diff & 3));
899 if (-134217728 <= diff && diff <= 134217727)
900 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
901 else
3968e69e 902 abort();
be516ebe 903}
904
d1e4ebd9 905static void emit_jmp(const void *a)
be516ebe 906{
d1e4ebd9 907 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
908 u_int offset = genjmp(a);
909 output_w32(0x14000000 | offset);
be516ebe 910}
911
d1e4ebd9 912static void emit_jne(const void *a)
be516ebe 913{
d1e4ebd9 914 assem_debug("bne %p\n", a);
915 u_int offset = genjmpcc(a);
916 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 917}
918
7c3a5182 919static void emit_jeq(const void *a)
be516ebe 920{
d1e4ebd9 921 assem_debug("beq %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 924}
925
7c3a5182 926static void emit_js(const void *a)
be516ebe 927{
d1e4ebd9 928 assem_debug("bmi %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 931}
932
7c3a5182 933static void emit_jns(const void *a)
be516ebe 934{
d1e4ebd9 935 assem_debug("bpl %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 938}
939
7c3a5182 940static void emit_jl(const void *a)
be516ebe 941{
d1e4ebd9 942 assem_debug("blt %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 945}
946
7c3a5182 947static void emit_jge(const void *a)
be516ebe 948{
d1e4ebd9 949 assem_debug("bge %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 952}
953
7c3a5182 954static void emit_jno(const void *a)
be516ebe 955{
d1e4ebd9 956 assem_debug("bvc %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 959}
960
7c3a5182 961static void emit_jc(const void *a)
be516ebe 962{
d1e4ebd9 963 assem_debug("bcs %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 966}
967
3968e69e 968static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 969{
3968e69e 970 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 971 u_int offset = genjmpcc(a);
3968e69e 972 is64 = is64 ? 0x80000000 : 0;
973 isnz = isnz ? 0x01000000 : 0;
974 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
975}
976
977static void emit_cbz(const void *a, u_int r)
978{
979 emit_cb(0, 0, a, r);
be516ebe 980}
981
982static void emit_jmpreg(u_int r)
983{
3968e69e 984 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 985 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 986}
987
988static void emit_retreg(u_int r)
989{
d1e4ebd9 990 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 991 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
992}
993
994static void emit_ret(void)
995{
996 emit_retreg(LR);
997}
998
d1e4ebd9 999static void emit_adr(void *addr, u_int rt)
1000{
1001 intptr_t offset = (u_char *)addr - out;
1002 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1003 assert(rt < 31);
d1e4ebd9 1004 assem_debug("adr x%d,#%#lx\n", rt, offset);
1005 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1006}
1007
3968e69e 1008static void emit_adrp(void *addr, u_int rt)
1009{
1010 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1011 assert(-4294967296l <= offset && offset < 4294967296l);
1012 assert(rt < 31);
1013 offset >>= 12;
1014 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1015 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1016}
1017
be516ebe 1018static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1019{
d1e4ebd9 1020 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1021 assert(-256 <= offset && offset < 256);
1022 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1023}
1024
1025static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1026{
1027 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1028 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1029}
1030
1031static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1032{
1033 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1034 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1035}
1036
1037static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1038{
1039 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1040 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1041}
1042
1043static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1044{
1045 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1047}
39b71d9a 1048#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1049
1050static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1051{
1052 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1053 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1054}
1055
1056static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1057{
1058 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1059 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1060}
1061
1062static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1063{
1064 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1065 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1066}
1067
1068static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1069{
1070 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1071 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1072}
1073
1074static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1075{
1076 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1077 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1078}
1079
be516ebe 1080static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1081{
d1e4ebd9 1082 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1083 assert(-256 <= offset && offset < 256);
1084 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1085}
1086
1087static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1088{
d1e4ebd9 1089 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1090 assert(-256 <= offset && offset < 256);
1091 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1092}
1093
1094static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1095{
d1e4ebd9 1096 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1099}
1100
1101static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1102{
d1e4ebd9 1103 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1104 assert(-256 <= offset && offset < 256);
1105 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1106}
1107
be516ebe 1108static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1109{
3968e69e 1110 if (!(offset & 3) && (u_int)offset <= 16380) {
1111 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1112 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1113 }
1114 else if (-256 <= offset && offset < 256) {
1115 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1116 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1117 }
687b4580 1118 else
1119 assert(0);
be516ebe 1120}
1121
1122static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1123{
3968e69e 1124 if (!(offset & 1) && (u_int)offset <= 8190) {
1125 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1126 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1127 }
1128 else if (-256 <= offset && offset < 256) {
1129 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1130 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1131 }
687b4580 1132 else
1133 assert(0);
be516ebe 1134}
1135
1136static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1137{
3968e69e 1138 if ((u_int)offset < 4096) {
1139 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1140 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1141 }
1142 else if (-256 <= offset && offset < 256) {
1143 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1145 }
687b4580 1146 else
1147 assert(0);
be516ebe 1148}
1149
3968e69e 1150static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1151{
3968e69e 1152 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1153 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1154}
1155
3968e69e 1156static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1157{
3968e69e 1158 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1159 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1160}
1161
1162static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1163{
1164 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1165 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1166}
1167
1168static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1169{
1170 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1172}
1173
3968e69e 1174static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1175{
1176 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1178}
1179
1180static void emit_clz(u_int rs, u_int rt)
be516ebe 1181{
1182 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1183 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1184}
1185
be516ebe 1186// special case for checking invalid_code
d1e4ebd9 1187static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1188{
d1e4ebd9 1189 host_tempreg_acquire();
1190 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1191 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1192 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1193 emit_cmpimm(HOST_TEMPREG, imm);
1194 host_tempreg_release();
be516ebe 1195}
1196
3968e69e 1197// special for loadlr_assemble, rs2 is destroyed
1198static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1199{
3968e69e 1200 emit_shl(rs2, shift, rs2);
1201 emit_bic(rs1, rs2, rt);
be516ebe 1202}
1203
3968e69e 1204static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1205{
3968e69e 1206 emit_shr(rs2, shift, rs2);
1207 emit_bic(rs1, rs2, rt);
be516ebe 1208}
1209
d1e4ebd9 1210static void emit_loadlp_ofs(u_int ofs, u_int rt)
1211{
1212 output_w32(0x58000000 | imm19_rt(ofs, rt));
1213}
1214
687b4580 1215static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1216{
687b4580 1217 u_int op = 0xb9000000;
d1e4ebd9 1218 unused const char *ldst = is_st ? "st" : "ld";
1219 unused char rp = is64 ? 'x' : 'w';
687b4580 1220 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1221 is64 = is64 ? 1 : 0;
1222 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1223 ofs = (ofs >> (2+is64));
687b4580 1224 if (!is_st) op |= 0x00400000;
1225 if (is64) op |= 0x40000000;
d1e4ebd9 1226 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1227}
1228
687b4580 1229static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1230{
687b4580 1231 u_int op = 0x29000000;
d1e4ebd9 1232 unused const char *ldst = is_st ? "st" : "ld";
1233 unused char rp = is64 ? 'x' : 'w';
687b4580 1234 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1235 is64 = is64 ? 1 : 0;
1236 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1237 ofs = (ofs >> (2+is64));
1238 assert(-64 <= ofs && ofs <= 63);
1239 ofs &= 0x7f;
1240 if (!is_st) op |= 0x00400000;
1241 if (is64) op |= 0x80000000;
d1e4ebd9 1242 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1243}
1244
1245static void save_load_regs_all(int is_store, u_int reglist)
1246{
1247 int ofs = 0, c = 0;
1248 u_int r, pair[2];
1249 for (r = 0; reglist; r++, reglist >>= 1) {
1250 if (reglist & 1)
1251 pair[c++] = r;
1252 if (c == 2) {
1253 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1254 ofs += 8 * 2;
1255 c = 0;
1256 }
1257 }
1258 if (c) {
1259 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1260 ofs += 8;
1261 }
1262 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1263}
1264
1265// Save registers before function call
1266static void save_regs(u_int reglist)
1267{
1268 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1269 save_load_regs_all(1, reglist);
be516ebe 1270}
1271
1272// Restore registers after function call
1273static void restore_regs(u_int reglist)
1274{
1275 reglist &= CALLER_SAVE_REGS;
687b4580 1276 save_load_regs_all(0, reglist);
be516ebe 1277}
1278
1279/* Stubs/epilogue */
1280
1281static void literal_pool(int n)
1282{
1283 (void)literals;
1284}
1285
/* No-op in this back end. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1289
d1e4ebd9 1290// parsed by get_pointer, find_extjump_insn
1291static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1292{
d1e4ebd9 1293 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1294
d1e4ebd9 1295 emit_movz(target & 0xffff, 0);
1296 emit_movk_lsl16(target >> 16, 0);
1297
1298 // addr is in the current recompiled block (max 256k)
1299 // offset shouldn't exceed +/-1MB
1300 emit_adr(addr, 1);
2a014d73 1301 emit_far_jump(linker);
be516ebe 1302}
1303
d1e4ebd9 1304static void check_extjump2(void *src)
be516ebe 1305{
d1e4ebd9 1306 u_int *ptr = src;
1307 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1308 (void)ptr;
be516ebe 1309}
1310
1311// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1312static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1313{
d1e4ebd9 1314 int diff = rt_val - rs_val;
3968e69e 1315 if ((-4096 < diff && diff < 4096)
1316 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1317 emit_addimm(rs, diff, rt);
3968e69e 1318 else if (rt_val == ~rs_val)
1319 emit_not(rs, rt);
d1e4ebd9 1320 else if (is_rotated_mask(rs_val ^ rt_val))
1321 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1322 else
d1e4ebd9 1323 emit_movimm(rt_val, rt);
be516ebe 1324}
1325
d1e4ebd9 1326// return 1 if the above function can do it's job cheaply
687b4580 1327static int is_similar_value(u_int v1, u_int v2)
be516ebe 1328{
687b4580 1329 int diff = v1 - v2;
3968e69e 1330 return (-4096 < diff && diff < 4096)
1331 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1332 || v1 == ~v2
d1e4ebd9 1333 || is_rotated_mask(v1 ^ v2);
1334}
1335
37387d8b 1336static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1337{
1338 if (rt_val < 0x100000000ull) {
1339 emit_movimm_from(rs_val, rs, rt_val, rt);
1340 return;
1341 }
1342 // just move the whole thing. At least on Linux all addresses
1343 // seem to be 48bit, so 3 insns - not great not terrible
1344 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1345 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1346 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1347 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1348 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1349 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1350 if (rt_val >> 48) {
1351 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1352 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1353 }
1354}
1355
1356// trashes x2
d1e4ebd9 1357static void pass_args64(u_int a0, u_int a1)
1358{
1359 if(a0==1&&a1==0) {
1360 // must swap
1361 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1362 }
1363 else if(a0!=0&&a1==0) {
1364 emit_mov64(a1,1);
1365 if (a0>=0) emit_mov64(a0,0);
1366 }
1367 else {
1368 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1369 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1370 }
be516ebe 1371}
1372
d1e4ebd9 1373static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1374{
1375 switch(type) {
1376 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1377 case LOADBU_STUB:
1378 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1379 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1380 case LOADHU_STUB:
1381 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1382 case LOADW_STUB:
1383 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1384 default: assert(0);
d1e4ebd9 1385 }
1386}
1387
1388#include "pcsxmem.h"
be516ebe 1389//#include "pcsxmem_inline.c"
1390
1391static void do_readstub(int n)
1392{
1393 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1394 set_jump_target(stubs[n].addr, out);
1395 enum stub_type type = stubs[n].type;
1396 int i = stubs[n].a;
1397 int rs = stubs[n].b;
1398 const struct regstat *i_regs = (void *)stubs[n].c;
1399 u_int reglist = stubs[n].e;
1400 const signed char *i_regmap = i_regs->regmap;
1401 int rt;
cf95b4f0 1402 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1403 rt=get_reg(i_regmap,FTEMP);
1404 }else{
cf95b4f0 1405 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1406 }
1407 assert(rs>=0);
1408 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1409 void *restore_jump = NULL, *handler_jump = NULL;
1410 reglist|=(1<<rs);
1411 for (r = 0; r < HOST_CCREG; r++) {
1412 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1413 temp = r;
1414 break;
1415 }
1416 }
cf95b4f0 1417 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1418 reglist&=~(1<<rt);
1419 if(temp==-1) {
1420 save_regs(reglist);
1421 regs_saved=1;
1422 temp=(rs==0)?2:0;
1423 }
1424 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1425 temp2=1;
1426 emit_readdword(&mem_rtab,temp);
1427 emit_shrimm(rs,12,temp2);
1428 emit_readdword_dualindexedx8(temp,temp2,temp2);
1429 emit_adds64(temp2,temp2,temp2);
1430 handler_jump=out;
1431 emit_jc(0);
cf95b4f0 1432 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1433 switch(type) {
1434 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1435 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1436 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1437 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1438 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1439 default: assert(0);
d1e4ebd9 1440 }
1441 }
1442 if(regs_saved) {
1443 restore_jump=out;
1444 emit_jmp(0); // jump to reg restore
1445 }
1446 else
1447 emit_jmp(stubs[n].retaddr); // return address
1448 set_jump_target(handler_jump, out);
1449
1450 if(!regs_saved)
1451 save_regs(reglist);
1452 void *handler=NULL;
1453 if(type==LOADB_STUB||type==LOADBU_STUB)
1454 handler=jump_handler_read8;
1455 if(type==LOADH_STUB||type==LOADHU_STUB)
1456 handler=jump_handler_read16;
1457 if(type==LOADW_STUB)
1458 handler=jump_handler_read32;
1459 assert(handler);
1460 pass_args64(rs,temp2);
1461 int cc=get_reg(i_regmap,CCREG);
1462 if(cc<0)
1463 emit_loadreg(CCREG,2);
bb4f300c 1464 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1465 emit_far_call(handler);
d1e4ebd9 1466 // (no cycle reload after read)
cf95b4f0 1467 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1468 loadstore_extend(type,0,rt);
1469 }
1470 if(restore_jump)
1471 set_jump_target(restore_jump, out);
1472 restore_regs(reglist);
1473 emit_jmp(stubs[n].retaddr);
be516ebe 1474}
1475
81dbbf4c 1476static void inline_readstub(enum stub_type type, int i, u_int addr,
1477 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1478{
d1e4ebd9 1479 int rs=get_reg(regmap,target);
1480 int rt=get_reg(regmap,target);
1481 if(rs<0) rs=get_reg(regmap,-1);
1482 assert(rs>=0);
1483 u_int is_dynamic=0;
1484 uintptr_t host_addr = 0;
1485 void *handler;
1486 int cc=get_reg(regmap,CCREG);
bb4f300c 1487 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
d1e4ebd9 1488 // return;
1489 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1490 if (handler == NULL) {
cf95b4f0 1491 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1492 return;
37387d8b 1493 if (addr != host_addr)
1494 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1495 switch(type) {
1496 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1497 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1498 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1499 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1500 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1501 default: assert(0);
1502 }
1503 return;
1504 }
37387d8b 1505 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1506 if (is_dynamic) {
d1e4ebd9 1507 if(type==LOADB_STUB||type==LOADBU_STUB)
1508 handler=jump_handler_read8;
1509 if(type==LOADH_STUB||type==LOADHU_STUB)
1510 handler=jump_handler_read16;
1511 if(type==LOADW_STUB)
1512 handler=jump_handler_read32;
1513 }
1514
1515 // call a memhandler
cf95b4f0 1516 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1517 reglist&=~(1<<rt);
1518 save_regs(reglist);
1519 if(target==0)
1520 emit_movimm(addr,0);
1521 else if(rs!=0)
1522 emit_mov(rs,0);
1523 if(cc<0)
1524 emit_loadreg(CCREG,2);
bb4f300c 1525 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
3968e69e 1526 if(is_dynamic) {
1527 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1528 emit_adrp((void *)l1, 1);
1529 emit_addimm64(1, l1 & 0xfff, 1);
1530 }
d1e4ebd9 1531 else
2a014d73 1532 emit_far_call(do_memhandler_pre);
d1e4ebd9 1533
2a014d73 1534 emit_far_call(handler);
d1e4ebd9 1535
1536 // (no cycle reload after read)
cf95b4f0 1537 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1538 loadstore_extend(type, 0, rt);
1539 restore_regs(reglist);
be516ebe 1540}
1541
1542static void do_writestub(int n)
1543{
1544 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1545 set_jump_target(stubs[n].addr, out);
1546 enum stub_type type=stubs[n].type;
1547 int i=stubs[n].a;
1548 int rs=stubs[n].b;
1549 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1550 u_int reglist=stubs[n].e;
1551 signed char *i_regmap=i_regs->regmap;
1552 int rt,r;
cf95b4f0 1553 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1554 rt=get_reg(i_regmap,r=FTEMP);
1555 }else{
cf95b4f0 1556 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1557 }
1558 assert(rs>=0);
1559 assert(rt>=0);
1560 int rtmp,temp=-1,temp2,regs_saved=0;
1561 void *restore_jump = NULL, *handler_jump = NULL;
1562 int reglist2=reglist|(1<<rs)|(1<<rt);
1563 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1564 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1565 temp = rtmp;
1566 break;
1567 }
1568 }
1569 if(temp==-1) {
1570 save_regs(reglist);
1571 regs_saved=1;
1572 for(rtmp=0;rtmp<=3;rtmp++)
1573 if(rtmp!=rs&&rtmp!=rt)
1574 {temp=rtmp;break;}
1575 }
1576 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1577 temp2=3;
1578 else {
1579 host_tempreg_acquire();
1580 temp2=HOST_TEMPREG;
1581 }
1582 emit_readdword(&mem_wtab,temp);
1583 emit_shrimm(rs,12,temp2);
1584 emit_readdword_dualindexedx8(temp,temp2,temp2);
1585 emit_adds64(temp2,temp2,temp2);
1586 handler_jump=out;
1587 emit_jc(0);
1588 switch(type) {
1589 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1590 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1591 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1592 default: assert(0);
1593 }
1594 if(regs_saved) {
1595 restore_jump=out;
1596 emit_jmp(0); // jump to reg restore
1597 }
1598 else
1599 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1600 set_jump_target(handler_jump, out);
1601
1602 // TODO FIXME: regalloc should prefer callee-saved regs
1603 if(!regs_saved)
1604 save_regs(reglist);
1605 void *handler=NULL;
1606 switch(type) {
1607 case STOREB_STUB: handler=jump_handler_write8; break;
1608 case STOREH_STUB: handler=jump_handler_write16; break;
1609 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1610 default: assert(0);
d1e4ebd9 1611 }
1612 assert(handler);
1613 pass_args(rs,rt);
1614 if(temp2!=3) {
1615 emit_mov64(temp2,3);
1616 host_tempreg_release();
1617 }
1618 int cc=get_reg(i_regmap,CCREG);
1619 if(cc<0)
1620 emit_loadreg(CCREG,2);
bb4f300c 1621 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
d1e4ebd9 1622 // returns new cycle_count
2a014d73 1623 emit_far_call(handler);
bb4f300c 1624 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
d1e4ebd9 1625 if(cc<0)
1626 emit_storereg(CCREG,2);
1627 if(restore_jump)
1628 set_jump_target(restore_jump, out);
1629 restore_regs(reglist);
1630 emit_jmp(stubs[n].retaddr);
be516ebe 1631}
1632
81dbbf4c 1633static void inline_writestub(enum stub_type type, int i, u_int addr,
1634 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1635{
687b4580 1636 int rs = get_reg(regmap,-1);
1637 int rt = get_reg(regmap,target);
1638 assert(rs >= 0);
1639 assert(rt >= 0);
1640 uintptr_t host_addr = 0;
1641 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1642 if (handler == NULL) {
37387d8b 1643 if (addr != host_addr)
1644 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1645 switch (type) {
687b4580 1646 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1647 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1648 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1649 default: assert(0);
1650 }
1651 return;
1652 }
1653
1654 // call a memhandler
1655 save_regs(reglist);
687b4580 1656 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1657 loadstore_extend(type, rt, 0);
1658 int cc, cc_use;
1659 cc = cc_use = get_reg(regmap, CCREG);
1660 if (cc < 0)
1661 emit_loadreg(CCREG, (cc_use = 2));
bb4f300c 1662 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
d1e4ebd9 1663
2a014d73 1664 emit_far_call(do_memhandler_pre);
1665 emit_far_call(handler);
1666 emit_far_call(do_memhandler_post);
bb4f300c 1667 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
d1e4ebd9 1668 if (cc < 0)
1669 emit_storereg(CCREG, cc_use);
687b4580 1670 restore_regs(reglist);
be516ebe 1671}
1672
/*
 * Compare size bytes of source against copy.
 * Returns the memcmp() result: 0 when they are identical, nonzero otherwise.
 */
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  const int differs = memcmp(source, copy, size);
  //printf("%s %p,%#x = %d\n", __func__, source, size, differs);
  return differs;
}
1679
1680// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1681static void do_dirty_stub_base(u_int vaddr, u_int source_len)
3968e69e 1682{
3d680478 1683 assert(source_len <= MAXBLOCK*4);
3968e69e 1684 emit_loadlp_ofs(0, 0); // ldr x1, source
1685 emit_loadlp_ofs(0, 1); // ldr x2, copy
3d680478 1686 emit_movz(source_len, 2);
2a014d73 1687 emit_far_call(verify_code_arm64);
3968e69e 1688 void *jmp = out;
1689 emit_cbz(0, 0);
1690 emit_movz(vaddr & 0xffff, 0);
1691 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1692 emit_far_call(get_addr);
3968e69e 1693 emit_jmpreg(0);
1694 set_jump_target(jmp, out);
1695}
1696
1697static void assert_dirty_stub(const u_int *ptr)
1698{
1699 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1700 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
3d680478 1701 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
3968e69e 1702 assert( ptr[8] == 0xd61f0000); // br x0
be516ebe 1703}
1704
/*
 * Patch the "ldr (literal)" instruction at loadl so that it loads from lit.
 * lit must be word aligned and less than 1MB ahead of the instruction; the
 * instruction's offset field must still be zero.
 */
static void set_loadlp(u_int *loadl, void *lit)
{
  const uintptr_t byte_ofs = (u_char *)lit - (u_char *)loadl;

  assert((*loadl & ~0x1f) == 0x58000000);
  assert((byte_ofs & 3) == 0);
  assert(byte_ofs < 0x100000);

  const uintptr_t word_ofs = byte_ofs >> 2;
  *loadl |= word_ofs << 5;
}
1713
d1e4ebd9 1714static void do_dirty_stub_emit_literals(u_int *loadlps)
1715{
1716 set_loadlp(&loadlps[0], out);
1717 output_w64((uintptr_t)source);
1718 set_loadlp(&loadlps[1], out);
1719 output_w64((uintptr_t)copy);
be516ebe 1720}
1721
3d680478 1722static void *do_dirty_stub(int i, u_int source_len)
be516ebe 1723{
1724 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1725 u_int *loadlps = (void *)out;
3d680478 1726 do_dirty_stub_base(start + i*4, source_len);
d1e4ebd9 1727 void *entry = out;
be516ebe 1728 load_regs_entry(i);
d1e4ebd9 1729 if (entry == out)
1730 entry = instr_addr[i];
1731 emit_jmp(instr_addr[i]);
1732 do_dirty_stub_emit_literals(loadlps);
1733 return entry;
be516ebe 1734}
1735
3d680478 1736static void do_dirty_stub_ds(u_int source_len)
be516ebe 1737{
d1e4ebd9 1738 u_int *loadlps = (void *)out;
3d680478 1739 do_dirty_stub_base(start + 1, source_len);
3968e69e 1740 void *lit_jumpover = out;
d1e4ebd9 1741 emit_jmp(out + 8*2);
1742 do_dirty_stub_emit_literals(loadlps);
3968e69e 1743 set_jump_target(lit_jumpover, out);
be516ebe 1744}
1745
/*
 * Return the 64-bit literal referenced by the "ldr (literal)" instruction at i.
 * The signed 19-bit word offset in bits 5..23 is relative to the instruction.
 * Sign extension is done explicitly (no implementation-defined shift of a
 * negative value), and the literal is read with memcpy because it is only
 * guaranteed to be word aligned.
 */
static uint64_t get_from_ldr_literal(const u_int *i)
{
  uint64_t value;
  int ofs;

  assert((i[0] & 0xff000000) == 0x58000000);
  ofs = (int)((i[0] >> 5) & 0x7ffff);
  if (ofs & 0x40000)
    ofs -= 0x80000; // negative offset: extend the sign bit
  memcpy(&value, i + ofs, sizeof(value));
  return value;
}
be516ebe 1754
/* Return the 16-bit immediate of the "movz" instruction at i. */
static uint64_t get_from_movz(const u_int *i)
{
  const u_int insn = i[0];
  assert((insn & 0x7fe00000) == 0x52800000);
  const u_int imm16 = (insn >> 5) & 0xffff;
  return imm16;
}
be516ebe 1760
3968e69e 1761// Find the "clean" entry point from a "dirty" entry point
1762// by skipping past the call to verify_code
1763static void *get_clean_addr(u_int *addr)
be516ebe 1764{
3968e69e 1765 assert_dirty_stub(addr);
1766 return addr + 9;
be516ebe 1767}
be516ebe 1768
3968e69e 1769static int verify_dirty(const u_int *ptr)
be516ebe 1770{
3968e69e 1771 const void *source, *copy;
1772 u_int len;
1773 assert_dirty_stub(ptr);
1774 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1775 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
3d680478 1776 len = get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1777 return !memcmp(source, copy, len);
1778}
1779
1780static int isclean(void *addr)
1781{
1782 const u_int *ptr = addr;
1783 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1784 assert_dirty_stub(ptr);
1785 return 0;
1786 }
1787 return 1;
1788}
1789
1790// get source that block at addr was compiled from (host pointers)
1791static void get_bounds(void *addr, u_char **start, u_char **end)
1792{
1793 const u_int *ptr = addr;
1794 assert_dirty_stub(ptr);
1795 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
3d680478 1796 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1797}
1798
1799/* Special assem */
1800
81dbbf4c 1801static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1802{
1803 save_load_regs_all(1, reglist);
32631e6a 1804 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1805#ifdef PCNT
1806 emit_movimm(op, 0);
2a014d73 1807 emit_far_call(pcnt_gte_start);
3968e69e 1808#endif
1809 // pointer to cop2 regs
1810 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1811}
1812
1813static void c2op_epilogue(u_int op,u_int reglist)
1814{
1815#ifdef PCNT
1816 emit_movimm(op, 0);
2a014d73 1817 emit_far_call(pcnt_gte_end);
3968e69e 1818#endif
1819 save_load_regs_all(0, reglist);
be516ebe 1820}
1821
81dbbf4c 1822static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1823{
3968e69e 1824 u_int c2op=source[i]&0x3f;
1825 u_int hr,reglist_full=0,reglist;
1826 int need_flags,need_ir;
1827 for(hr=0;hr<HOST_REGS;hr++) {
1828 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1829 }
1830 reglist=reglist_full&CALLER_SAVE_REGS;
1831
1832 if (gte_handlers[c2op]!=NULL) {
1833 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1834 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1835 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1836 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1837 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1838 need_flags=0;
1839 //int shift = (source[i] >> 19) & 1;
1840 //int lm = (source[i] >> 10) & 1;
1841 switch(c2op) {
1842 default:
1843 (void)need_ir;
81dbbf4c 1844 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1845 emit_movimm(source[i],1); // opcode
1846 emit_writeword(1,&psxRegs.code);
2a014d73 1847 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1848 break;
1849 }
1850 c2op_epilogue(c2op,reglist);
1851 }
1852}
1853
1854static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1855{
1856 //value = value & 0x7ffff000;
1857 //if (value & 0x7f87e000) value |= 0x80000000;
1858 emit_andimm(sl, 0x7fffe000, temp);
1859 emit_testimm(temp, 0xff87ffff);
1860 emit_andimm(sl, 0x7ffff000, temp);
1861 host_tempreg_acquire();
1862 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1863 emit_cmovne_reg(HOST_TEMPREG, temp);
1864 host_tempreg_release();
1865 assert(0); // testing needed
1866}
1867
1868static void do_mfc2_31_one(u_int copr,signed char temp)
1869{
1870 emit_readshword(&reg_cop2d[copr],temp);
1871 emit_bicsar_imm(temp,31,temp);
1872 emit_cmpimm(temp,0xf80);
1873 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1874 emit_andimm(temp,0xf80,temp);
1875}
1876
1877static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1878{
1879 if (temp < 0) {
1880 host_tempreg_acquire();
1881 temp = HOST_TEMPREG;
1882 }
1883 do_mfc2_31_one(9,temp);
1884 emit_shrimm(temp,7,tl);
1885 do_mfc2_31_one(10,temp);
1886 emit_orrshr_imm(temp,2,tl);
1887 do_mfc2_31_one(11,temp);
1888 emit_orrshl_imm(temp,3,tl);
1889 emit_writeword(tl,&reg_cop2d[29]);
1890
1891 if (temp == HOST_TEMPREG)
1892 host_tempreg_release();
be516ebe 1893}
1894
1895static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1896{
3968e69e 1897 // case 0x18: MULT
1898 // case 0x19: MULTU
1899 // case 0x1A: DIV
1900 // case 0x1B: DIVU
cf95b4f0 1901 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1902 {
cf95b4f0 1903 switch(dops[i].opcode2)
3968e69e 1904 {
1905 case 0x18: // MULT
1906 case 0x19: // MULTU
1907 {
cf95b4f0 1908 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1909 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1910 signed char hi=get_reg(i_regs->regmap,HIREG);
1911 signed char lo=get_reg(i_regs->regmap,LOREG);
1912 assert(m1>=0);
1913 assert(m2>=0);
1914 assert(hi>=0);
1915 assert(lo>=0);
1916
cf95b4f0 1917 if(dops[i].opcode2==0x18) // MULT
3968e69e 1918 emit_smull(m1,m2,hi);
1919 else // MULTU
1920 emit_umull(m1,m2,hi);
1921
1922 emit_mov(hi,lo);
1923 emit_shrimm64(hi,32,hi);
1924 break;
1925 }
1926 case 0x1A: // DIV
1927 case 0x1B: // DIVU
1928 {
cf95b4f0 1929 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1930 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1931 signed char quotient=get_reg(i_regs->regmap,LOREG);
1932 signed char remainder=get_reg(i_regs->regmap,HIREG);
1933 assert(numerator>=0);
1934 assert(denominator>=0);
1935 assert(quotient>=0);
1936 assert(remainder>=0);
1937
cf95b4f0 1938 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1939 emit_sdiv(numerator,denominator,quotient);
1940 else // DIVU
1941 emit_udiv(numerator,denominator,quotient);
1942 emit_msub(quotient,denominator,numerator,remainder);
1943
1944 // div 0 quotient (remainder is already correct)
1945 host_tempreg_acquire();
cf95b4f0 1946 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1947 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1948 else
1949 emit_movimm(~0,HOST_TEMPREG);
1950 emit_test(denominator,denominator);
1951 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1952 host_tempreg_release();
1953 break;
1954 }
1955 default:
1956 assert(0);
1957 }
1958 }
1959 else
1960 {
1961 signed char hr=get_reg(i_regs->regmap,HIREG);
1962 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1963 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1964 {
cf95b4f0 1965 if (dops[i].rs1) {
1966 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1967 assert(numerator >= 0);
1968 if (hr >= 0)
1969 emit_mov(numerator,hr);
1970 if (lr >= 0) {
cf95b4f0 1971 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1972 emit_sub_asrimm(0,numerator,31,lr);
1973 else
1974 emit_movimm(~0,lr);
1975 }
1976 }
1977 else {
1978 if (hr >= 0) emit_zeroreg(hr);
1979 if (lr >= 0) emit_movimm(~0,lr);
1980 }
1981 }
1982 else
1983 {
1984 // Multiply by zero is zero.
1985 if (hr >= 0) emit_zeroreg(hr);
1986 if (lr >= 0) emit_zeroreg(lr);
1987 }
1988 }
be516ebe 1989}
1990#define multdiv_assemble multdiv_assemble_arm64
1991
d1e4ebd9 1992static void do_jump_vaddr(u_int rs)
1993{
1994 if (rs != 0)
1995 emit_mov(rs, 0);
2a014d73 1996 emit_far_call(get_addr_ht);
d1e4ebd9 1997 emit_jmpreg(0);
1998}
1999
be516ebe 2000static void do_preload_rhash(u_int r) {
2001 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2002 // register. On ARM the hash can be done with a single instruction (below)
2003}
2004
2005static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 2006 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 2007}
2008
2009static void do_rhash(u_int rs,u_int rh) {
2010 emit_andimm(rs, 0xf8, rh);
2011}
2012
d1e4ebd9 2013static void do_miniht_load(int ht, u_int rh) {
2014 emit_add64(ht, rh, ht);
2015 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2016}
2017
d1e4ebd9 2018static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2019 emit_cmp(rh, rs);
2020 void *jaddr = out;
2021 emit_jeq(0);
2022 do_jump_vaddr(rs);
2023
2024 set_jump_target(jaddr, out);
2025 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2026 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2027 emit_jmpreg(ht);
be516ebe 2028}
2029
d1e4ebd9 2030// parsed by set_jump_target?
be516ebe 2031static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2032 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2033 emit_movk(return_address&0xffff,rt);
2034 add_to_linker(out,return_address,1);
2035 emit_adr(out,temp);
2036 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2037 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2038}
2039
/*
 * Make the code written to [start, end) visible to instruction fetch.
 *
 * GCC's __clear_cache implementation is deliberately not used, as it caches
 * icache/dcache cache line sizes, which can vary between cores on
 * big.LITTLE architectures. Instead the smallest line sizes observed over
 * all calls are remembered and used.
 */
static void clear_cache_arm64(char *start, char *end)
{
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t range_end = (uint64_t)end;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));

  // line sizes are encoded as log2 of the number of 4-byte words
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (icache_line_size < isize)
    isize = icache_line_size;
  icache_line_size = isize;
  if (dcache_line_size < dsize)
    dsize = dcache_line_size;
  dcache_line_size = dsize;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if (!((ctr_el0 >> 28) & 1)) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < range_end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if (!((ctr_el0 >> 29) & 1)) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < range_end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2080
2081// CPU-architecture-specific initialization
2a014d73 2082static void arch_init(void)
2083{
2084 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2085 struct tramp_insns *ops = ndrc->tramp.ops;
2086 size_t i;
2087 assert(!(diff & 3));
2088 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2089 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2090 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2091 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2092 }
2093 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2094}
2095
2096// vim:shiftwidth=2:expandtab