drc: implement cycle reload on read
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26/* Linker */
d1e4ebd9 27static void set_jump_target(void *addr, void *target)
be516ebe 28{
d9e2b173 29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 30 intptr_t offset = (u_char *)target - (u_char *)addr;
31
3968e69e 32 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
35 }
3968e69e 36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 40 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 41 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 42 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 44 }
3968e69e 45 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
49 }
50 else
3968e69e 51 abort(); // should not happen
be516ebe 52}
53
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
//
// The third word of the stub must be an `adr`; its 21-bit signed byte
// offset (immhi:immlo), applied to the adr's own address, gives the
// location of the jumping instruction, which is returned.
static void *find_extjump_insn(void *stub)
{
  uint32_t *adr = (uint32_t *)stub + 2;
  uint32_t insn = *adr;
  uint32_t imm21;
  int32_t offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi is bits 23:5, immlo is bits 30:29. Everything is decoded with
  // unsigned arithmetic: shifting signed values left to sign-extend is
  // undefined behavior in C.
  imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from 21 bits
  offset = (int32_t)(imm21 ^ 0x100000) - 0x100000;
  return adr + offset / 4;
}
63
#if 0
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int is_b = (*branch & 0xfc000000) == 0x14000000;        // b
  const int is_bcond = (*branch & 0xff000000) == 0x54000000;    // b.cond
  const int is_cbz = (*branch & 0x7e000000) == 0x34000000;      // cbz/cbnz

  if (is_b)
    return branch + ((signed int)(*branch << 6) >> 6);
  if (is_bcond || is_cbz)
    return branch + ((signed int)(*branch << 8) >> 13);
  assert(0);
  return NULL;
}
#endif
be516ebe 81
be516ebe 82// Allocate a specific ARM register.
83static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
84{
85 int n;
86 int dirty=0;
87
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
90 {
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
93 cur->regmap[n]=-1;
94 }
95 }
96
97 cur->regmap[hr]=reg;
98 cur->dirty&=~(1<<hr);
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
101}
102
103// Alloc cycle count into dedicated register
90f98e7c 104static void alloc_cc(struct regstat *cur, int i)
be516ebe 105{
90f98e7c 106 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
107}
108
109static void alloc_cc_optional(struct regstat *cur, int i)
110{
111 if (cur->regmap[HOST_CCREG] < 0) {
112 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
113 cur->noevict &= ~(1u << HOST_CCREG);
114 }
be516ebe 115}
116
117/* Special alloc */
118
119
120/* Assembler */
121
122static unused const char *regname[32] = {
d1e4ebd9 123 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
124 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
125 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
126 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
127};
128
129static unused const char *regname64[32] = {
130 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
131 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
132 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
133 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
134};
135
// Condition codes; the enumerator values (0..15) are used directly as the
// condition field of emitted instructions and as indices into condname[].
enum {
  COND_EQ, COND_NE,
  COND_CS, COND_CC,
  COND_MI, COND_PL,
  COND_VS, COND_VC,
  COND_HI, COND_LS,
  COND_GE, COND_LT,
  COND_GT, COND_LE,
  COND_AW, COND_NV
};
140
141static unused const char *condname[16] = {
142 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
143 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 144};
145
be516ebe 146static void output_w32(u_int word)
147{
d9e2b173 148 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 149 out += 4;
150}
151
3968e69e 152static u_int rn_rd(u_int rn, u_int rd)
153{
154 assert(rn < 31);
155 assert(rd < 31);
156 return (rn << 5) | rd;
157}
158
be516ebe 159static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
160{
d1e4ebd9 161 assert(rm < 32);
162 assert(rn < 32);
163 assert(rd < 32);
be516ebe 164 return (rm << 16) | (rn << 5) | rd;
165}
166
3968e69e 167static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
168{
169 assert(ra < 32);
170 return rm_rn_rd(rm, rn, rd) | (ra << 10);
171}
172
d1e4ebd9 173static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
174{
175 assert(imm7 < 0x80);
176 assert(rt2 < 31);
177 assert(rn < 32);
178 assert(rt < 31);
179 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
180}
181
687b4580 182static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
183{
184 assert(imm6 <= 63);
185 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
186}
187
be516ebe 188static u_int imm16_rd(u_int imm16, u_int rd)
189{
190 assert(imm16 < 0x10000);
191 assert(rd < 31);
192 return (imm16 << 5) | rd;
193}
194
687b4580 195static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
196{
197 assert(imm12 < 0x1000);
d1e4ebd9 198 assert(rn < 32);
199 assert(rd < 32);
200 return (imm12 << 10) | (rn << 5) | rd;
201}
202
203static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
204{
205 assert(imm9 < 0x200);
687b4580 206 assert(rn < 31);
207 assert(rd < 31);
d1e4ebd9 208 return (imm9 << 12) | (rn << 5) | rd;
687b4580 209}
210
d1e4ebd9 211static u_int imm19_rt(u_int imm19, u_int rt)
212{
213 assert(imm19 < 0x80000);
214 assert(rt < 31);
215 return (imm19 << 5) | rt;
216}
217
218static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
219{
220 assert(n < 2);
221 assert(immr < 0x40);
222 assert(imms < 0x40);
223 assert(rn < 32);
224 assert(rd < 32);
225 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
226}
227
228static u_int genjmp(const u_char *addr)
be516ebe 229{
230 intptr_t offset = addr - out;
d1e4ebd9 231 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 232 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 233 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
234 abort();
be516ebe 235 return 0;
236 }
d1e4ebd9 237 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 238}
239
d1e4ebd9 240static u_int genjmpcc(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0;
be516ebe 244 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
247 return 0;
248 }
249 return ((u_int)offset >> 2) & 0x7ffff;
250}
251
252static uint32_t is_mask(u_int value)
253{
254 return value && ((value + 1) & value) == 0;
255}
256
257// This function returns true if the argument contains a
258// non-empty sequence of ones (possibly rotated) with the remainder zero.
259static uint32_t is_rotated_mask(u_int value)
260{
3968e69e 261 if (value == 0 || value == ~0)
be516ebe 262 return 0;
d1e4ebd9 263 if (is_mask((value - 1) | value))
264 return 1;
265 return is_mask((~value - 1) | ~value);
266}
267
268static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
269{
270 int lzeros, tzeros, ones;
271 assert(value != 0);
272 if (is_mask((value - 1) | value)) {
273 lzeros = __builtin_clz(value);
274 tzeros = __builtin_ctz(value);
275 ones = 32 - lzeros - tzeros;
276 *immr = (32 - tzeros) & 31;
277 *imms = ones - 1;
278 return;
be516ebe 279 }
d1e4ebd9 280 value = ~value;
281 if (is_mask((value - 1) | value)) {
282 lzeros = __builtin_clz(value);
283 tzeros = __builtin_ctz(value);
284 ones = 32 - lzeros - tzeros;
3968e69e 285 *immr = lzeros;
d1e4ebd9 286 *imms = 31 - ones;
287 return;
288 }
3968e69e 289 abort();
be516ebe 290}
291
292static void emit_mov(u_int rs, u_int rt)
293{
687b4580 294 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 295 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
296}
297
298static void emit_mov64(u_int rs, u_int rt)
299{
300 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
301 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 302}
303
687b4580 304static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 305{
d1e4ebd9 306 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
307 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 308}
309
a5cd72d0 310static void emit_adds(u_int rs1, u_int rs2, u_int rt)
311{
312 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
313 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
314}
315
d1e4ebd9 316static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
319 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
3968e69e 324 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 325 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
326}
39b71d9a 327#define emit_adds_ptr emit_adds64
d1e4ebd9 328
a5cd72d0 329static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
330{
331 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
332 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
333}
334
d1e4ebd9 335static void emit_neg(u_int rs, u_int rt)
336{
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 339}
340
a5cd72d0 341static void emit_negs(u_int rs, u_int rt)
342{
343 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
344 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
345}
346
687b4580 347static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 348{
d1e4ebd9 349 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 350 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 351}
352
a5cd72d0 353static void emit_subs(u_int rs1, u_int rs2, u_int rt)
354{
355 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
356 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
357}
358
359static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
3968e69e 360{
361 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
362 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
363}
364
d1e4ebd9 365static void emit_movz(u_int imm, u_int rt)
be516ebe 366{
d1e4ebd9 367 assem_debug("movz %s,#%#x\n", regname[rt], imm);
368 output_w32(0x52800000 | imm16_rd(imm, rt));
369}
370
371static void emit_movz_lsl16(u_int imm, u_int rt)
372{
373 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x52a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_movn(u_int imm, u_int rt)
378{
379 assem_debug("movn %s,#%#x\n", regname[rt], imm);
380 output_w32(0x12800000 | imm16_rd(imm, rt));
381}
382
383static void emit_movn_lsl16(u_int imm,u_int rt)
384{
385 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
386 output_w32(0x12a00000 | imm16_rd(imm, rt));
387}
388
389static void emit_movk(u_int imm,u_int rt)
390{
391 assem_debug("movk %s,#%#x\n", regname[rt], imm);
392 output_w32(0x72800000 | imm16_rd(imm, rt));
393}
394
395static void emit_movk_lsl16(u_int imm,u_int rt)
396{
397 assert(imm<65536);
3968e69e 398 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 399 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 400}
401
402static void emit_zeroreg(u_int rt)
403{
d1e4ebd9 404 emit_movz(0, rt);
be516ebe 405}
406
be516ebe 407static void emit_movimm(u_int imm, u_int rt)
408{
d1e4ebd9 409 if (imm < 65536)
410 emit_movz(imm, rt);
411 else if ((~imm) < 65536)
412 emit_movn(~imm, rt);
413 else if ((imm&0xffff) == 0)
414 emit_movz_lsl16(imm >> 16, rt);
415 else if (((~imm)&0xffff) == 0)
416 emit_movn_lsl16(~imm >> 16, rt);
417 else if (is_rotated_mask(imm)) {
418 u_int immr, imms;
419 gen_logical_imm(imm, &immr, &imms);
420 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
421 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
422 }
be516ebe 423 else {
d1e4ebd9 424 emit_movz(imm & 0xffff, rt);
425 emit_movk_lsl16(imm >> 16, rt);
be516ebe 426 }
427}
428
aaece508 429static void emit_movimm64(uint64_t imm, u_int rt)
430{
431 u_int shift, op, imm16, insns = 0;
432 for (shift = 0; shift < 4; shift++) {
433 imm16 = (imm >> shift * 16) & 0xffff;
434 if (!imm16)
435 continue;
436 op = insns ? 0xf2800000 : 0xd2800000;
437 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
438 if (shift)
439 assem_debug(",lsl #%u", shift * 16);
440 assem_debug("\n");
441 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
442 insns++;
443 }
444 if (!insns) {
445 assem_debug("movz %s,#0\n", regname64[rt]);
446 output_w32(0xd2800000 | imm16_rd(0, rt));
447 }
448}
449
687b4580 450static void emit_readword(void *addr, u_int rt)
451{
452 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
453 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 454 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 455 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
456 }
457 else
3968e69e 458 abort();
687b4580 459}
460
d1e4ebd9 461static void emit_readdword(void *addr, u_int rt)
462{
463 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
464 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 465 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
d1e4ebd9 466 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
467 }
3968e69e 468 else
469 abort();
470}
39b71d9a 471#define emit_readptr emit_readdword
3968e69e 472
473static void emit_readshword(void *addr, u_int rt)
474{
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 1) && offset <= 8190) {
477 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
478 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
479 }
d1e4ebd9 480 else
481 assert(0);
482}
483
be516ebe 484static void emit_loadreg(u_int r, u_int hr)
485{
d1e4ebd9 486 int is64 = 0;
be516ebe 487 if (r == 0)
488 emit_zeroreg(hr);
489 else {
33788798 490 void *addr;
be516ebe 491 switch (r) {
7c3a5182 492 //case HIREG: addr = &hi; break;
493 //case LOREG: addr = &lo; break;
be516ebe 494 case CCREG: addr = &cycle_count; break;
d1e4ebd9 495 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 496 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 497 default:
498 assert(r < 34);
499 addr = &psxRegs.GPR.r[r];
500 break;
be516ebe 501 }
d1e4ebd9 502 if (is64)
503 emit_readdword(addr, hr);
504 else
505 emit_readword(addr, hr);
be516ebe 506 }
507}
508
687b4580 509static void emit_writeword(u_int rt, void *addr)
510{
511 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
512 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 513 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 514 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
515 }
516 else
517 assert(0);
518}
519
d1e4ebd9 520static void emit_writedword(u_int rt, void *addr)
521{
522 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
523 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 524 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
3968e69e 525 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 526 }
527 else
3968e69e 528 abort();
d1e4ebd9 529}
530
687b4580 531static void emit_storereg(u_int r, u_int hr)
be516ebe 532{
533 assert(r < 64);
7c3a5182 534 void *addr = &psxRegs.GPR.r[r];
be516ebe 535 switch (r) {
7c3a5182 536 //case HIREG: addr = &hi; break;
537 //case LOREG: addr = &lo; break;
be516ebe 538 case CCREG: addr = &cycle_count; break;
7c3a5182 539 default: assert(r < 34); break;
be516ebe 540 }
687b4580 541 emit_writeword(hr, addr);
be516ebe 542}
543
544static void emit_test(u_int rs, u_int rt)
545{
d1e4ebd9 546 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
547 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 548}
549
d1e4ebd9 550static void emit_testimm(u_int rs, u_int imm)
be516ebe 551{
d1e4ebd9 552 u_int immr, imms;
687b4580 553 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 554 assert(is_rotated_mask(imm)); // good enough for PCSX
555 gen_logical_imm(imm, &immr, &imms);
3968e69e 556 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 557}
558
559static void emit_not(u_int rs,u_int rt)
560{
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 563}
564
be516ebe 565static void emit_and(u_int rs1,u_int rs2,u_int rt)
566{
567 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 568 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 569}
570
571static void emit_or(u_int rs1,u_int rs2,u_int rt)
572{
573 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 574 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 575}
576
3968e69e 577static void emit_bic(u_int rs1,u_int rs2,u_int rt)
578{
579 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
580 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
581}
582
be516ebe 583static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
584{
be516ebe 585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 587}
588
589static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
590{
be516ebe 591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 593}
594
a5cd72d0 595static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
596{
597 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
598 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
599}
600
3968e69e 601static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
602{
603 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
604 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
605}
606
be516ebe 607static void emit_xor(u_int rs1,u_int rs2,u_int rt)
608{
609 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 610 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 611}
612
3968e69e 613static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
614{
615 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
616 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
617}
618
d1e4ebd9 619static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 620{
d1e4ebd9 621 unused const char *st = s ? "s" : "";
622 s = s ? 0x20000000 : 0;
623 is64 = is64 ? 0x80000000 : 0;
687b4580 624 if (imm < 4096) {
d1e4ebd9 625 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
626 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 627 }
628 else if (-imm < 4096) {
3968e69e 629 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 630 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
631 }
a5cd72d0 632 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
633 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
634 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
635 if (imm & 0xfff) {
636 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
637 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 638 }
639 }
a5cd72d0 640 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
641 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
642 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
643 if (-imm & 0xfff) {
644 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
645 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
d1e4ebd9 646 }
687b4580 647 }
a5cd72d0 648 else {
649 u_int tmp = rt;
650 assert(!is64);
651 if (rs == rt) {
652 host_tempreg_acquire();
653 tmp = HOST_TEMPREG;
654 }
655 emit_movimm(imm, tmp);
656 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
657 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
658 if (tmp == HOST_TEMPREG)
659 host_tempreg_release();
660 }
be516ebe 661}
662
d1e4ebd9 663static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
664{
9b495f6e 665 if (imm == 0) {
666 emit_mov(rs, rt);
667 return;
668 }
d1e4ebd9 669 emit_addimm_s(0, 0, rs, imm, rt);
670}
671
672static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
673{
674 emit_addimm_s(0, 1, rs, imm, rt);
675}
676
bc7c5acb 677static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
678{
679 emit_addimm64(rs, imm, rt);
680}
681
be516ebe 682static void emit_addimm_and_set_flags(int imm, u_int rt)
683{
d1e4ebd9 684 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 685}
686
a5cd72d0 687static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
688{
689 emit_addimm_s(1, 0, rs, imm, rt);
690}
691
d1e4ebd9 692static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 693{
d1e4ebd9 694 const char *names[] = { "and", "orr", "eor", "ands" };
695 const char *name = names[op];
696 u_int immr, imms;
697 op = op << 29;
698 if (is_rotated_mask(imm)) {
699 gen_logical_imm(imm, &immr, &imms);
700 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
701 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
702 }
703 else {
704 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
705 host_tempreg_acquire();
706 emit_movimm(imm, HOST_TEMPREG);
707 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
708 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
709 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
710 host_tempreg_release();
711 }
712 (void)name;
be516ebe 713}
714
d1e4ebd9 715static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 716{
d1e4ebd9 717 if (imm == 0)
718 emit_zeroreg(rt);
719 else
720 emit_logicop_imm(0, rs, imm, rt);
be516ebe 721}
722
d1e4ebd9 723static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 724{
d1e4ebd9 725 if (imm == 0) {
726 if (rs != rt)
727 emit_mov(rs, rt);
728 }
729 else
730 emit_logicop_imm(1, rs, imm, rt);
be516ebe 731}
732
d1e4ebd9 733static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 734{
d1e4ebd9 735 if (imm == 0) {
736 if (rs != rt)
737 emit_mov(rs, rt);
738 }
739 else
740 emit_logicop_imm(2, rs, imm, rt);
be516ebe 741}
742
d1e4ebd9 743static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 744{
d1e4ebd9 745 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
746 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 747}
748
d1e4ebd9 749static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 750{
d1e4ebd9 751 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
752 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 753}
754
755static void emit_shlimm(u_int rs,u_int imm,u_int rt)
756{
be516ebe 757 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 758 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 759}
760
3968e69e 761static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 762{
3968e69e 763 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
764 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 765}
766
3968e69e 767static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 768{
be516ebe 769 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 770 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 771}
772
773static void emit_sarimm(u_int rs,u_int imm,u_int rt)
774{
be516ebe 775 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 776 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 777}
778
779static void emit_rorimm(u_int rs,u_int imm,u_int rt)
780{
3968e69e 781 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 782 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 783}
784
785static void emit_signextend16(u_int rs, u_int rt)
786{
787 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 788 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 789}
790
d1e4ebd9 791static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 792{
3968e69e 793 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 794 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 795}
796
d1e4ebd9 797static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 798{
d1e4ebd9 799 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
800 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 801}
802
d1e4ebd9 803static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 804{
d1e4ebd9 805 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
806 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 807}
808
d1e4ebd9 809static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 810{
d1e4ebd9 811 if (imm < 4096) {
812 assem_debug("cmp %s,%#x\n", regname[rs], imm);
813 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
814 }
815 else if (-imm < 4096) {
816 assem_debug("cmn %s,%#x\n", regname[rs], imm);
817 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
818 }
819 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 820 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 821 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
822 }
823 else {
824 host_tempreg_acquire();
825 emit_movimm(imm, HOST_TEMPREG);
826 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
827 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
828 host_tempreg_release();
829 }
be516ebe 830}
831
d1e4ebd9 832static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 833{
d1e4ebd9 834 assert(imm == 0 || imm == 1);
835 assert(cond0 < 0x10);
836 assert(cond1 < 0x10);
837 if (imm) {
838 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
839 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
840 } else {
841 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
842 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
843 }
be516ebe 844}
845
d1e4ebd9 846static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 847{
d1e4ebd9 848 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 849}
850
d1e4ebd9 851static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 852{
d1e4ebd9 853 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 854}
855
856static void emit_cmovb_imm(int imm,u_int rt)
857{
d1e4ebd9 858 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 859}
860
3968e69e 861static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 862{
3968e69e 863 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
864 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 865}
866
867static void emit_cmovne_reg(u_int rs,u_int rt)
868{
d1e4ebd9 869 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
870 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 871}
872
873static void emit_cmovl_reg(u_int rs,u_int rt)
874{
d1e4ebd9 875 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
876 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 877}
878
e3c6bdb5 879static void emit_cmovb_reg(u_int rs,u_int rt)
880{
881 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
882 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
883}
884
be516ebe 885static void emit_cmovs_reg(u_int rs,u_int rt)
886{
d1e4ebd9 887 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
888 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 889}
890
3968e69e 891static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
892{
893 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
895}
896
a5cd72d0 897static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
898{
899 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
901}
902
be516ebe 903static void emit_slti32(u_int rs,int imm,u_int rt)
904{
905 if(rs!=rt) emit_zeroreg(rt);
906 emit_cmpimm(rs,imm);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovl_imm(1,rt);
909}
910
911static void emit_sltiu32(u_int rs,int imm,u_int rt)
912{
913 if(rs!=rt) emit_zeroreg(rt);
914 emit_cmpimm(rs,imm);
915 if(rs==rt) emit_movimm(0,rt);
916 emit_cmovb_imm(1,rt);
917}
918
919static void emit_cmp(u_int rs,u_int rt)
920{
921 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 922 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 923}
924
882a08fc 925static void emit_cmpcs(u_int rs,u_int rt)
926{
927 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
928 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
929}
930
be516ebe 931static void emit_set_gz32(u_int rs, u_int rt)
932{
933 //assem_debug("set_gz32\n");
934 emit_cmpimm(rs,1);
935 emit_movimm(1,rt);
936 emit_cmovl_imm(0,rt);
937}
938
939static void emit_set_nz32(u_int rs, u_int rt)
940{
941 //assem_debug("set_nz32\n");
d1e4ebd9 942 if(rs!=rt) emit_mov(rs,rt);
943 emit_test(rs,rs);
944 emit_cmovne_imm(1,rt);
be516ebe 945}
946
947static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
948{
949 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
950 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
951 emit_cmp(rs1,rs2);
952 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
953 emit_cmovl_imm(1,rt);
954}
955
956static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
957{
958 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
959 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
960 emit_cmp(rs1,rs2);
961 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
962 emit_cmovb_imm(1,rt);
963}
964
2a014d73 965static int can_jump_or_call(const void *a)
966{
967 intptr_t diff = (u_char *)a - out;
968 return (-134217728 <= diff && diff <= 134217727);
969}
970
d1e4ebd9 971static void emit_call(const void *a)
be516ebe 972{
d1e4ebd9 973 intptr_t diff = (u_char *)a - out;
974 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 975 assert(!(diff & 3));
976 if (-134217728 <= diff && diff <= 134217727)
977 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
978 else
3968e69e 979 abort();
be516ebe 980}
981
d1e4ebd9 982static void emit_jmp(const void *a)
be516ebe 983{
d1e4ebd9 984 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
985 u_int offset = genjmp(a);
986 output_w32(0x14000000 | offset);
be516ebe 987}
988
d1e4ebd9 989static void emit_jne(const void *a)
be516ebe 990{
d1e4ebd9 991 assem_debug("bne %p\n", a);
992 u_int offset = genjmpcc(a);
993 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 994}
995
7c3a5182 996static void emit_jeq(const void *a)
be516ebe 997{
d1e4ebd9 998 assem_debug("beq %p\n", a);
999 u_int offset = genjmpcc(a);
1000 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 1001}
1002
7c3a5182 1003static void emit_js(const void *a)
be516ebe 1004{
d1e4ebd9 1005 assem_debug("bmi %p\n", a);
1006 u_int offset = genjmpcc(a);
1007 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 1008}
1009
7c3a5182 1010static void emit_jns(const void *a)
be516ebe 1011{
d1e4ebd9 1012 assem_debug("bpl %p\n", a);
1013 u_int offset = genjmpcc(a);
1014 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 1015}
1016
7c3a5182 1017static void emit_jl(const void *a)
be516ebe 1018{
d1e4ebd9 1019 assem_debug("blt %p\n", a);
1020 u_int offset = genjmpcc(a);
1021 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 1022}
1023
7c3a5182 1024static void emit_jge(const void *a)
be516ebe 1025{
d1e4ebd9 1026 assem_debug("bge %p\n", a);
1027 u_int offset = genjmpcc(a);
1028 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 1029}
1030
a5cd72d0 1031static void emit_jo(const void *a)
1032{
1033 assem_debug("bvs %p\n", a);
1034 u_int offset = genjmpcc(a);
1035 output_w32(0x54000000 | (offset << 5) | COND_VS);
1036}
1037
7c3a5182 1038static void emit_jno(const void *a)
be516ebe 1039{
d1e4ebd9 1040 assem_debug("bvc %p\n", a);
1041 u_int offset = genjmpcc(a);
1042 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 1043}
1044
7c3a5182 1045static void emit_jc(const void *a)
be516ebe 1046{
d1e4ebd9 1047 assem_debug("bcs %p\n", a);
1048 u_int offset = genjmpcc(a);
1049 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 1050}
1051
3968e69e 1052static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 1053{
3968e69e 1054 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 1055 u_int offset = genjmpcc(a);
3968e69e 1056 is64 = is64 ? 0x80000000 : 0;
1057 isnz = isnz ? 0x01000000 : 0;
1058 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1059}
1060
9b495f6e 1061static void *emit_cbz(u_int r, const void *a)
3968e69e 1062{
9b495f6e 1063 void *ret = out;
3968e69e 1064 emit_cb(0, 0, a, r);
9b495f6e 1065 return ret;
be516ebe 1066}
1067
1068static void emit_jmpreg(u_int r)
1069{
3968e69e 1070 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1071 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1072}
1073
1074static void emit_retreg(u_int r)
1075{
d1e4ebd9 1076 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1077 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1078}
1079
1080static void emit_ret(void)
1081{
1082 emit_retreg(LR);
1083}
1084
d1e4ebd9 1085static void emit_adr(void *addr, u_int rt)
1086{
1087 intptr_t offset = (u_char *)addr - out;
1088 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1089 assert(rt < 31);
d1e4ebd9 1090 assem_debug("adr x%d,#%#lx\n", rt, offset);
1091 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1092}
1093
3968e69e 1094static void emit_adrp(void *addr, u_int rt)
1095{
1096 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1097 assert(-4294967296l <= offset && offset < 4294967296l);
1098 assert(rt < 31);
1099 offset >>= 12;
1100 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1101 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1102}
1103
be516ebe 1104static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1105{
d1e4ebd9 1106 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1107 assert(-256 <= offset && offset < 256);
1108 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1109}
1110
1111static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1112{
1113 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1114 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1115}
1116
1117static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1118{
1119 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1120 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1121}
1122
1123static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1124{
1125 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1126 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1127}
1128
1129static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1130{
1131 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1132 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1133}
39b71d9a 1134#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1135
1136static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1137{
1138 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1139 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1140}
1141
1142static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1143{
1144 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1145 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1146}
1147
1148static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1149{
1150 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1151 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1152}
1153
1154static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1155{
1156 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1157 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1158}
1159
1160static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1161{
1162 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1163 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1164}
1165
be516ebe 1166static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1167{
d1e4ebd9 1168 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1169 assert(-256 <= offset && offset < 256);
1170 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1171}
1172
1173static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1174{
d1e4ebd9 1175 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1176 assert(-256 <= offset && offset < 256);
1177 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1178}
1179
1180static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1181{
d1e4ebd9 1182 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1183 assert(-256 <= offset && offset < 256);
1184 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1185}
1186
1187static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1188{
d1e4ebd9 1189 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1190 assert(-256 <= offset && offset < 256);
1191 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1192}
1193
be516ebe 1194static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1195{
3968e69e 1196 if (!(offset & 3) && (u_int)offset <= 16380) {
1197 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1198 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1199 }
1200 else if (-256 <= offset && offset < 256) {
1201 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1202 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1203 }
687b4580 1204 else
1205 assert(0);
be516ebe 1206}
1207
1208static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1209{
3968e69e 1210 if (!(offset & 1) && (u_int)offset <= 8190) {
1211 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1212 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1213 }
1214 else if (-256 <= offset && offset < 256) {
1215 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1216 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1217 }
687b4580 1218 else
1219 assert(0);
be516ebe 1220}
1221
1222static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1223{
3968e69e 1224 if ((u_int)offset < 4096) {
1225 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1226 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1227 }
1228 else if (-256 <= offset && offset < 256) {
1229 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1230 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1231 }
687b4580 1232 else
1233 assert(0);
be516ebe 1234}
1235
3968e69e 1236static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1237{
3968e69e 1238 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1239 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1240}
1241
3968e69e 1242static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1243{
3968e69e 1244 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1245 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1246}
1247
1248static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1249{
1250 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1251 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1252}
1253
1254static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1255{
1256 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1257 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1258}
1259
3968e69e 1260static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1261{
1262 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1263 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1264}
1265
1266static void emit_clz(u_int rs, u_int rt)
be516ebe 1267{
1268 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1269 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1270}
1271
be516ebe 1272// special case for checking invalid_code
9b495f6e 1273static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1274{
1275 emit_shrimm(r, 12, rt);
1276 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1277 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1278}
1279
3968e69e 1280// special for loadlr_assemble, rs2 is destroyed
1281static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1282{
3968e69e 1283 emit_shl(rs2, shift, rs2);
1284 emit_bic(rs1, rs2, rt);
be516ebe 1285}
1286
3968e69e 1287static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1288{
3968e69e 1289 emit_shr(rs2, shift, rs2);
1290 emit_bic(rs1, rs2, rt);
be516ebe 1291}
1292
687b4580 1293static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1294{
687b4580 1295 u_int op = 0xb9000000;
d1e4ebd9 1296 unused const char *ldst = is_st ? "st" : "ld";
1297 unused char rp = is64 ? 'x' : 'w';
687b4580 1298 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1299 is64 = is64 ? 1 : 0;
1300 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1301 ofs = (ofs >> (2+is64));
687b4580 1302 if (!is_st) op |= 0x00400000;
1303 if (is64) op |= 0x40000000;
d1e4ebd9 1304 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1305}
1306
687b4580 1307static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1308{
687b4580 1309 u_int op = 0x29000000;
d1e4ebd9 1310 unused const char *ldst = is_st ? "st" : "ld";
1311 unused char rp = is64 ? 'x' : 'w';
687b4580 1312 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1313 is64 = is64 ? 1 : 0;
1314 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1315 ofs = (ofs >> (2+is64));
1316 assert(-64 <= ofs && ofs <= 63);
1317 ofs &= 0x7f;
1318 if (!is_st) op |= 0x00400000;
1319 if (is64) op |= 0x80000000;
d1e4ebd9 1320 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1321}
1322
1323static void save_load_regs_all(int is_store, u_int reglist)
1324{
1325 int ofs = 0, c = 0;
1326 u_int r, pair[2];
1327 for (r = 0; reglist; r++, reglist >>= 1) {
1328 if (reglist & 1)
1329 pair[c++] = r;
1330 if (c == 2) {
1331 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1332 ofs += 8 * 2;
1333 c = 0;
1334 }
1335 }
1336 if (c) {
1337 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1338 ofs += 8;
1339 }
1340 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1341}
1342
1343// Save registers before function call
1344static void save_regs(u_int reglist)
1345{
1346 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1347 save_load_regs_all(1, reglist);
be516ebe 1348}
1349
1350// Restore registers after function call
1351static void restore_regs(u_int reglist)
1352{
1353 reglist &= CALLER_SAVE_REGS;
687b4580 1354 save_load_regs_all(0, reglist);
be516ebe 1355}
1356
1357/* Stubs/epilogue */
1358
1359static void literal_pool(int n)
1360{
1361 (void)literals;
1362}
1363
// Intentionally empty in this backend; the parameter is ignored.
static void literal_pool_jumpover(int n)
{
}
1367
d1e4ebd9 1368// parsed by get_pointer, find_extjump_insn
104df9d3 1369static void emit_extjump(u_char *addr, u_int target)
be516ebe 1370{
d1e4ebd9 1371 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1372
d1e4ebd9 1373 emit_movz(target & 0xffff, 0);
1374 emit_movk_lsl16(target >> 16, 0);
1375
1376 // addr is in the current recompiled block (max 256k)
1377 // offset shouldn't exceed +/-1MB
1378 emit_adr(addr, 1);
104df9d3 1379 emit_far_jump(dyna_linker);
be516ebe 1380}
1381
d1e4ebd9 1382static void check_extjump2(void *src)
be516ebe 1383{
d1e4ebd9 1384 u_int *ptr = src;
1385 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1386 (void)ptr;
be516ebe 1387}
1388
1389// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1390static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1391{
d1e4ebd9 1392 int diff = rt_val - rs_val;
3968e69e 1393 if ((-4096 < diff && diff < 4096)
1394 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1395 emit_addimm(rs, diff, rt);
3968e69e 1396 else if (rt_val == ~rs_val)
1397 emit_not(rs, rt);
d1e4ebd9 1398 else if (is_rotated_mask(rs_val ^ rt_val))
1399 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1400 else
d1e4ebd9 1401 emit_movimm(rt_val, rt);
be516ebe 1402}
1403
d1e4ebd9 1404// return 1 if the above function can do it's job cheaply
687b4580 1405static int is_similar_value(u_int v1, u_int v2)
be516ebe 1406{
687b4580 1407 int diff = v1 - v2;
3968e69e 1408 return (-4096 < diff && diff < 4096)
1409 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1410 || v1 == ~v2
d1e4ebd9 1411 || is_rotated_mask(v1 ^ v2);
1412}
1413
37387d8b 1414static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1415{
1416 if (rt_val < 0x100000000ull) {
1417 emit_movimm_from(rs_val, rs, rt_val, rt);
1418 return;
1419 }
1420 // just move the whole thing. At least on Linux all addresses
1421 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1422 emit_movimm64(rt_val, rt);
37387d8b 1423}
1424
1425// trashes x2
d1e4ebd9 1426static void pass_args64(u_int a0, u_int a1)
1427{
1428 if(a0==1&&a1==0) {
1429 // must swap
1430 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1431 }
1432 else if(a0!=0&&a1==0) {
1433 emit_mov64(a1,1);
1434 if (a0>=0) emit_mov64(a0,0);
1435 }
1436 else {
1437 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1438 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1439 }
be516ebe 1440}
1441
d1e4ebd9 1442static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1443{
1444 switch(type) {
1445 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1446 case LOADBU_STUB:
1447 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1448 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1449 case LOADHU_STUB:
1450 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1451 case LOADW_STUB:
1452 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1453 default: assert(0);
d1e4ebd9 1454 }
1455}
1456
1457#include "pcsxmem.h"
be516ebe 1458//#include "pcsxmem_inline.c"
1459
// Emit the out-of-line slow path for the load recorded in stubs[n].
// The generated code looks the address register up in mem_rtab and then
// either performs the load directly or calls the jump_handler_read8/16/32
// routine matching the stub type, before jumping back to stubs[n].retaddr.
// Stub fields used: .addr (branch to patch), .type, .a (instruction index),
// .b (host register holding the address), .c (struct regstat *),
// .d (cycle adjustment), .e (host register list), .retaddr.
1460static void do_readstub(int n)
1461{
1462 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// the branch that leads here is pointed at the code emitted below
d1e4ebd9 1463 set_jump_target(stubs[n].addr, out);
1464 enum stub_type type = stubs[n].type;
1465 int i = stubs[n].a;
1466 int rs = stubs[n].b;
1467 const struct regstat *i_regs = (void *)stubs[n].c;
7da5c7ad 1468 int adj = (int)stubs[n].d;
d1e4ebd9 1469 u_int reglist = stubs[n].e;
1470 const signed char *i_regmap = i_regs->regmap;
1471 int rt;
// C2LS and LOADLR take the result in the FTEMP mapping, others in rt1
a5cd72d0 1472 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1473 rt=get_reg(i_regmap,FTEMP);
1474 }else{
cf95b4f0 1475 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1476 }
1477 assert(rs>=0);
1478 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1479 void *restore_jump = NULL, *handler_jump = NULL;
// the address register must survive, so treat it as in use
1480 reglist|=(1<<rs);
// pick the first host register below HOST_CCREG that is not in reglist
1481 for (r = 0; r < HOST_CCREG; r++) {
1482 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1483 temp = r;
1484 break;
1485 }
1486 }
// the destination gets overwritten anyway: no need to save/restore it
cf95b4f0 1487 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1488 reglist&=~(1<<rt);
// no free register: save everything first, then use x0 (x2 if rs is x0)
1489 if(temp==-1) {
1490 save_regs(reglist);
1491 regs_saved=1;
1492 temp=(rs==0)?2:0;
1493 }
// use x1 as the second scratch register when it is free for that
1494 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1495 temp2=1;
// temp2 = mem_rtab[rs >> 12], then temp2 += temp2; the carry out of that
// addition selects the handler path below
1496 emit_readdword(&mem_rtab,temp);
1497 emit_shrimm(rs,12,temp2);
1498 emit_readdword_dualindexedx8(temp,temp2,temp2);
1499 emit_adds64(temp2,temp2,temp2);
1500 handler_jump=out;
1501 emit_jc(0);
// no carry: load directly from [temp2 + rs]
a5cd72d0 1502 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1503 switch(type) {
1504 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1505 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1506 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1507 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1508 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1509 default: assert(0);
d1e4ebd9 1510 }
1511 }
1512 if(regs_saved) {
1513 restore_jump=out;
1514 emit_jmp(0); // jump to reg restore
1515 }
1516 else
1517 emit_jmp(stubs[n].retaddr); // return address
// handler path starts here
1518 set_jump_target(handler_jump, out);
1519
1520 if(!regs_saved)
1521 save_regs(reglist);
1522 void *handler=NULL;
1523 if(type==LOADB_STUB||type==LOADBU_STUB)
1524 handler=jump_handler_read8;
1525 if(type==LOADH_STUB||type==LOADHU_STUB)
1526 handler=jump_handler_read16;
1527 if(type==LOADW_STUB)
1528 handler=jump_handler_read32;
1529 assert(handler);
// x0 = address, x1 = table entry (temp2)
1530 pass_args64(rs,temp2);
// w2 = cycle count + adj; the count is loaded into w2 first when CCREG
// has no host register
7da5c7ad 1531 int cc, cc_use;
1532 cc = cc_use = get_reg(i_regmap, CCREG);
1533 if (cc < 0)
1534 emit_loadreg(CCREG, (cc_use = 2));
1535 emit_addimm(cc_use, adj, 2);
1536
2a014d73 1537 emit_far_call(handler);
7da5c7ad 1538
// disabled: writing the handler's cycle count back after the call
1539#if 0
1540 // cycle reload for read32 only (value in w2 both in and out)
1541 if (type == LOADW_STUB) {
1542 emit_addimm(2, -adj, cc_use);
1543 if (cc < 0)
1544 emit_storereg(CCREG, cc_use);
1545 }
1546#endif
// the handler result is taken from w0 and narrowed to the access width
a5cd72d0 1547 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1548 loadstore_extend(type,0,rt);
1549 }
// the direct-load path joins here when it had to save registers up front
1550 if(restore_jump)
1551 set_jump_target(restore_jump, out);
1552 restore_regs(reglist);
1553 emit_jmp(stubs[n].retaddr);
be516ebe 1554}
1555
// Emit a load from the constant guest address 'addr' for instruction i.
// If get_direct_memhandler() returns NULL the load is done inline from
// host_addr; otherwise a memory handler is called and its result moved into
// the host register mapped to 'target'.
//   type:    LOADB/LOADBU/LOADH/LOADHU/LOADW stub type selecting the width
//   regmap:  current guest-to-host register mapping
//   adj:     cycle adjustment added to the cycle count passed in w2
//   reglist: host registers to preserve across the handler call
81dbbf4c 1556static void inline_readstub(enum stub_type type, int i, u_int addr,
1557 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1558{
277718fa 1559 int ra = cinfo[i].addr;
1560 int rt = get_reg(regmap, target);
1561 assert(ra >= 0);
d1e4ebd9 1562 u_int is_dynamic=0;
1563 uintptr_t host_addr = 0;
1564 void *handler;
7da5c7ad 1565 int cc, cc_use;
1566 cc = cc_use = get_reg(regmap, CCREG);
277718fa 1567 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
d1e4ebd9 1568 // return;
1569 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// no handler: read straight from host memory
1570 if (handler == NULL) {
// nothing to do when the result has no destination
cf95b4f0 1571 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1572 return;
// ra is taken to hold 'addr'; turn it into host_addr
37387d8b 1573 if (addr != host_addr)
277718fa 1574 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1575 switch(type) {
277718fa 1576 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1577 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1578 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1579 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1580 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
d1e4ebd9 1581 default: assert(0);
1582 }
1583 return;
1584 }
// dynamic handlers are reached through the generic jump_handler_read*
37387d8b 1585 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1586 if (is_dynamic) {
d1e4ebd9 1587 if(type==LOADB_STUB||type==LOADBU_STUB)
1588 handler=jump_handler_read8;
1589 if(type==LOADH_STUB||type==LOADHU_STUB)
1590 handler=jump_handler_read16;
1591 if(type==LOADW_STUB)
1592 handler=jump_handler_read32;
1593 }
1594
1595 // call a memhandler
// the destination gets overwritten anyway: no need to save/restore it
cf95b4f0 1596 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1597 reglist&=~(1<<rt);
1598 save_regs(reglist);
// w0 = address
1599 if(target==0)
1600 emit_movimm(addr,0);
277718fa 1601 else if(ra!=0)
1602 emit_mov(ra,0);
// w2 = cycle count + adj; the count is loaded into w2 first when CCREG
// has no host register
7da5c7ad 1603 if (cc < 0)
1604 emit_loadreg(CCREG, (cc_use = 2));
1605 emit_addimm(cc_use, adj, 2);
// dynamic: x1 = the mem_rtab entry for this page shifted left by one,
// built with adrp+add when within adrp range, else as a full 64-bit move
3968e69e 1606 if(is_dynamic) {
1607 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
aaece508 1608 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1609 if (-4294967296l <= offset && offset < 4294967296l) {
1610 emit_adrp((void *)l1, 1);
1611 emit_addimm64(1, l1 & 0xfff, 1);
1612 }
1613 else
1614 emit_movimm64(l1, 1);
3968e69e 1615 }
d1e4ebd9 1616 else
2a014d73 1617 emit_far_call(do_memhandler_pre);
d1e4ebd9 1618
2a014d73 1619 emit_far_call(handler);
d1e4ebd9 1620
// disabled: writing the handler's cycle count back after the call
7da5c7ad 1621#if 0
1622 // cycle reload for read32 only (value in w2 both in and out)
1623 if (type == LOADW_STUB) {
1624 if (!is_dynamic)
1625 emit_far_call(do_memhandler_post);
1626 emit_addimm(2, -adj, cc_use);
1627 if (cc < 0)
1628 emit_storereg(CCREG, cc_use);
1629 }
1630#endif
// the handler result is taken from w0 and narrowed to the access width
cf95b4f0 1631 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1632 loadstore_extend(type, 0, rt);
1633 restore_regs(reglist);
be516ebe 1634}
1635
1636static void do_writestub(int n)
1637{
1638 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1639 set_jump_target(stubs[n].addr, out);
1640 enum stub_type type=stubs[n].type;
1641 int i=stubs[n].a;
1642 int rs=stubs[n].b;
1643 struct regstat *i_regs=(struct regstat *)stubs[n].c;
7da5c7ad 1644 int adj = (int)stubs[n].d;
d1e4ebd9 1645 u_int reglist=stubs[n].e;
1646 signed char *i_regmap=i_regs->regmap;
1647 int rt,r;
a5cd72d0 1648 if(dops[i].itype==C2LS) {
d1e4ebd9 1649 rt=get_reg(i_regmap,r=FTEMP);
1650 }else{
cf95b4f0 1651 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1652 }
1653 assert(rs>=0);
1654 assert(rt>=0);
1655 int rtmp,temp=-1,temp2,regs_saved=0;
1656 void *restore_jump = NULL, *handler_jump = NULL;
1657 int reglist2=reglist|(1<<rs)|(1<<rt);
1658 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1659 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1660 temp = rtmp;
1661 break;
1662 }
1663 }
1664 if(temp==-1) {
1665 save_regs(reglist);
1666 regs_saved=1;
1667 for(rtmp=0;rtmp<=3;rtmp++)
1668 if(rtmp!=rs&&rtmp!=rt)
1669 {temp=rtmp;break;}
1670 }
1671 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1672 temp2=3;
1673 else {
1674 host_tempreg_acquire();
1675 temp2=HOST_TEMPREG;
1676 }
1677 emit_readdword(&mem_wtab,temp);
1678 emit_shrimm(rs,12,temp2);
1679 emit_readdword_dualindexedx8(temp,temp2,temp2);
1680 emit_adds64(temp2,temp2,temp2);
1681 handler_jump=out;
1682 emit_jc(0);
1683 switch(type) {
1684 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1685 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1686 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1687 default: assert(0);
1688 }
1689 if(regs_saved) {
1690 restore_jump=out;
1691 emit_jmp(0); // jump to reg restore
1692 }
1693 else
1694 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1695 set_jump_target(handler_jump, out);
1696
d1e4ebd9 1697 if(!regs_saved)
1698 save_regs(reglist);
1699 void *handler=NULL;
1700 switch(type) {
1701 case STOREB_STUB: handler=jump_handler_write8; break;
1702 case STOREH_STUB: handler=jump_handler_write16; break;
1703 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1704 default: assert(0);
d1e4ebd9 1705 }
1706 assert(handler);
1707 pass_args(rs,rt);
1708 if(temp2!=3) {
1709 emit_mov64(temp2,3);
1710 host_tempreg_release();
1711 }
7da5c7ad 1712 int cc, cc_use;
1713 cc = cc_use = get_reg(i_regmap, CCREG);
1714 if (cc < 0)
1715 emit_loadreg(CCREG, (cc_use = 2));
1716 emit_addimm(cc_use, adj, 2);
1717
2a014d73 1718 emit_far_call(handler);
7da5c7ad 1719
1720 // new cycle_count returned in x2
1721 emit_addimm(2, -adj, cc_use);
1722 if (cc < 0)
1723 emit_storereg(CCREG, cc_use);
1724 if (restore_jump)
d1e4ebd9 1725 set_jump_target(restore_jump, out);
1726 restore_regs(reglist);
1727 emit_jmp(stubs[n].retaddr);
be516ebe 1728}
1729
81dbbf4c 1730static void inline_writestub(enum stub_type type, int i, u_int addr,
1731 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1732{
277718fa 1733 int ra = cinfo[i].addr;
687b4580 1734 int rt = get_reg(regmap,target);
277718fa 1735 assert(ra >= 0);
687b4580 1736 assert(rt >= 0);
1737 uintptr_t host_addr = 0;
1738 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1739 if (handler == NULL) {
37387d8b 1740 if (addr != host_addr)
277718fa 1741 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1742 switch (type) {
277718fa 1743 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1744 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1745 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
687b4580 1746 default: assert(0);
1747 }
1748 return;
1749 }
1750
1751 // call a memhandler
1752 save_regs(reglist);
277718fa 1753 emit_writeword(ra, &address); // some handlers still need it
d1e4ebd9 1754 loadstore_extend(type, rt, 0);
1755 int cc, cc_use;
1756 cc = cc_use = get_reg(regmap, CCREG);
1757 if (cc < 0)
1758 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1759 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1760
2a014d73 1761 emit_far_call(do_memhandler_pre);
1762 emit_far_call(handler);
1763 emit_far_call(do_memhandler_post);
7da5c7ad 1764 emit_addimm(2, -adj, cc_use);
d1e4ebd9 1765 if (cc < 0)
1766 emit_storereg(CCREG, cc_use);
687b4580 1767 restore_regs(reglist);
be516ebe 1768}
1769
3968e69e 1770/* Special assem */
1771
81dbbf4c 1772static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1773{
1774 save_load_regs_all(1, reglist);
32631e6a 1775 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1776#ifdef PCNT
1777 emit_movimm(op, 0);
2a014d73 1778 emit_far_call(pcnt_gte_start);
3968e69e 1779#endif
1780 // pointer to cop2 regs
1781 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1782}
1783
1784static void c2op_epilogue(u_int op,u_int reglist)
1785{
1786#ifdef PCNT
1787 emit_movimm(op, 0);
2a014d73 1788 emit_far_call(pcnt_gte_end);
3968e69e 1789#endif
1790 save_load_regs_all(0, reglist);
be516ebe 1791}
1792
81dbbf4c 1793static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1794{
3968e69e 1795 u_int c2op=source[i]&0x3f;
1796 u_int hr,reglist_full=0,reglist;
1797 int need_flags,need_ir;
1798 for(hr=0;hr<HOST_REGS;hr++) {
1799 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1800 }
1801 reglist=reglist_full&CALLER_SAVE_REGS;
1802
1803 if (gte_handlers[c2op]!=NULL) {
1804 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1805 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1806 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1807 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1808 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1809 need_flags=0;
1810 //int shift = (source[i] >> 19) & 1;
1811 //int lm = (source[i] >> 10) & 1;
1812 switch(c2op) {
1813 default:
1814 (void)need_ir;
81dbbf4c 1815 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1816 emit_movimm(source[i],1); // opcode
1817 emit_writeword(1,&psxRegs.code);
2a014d73 1818 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1819 break;
1820 }
1821 c2op_epilogue(c2op,reglist);
1822 }
1823}
1824
1825static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1826{
1827 //value = value & 0x7ffff000;
1828 //if (value & 0x7f87e000) value |= 0x80000000;
1829 emit_andimm(sl, 0x7fffe000, temp);
1830 emit_testimm(temp, 0xff87ffff);
1831 emit_andimm(sl, 0x7ffff000, temp);
1832 host_tempreg_acquire();
1833 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1834 emit_cmovne_reg(HOST_TEMPREG, temp);
1835 host_tempreg_release();
1836 assert(0); // testing needed
1837}
1838
1839static void do_mfc2_31_one(u_int copr,signed char temp)
1840{
1841 emit_readshword(&reg_cop2d[copr],temp);
1842 emit_bicsar_imm(temp,31,temp);
1843 emit_cmpimm(temp,0xf80);
1844 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1845 emit_andimm(temp,0xf80,temp);
1846}
1847
1848static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1849{
1850 if (temp < 0) {
1851 host_tempreg_acquire();
1852 temp = HOST_TEMPREG;
1853 }
1854 do_mfc2_31_one(9,temp);
1855 emit_shrimm(temp,7,tl);
1856 do_mfc2_31_one(10,temp);
1857 emit_orrshr_imm(temp,2,tl);
1858 do_mfc2_31_one(11,temp);
1859 emit_orrshl_imm(temp,3,tl);
1860 emit_writeword(tl,&reg_cop2d[29]);
1861
1862 if (temp == HOST_TEMPREG)
1863 host_tempreg_release();
be516ebe 1864}
1865
// Assemble MULT/MULTU/DIV/DIVU for instruction i, writing HI and LO.
// The first branch handles two real source registers; the else branch
// handles the cases where rs1 and/or rs2 is guest register 0, where the
// results are known constants or depend on a single operand.
2330734f 1866static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1867{
3968e69e 1868 // case 0x18: MULT
1869 // case 0x19: MULTU
1870 // case 0x1A: DIV
1871 // case 0x1B: DIVU
cf95b4f0 1872 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1873 {
cf95b4f0 1874 switch(dops[i].opcode2)
3968e69e 1875 {
1876 case 0x18: // MULT
1877 case 0x19: // MULTU
1878 {
cf95b4f0 1879 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1880 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1881 signed char hi=get_reg(i_regs->regmap,HIREG);
1882 signed char lo=get_reg(i_regs->regmap,LOREG);
1883 assert(m1>=0);
1884 assert(m2>=0);
1885 assert(hi>=0);
1886 assert(lo>=0);
1887
// the full 64-bit product is formed in the 64-bit view of 'hi'
cf95b4f0 1888 if(dops[i].opcode2==0x18) // MULT
3968e69e 1889 emit_smull(m1,m2,hi);
1890 else // MULTU
1891 emit_umull(m1,m2,hi);
1892
// lo = low word of the product, hi = product >> 32
1893 emit_mov(hi,lo);
1894 emit_shrimm64(hi,32,hi);
1895 break;
1896 }
1897 case 0x1A: // DIV
1898 case 0x1B: // DIVU
1899 {
cf95b4f0 1900 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1901 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1902 signed char quotient=get_reg(i_regs->regmap,LOREG);
1903 signed char remainder=get_reg(i_regs->regmap,HIREG);
1904 assert(numerator>=0);
1905 assert(denominator>=0);
1906 assert(quotient>=0);
1907 assert(remainder>=0);
1908
cf95b4f0 1909 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1910 emit_sdiv(numerator,denominator,quotient);
1911 else // DIVU
1912 emit_udiv(numerator,denominator,quotient);
// remainder via msub of quotient, denominator and numerator
1913 emit_msub(quotient,denominator,numerator,remainder);
1914
1915 // div 0 quotient (remainder is already correct)
// HOST_TEMPREG receives the quotient to use when the denominator is 0:
// for DIVU that is ~0; for DIV it is derived from the numerator's sign
// bit (presumably 1 for a negative numerator, -1 otherwise - the two
// helpers are defined elsewhere, TODO confirm)
1916 host_tempreg_acquire();
a5cd72d0 1917 if (dops[i].opcode2 == 0x1A) { // DIV
1918 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1919 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1920 }
3968e69e 1921 else
1922 emit_movimm(~0,HOST_TEMPREG);
1923 emit_test(denominator,denominator);
1924 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1925 host_tempreg_release();
1926 break;
1927 }
1928 default:
1929 assert(0);
1930 }
1931 }
1932 else
1933 {
// at least one operand is guest register 0; HI/LO may be unmapped here,
// so every write below is guarded
1934 signed char hr=get_reg(i_regs->regmap,HIREG);
1935 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1936 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1937 {
cf95b4f0 1938 if (dops[i].rs1) {
1939 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1940 assert(numerator >= 0);
// HI = numerator; LO = the same div-by-zero quotient as computed above
1941 if (hr >= 0)
1942 emit_mov(numerator,hr);
1943 if (lr >= 0) {
a5cd72d0 1944 if (dops[i].opcode2 == 0x1A) { // DIV
1945 emit_add_lsrimm(WZR,numerator,31,lr);
1946 emit_orn_asrimm(lr,numerator,31,lr);
1947 }
3968e69e 1948 else
1949 emit_movimm(~0,lr);
1950 }
1951 }
1952 else {
// both operands are register 0: HI = 0, LO = ~0
1953 if (hr >= 0) emit_zeroreg(hr);
1954 if (lr >= 0) emit_movimm(~0,lr);
1955 }
1956 }
a5cd72d0 1957 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1958 {
// numerator is register 0: HI = 0; LO = 0, or ~0 when the denominator
// is zero (csinv ... ne keeps lr when NE holds, else inverts it)
1959 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1960 assert(denominator >= 0);
1961 if (hr >= 0) emit_zeroreg(hr);
1962 if (lr >= 0) {
1963 emit_zeroreg(lr);
1964 emit_test(denominator, denominator);
1965 emit_csinvne_reg(lr, lr, lr);
1966 }
1967 }
3968e69e 1968 else
1969 {
1970 // Multiply by zero is zero.
1971 if (hr >= 0) emit_zeroreg(hr);
1972 if (lr >= 0) emit_zeroreg(lr);
1973 }
1974 }
be516ebe 1975}
// name the generic assembler code calls this routine by
1976#define multdiv_assemble multdiv_assemble_arm64
1977
a22ccd6a 1978// wb_dirtys making use of stp when possible
1979static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
1980{
1981 signed char mregs[34+1];
1982 int r, hr;
1983 memset(mregs, -1, sizeof(mregs));
1984 for (hr = 0; hr < HOST_REGS; hr++) {
1985 r = i_regmap[hr];
1986 if (hr == EXCLUDE_REG || r <= 0 || r == CCREG)
1987 continue;
1988 if (!((i_dirty >> hr) & 1))
1989 continue;
1990 assert(r < 34u);
1991 mregs[r] = hr;
1992 }
1993 for (r = 1; r < 34; r++) {
1994 if (mregs[r] < 0)
1995 continue;
1996 if (mregs[r+1] >= 0) {
1997 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
1998 emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset);
1999 r++;
2000 }
2001 else
2002 emit_storereg(r, mregs[r]);
2003 }
2004}
2005#define wb_dirtys wb_dirtys
2006
2007static void load_all_regs(const signed char i_regmap[])
2008{
2009 signed char mregs[34+1];
2010 int r, hr;
2011 memset(mregs, -1, sizeof(mregs));
2012 for (hr = 0; hr < HOST_REGS; hr++) {
2013 r = i_regmap[hr];
2014 if (hr == EXCLUDE_REG || r < 0 || r == CCREG)
2015 continue;
2016 if ((u_int)r < 34u)
2017 mregs[r] = hr;
2018 else if (r < TEMPREG)
2019 emit_loadreg(r, hr);
2020 }
2021 if (mregs[0] >= 0)
2022 emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc
2023 for (r = 1; r < 34; r++) {
2024 if (mregs[r] < 0)
2025 continue;
2026 if (mregs[r+1] >= 0) {
2027 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
2028 emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset);
2029 r++;
2030 }
2031 else
2032 emit_loadreg(r, mregs[r]);
2033 }
2034}
2035#define load_all_regs load_all_regs
2036
d1e4ebd9 2037static void do_jump_vaddr(u_int rs)
2038{
2039 if (rs != 0)
2040 emit_mov(rs, 0);
104df9d3 2041 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 2042 emit_jmpreg(0);
2043}
2044
be516ebe 2045static void do_preload_rhash(u_int r) {
2046 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2047 // register. On ARM the hash can be done with a single instruction (below)
2048}
2049
2050static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 2051 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 2052}
2053
2054static void do_rhash(u_int rs,u_int rh) {
2055 emit_andimm(rs, 0xf8, rh);
2056}
2057
d1e4ebd9 2058static void do_miniht_load(int ht, u_int rh) {
2059 emit_add64(ht, rh, ht);
2060 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2061}
2062
d1e4ebd9 2063static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2064 emit_cmp(rh, rs);
2065 void *jaddr = out;
2066 emit_jeq(0);
2067 do_jump_vaddr(rs);
2068
2069 set_jump_target(jaddr, out);
2070 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2071 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2072 emit_jmpreg(ht);
be516ebe 2073}
2074
d1e4ebd9 2075// parsed by set_jump_target?
be516ebe 2076static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2077 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2078 emit_movk(return_address&0xffff,rt);
2079 add_to_linker(out,return_address,1);
2080 emit_adr(out,temp);
2081 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2082 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2083}
2084
d9e2b173 2085static unused void clear_cache_arm64(char *start, char *end)
be516ebe 2086{
919981d0 2087 // Don't rely on GCC's __clear_cache implementation, as it caches
2088 // icache/dcache cache line sizes, that can vary between cores on
2089 // big.LITTLE architectures.
2090 uint64_t addr, ctr_el0;
2091 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2092 size_t isize, dsize;
2093
2094 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2095 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2096 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2097
2098 // use the global minimum cache line size
2099 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2100 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2101
2102 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2103 not required for instruction to data coherence. */
2104 if ((ctr_el0 & (1 << 28)) == 0x0) {
2105 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2106 for (; addr < (uint64_t)end; addr += dsize)
2107 // use "civac" instead of "cvau", as this is the suggested workaround for
2108 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2109 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 2110 }
919981d0 2111 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2112
919981d0 2113 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2114 Unification is not required for instruction to data coherence. */
2115 if ((ctr_el0 & (1 << 29)) == 0x0) {
2116 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2117 for (; addr < (uint64_t)end; addr += isize)
2118 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2119
2120 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2121 }
919981d0 2122
2123 __asm__ volatile("isb" : : : "memory");
be516ebe 2124}
2125
2126// CPU-architecture-specific initialization
2a014d73 2127static void arch_init(void)
2128{
2129 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 2130 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 2131 size_t i;
2132 assert(!(diff & 3));
d9e2b173 2133 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 2134 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 2135 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2136 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 2137 }
2138 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2139}
2140
2141// vim:shiftwidth=2:expandtab