frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26/* Linker */
555d3b51 27static void set_jump_target_far1(u_int *insn_, void *target)
28{
29 u_int *insn = NDRC_WRITE_OFFSET(insn_);
30 u_int in = *insn & 0xfc000000;
31 intptr_t offset = (u_char *)target - (u_char *)insn_;
32 assert(in == 0x14000000);
33 assert(-134217728 <= offset && offset < 134217728);
34 in |= (offset >> 2) & 0x3ffffff;
35 *insn = in;
36}
37
d1e4ebd9 38static void set_jump_target(void *addr, void *target)
be516ebe 39{
d9e2b173 40 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 41 intptr_t offset = (u_char *)target - (u_char *)addr;
42
3968e69e 43 if ((*ptr&0xFC000000) == 0x14000000) { // b
555d3b51 44 set_jump_target_far1(addr, target);
d1e4ebd9 45 }
3968e69e 46 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
47 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 48 // Conditional branch are limited to +/- 1MB
49 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 50 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 51 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 52 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 53 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 54 }
3968e69e 55 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 56 // generated by do_miniht_insert
57 assert(offset>=-1048576LL&&offset<1048576LL);
58 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
59 }
60 else
3968e69e 61 abort(); // should not happen
be516ebe 62}
63
// from a pointer to external jump stub (which was produced by emit_extjump2)
// find where the jumping insn is
static void *find_extjump_insn(void *stub)
{
  // the adr instruction sits two instruction words into the stub
  int *ptr = (int *)stub + 2;
  const uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // Rebuild the 21-bit displacement (upper part in bits 23..5, low two bits
  // in bits 30..29) with unsigned arithmetic and sign-extend it explicitly,
  // so that no signed value is ever shifted left.
  int offset = (int)((((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3));
  if (offset & 0x100000)
    offset -= 0x200000;
  // offset is in bytes, ptr counts 32-bit words
  return ptr + offset / 4;
}
73
be516ebe 74// Allocate a specific ARM register.
75static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
76{
77 int n;
78 int dirty=0;
79
80 // see if it's already allocated (and dealloc it)
81 for(n=0;n<HOST_REGS;n++)
82 {
83 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
84 dirty=(cur->dirty>>n)&1;
85 cur->regmap[n]=-1;
86 }
87 }
88
89 cur->regmap[hr]=reg;
90 cur->dirty&=~(1<<hr);
91 cur->dirty|=dirty<<hr;
92 cur->isconst&=~(1<<hr);
93}
94
95// Alloc cycle count into dedicated register
90f98e7c 96static void alloc_cc(struct regstat *cur, int i)
be516ebe 97{
90f98e7c 98 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
99}
100
101static void alloc_cc_optional(struct regstat *cur, int i)
102{
103 if (cur->regmap[HOST_CCREG] < 0) {
104 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
105 cur->noevict &= ~(1u << HOST_CCREG);
106 }
be516ebe 107}
108
109/* Special alloc */
110
111
112/* Assembler */
113
0b1633d7 114static attr_unused const char *regname[32] = {
d1e4ebd9 115 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
116 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
117 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
118 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
119};
120
0b1633d7 121static attr_unused const char *regname64[32] = {
d1e4ebd9 122 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
123 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
124 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
125 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
126};
127
// Condition code numbers (0..15); the order matches condname[].
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC,
  COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT,
  COND_GT, COND_LE, COND_AW, COND_NV
};
132
0b1633d7 133static attr_unused const char *condname[16] = {
d1e4ebd9 134 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
135 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 136};
137
be516ebe 138static void output_w32(u_int word)
139{
d9e2b173 140 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 141 out += 4;
142}
143
3968e69e 144static u_int rn_rd(u_int rn, u_int rd)
145{
146 assert(rn < 31);
147 assert(rd < 31);
148 return (rn << 5) | rd;
149}
150
be516ebe 151static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
152{
d1e4ebd9 153 assert(rm < 32);
154 assert(rn < 32);
155 assert(rd < 32);
be516ebe 156 return (rm << 16) | (rn << 5) | rd;
157}
158
3968e69e 159static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
160{
161 assert(ra < 32);
162 return rm_rn_rd(rm, rn, rd) | (ra << 10);
163}
164
d1e4ebd9 165static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
166{
167 assert(imm7 < 0x80);
168 assert(rt2 < 31);
169 assert(rn < 32);
170 assert(rt < 31);
171 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
172}
173
687b4580 174static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
175{
176 assert(imm6 <= 63);
177 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
178}
179
be516ebe 180static u_int imm16_rd(u_int imm16, u_int rd)
181{
182 assert(imm16 < 0x10000);
183 assert(rd < 31);
184 return (imm16 << 5) | rd;
185}
186
687b4580 187static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
188{
189 assert(imm12 < 0x1000);
d1e4ebd9 190 assert(rn < 32);
191 assert(rd < 32);
192 return (imm12 << 10) | (rn << 5) | rd;
193}
194
195static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
196{
197 assert(imm9 < 0x200);
687b4580 198 assert(rn < 31);
199 assert(rd < 31);
d1e4ebd9 200 return (imm9 << 12) | (rn << 5) | rd;
687b4580 201}
202
d1e4ebd9 203static u_int imm19_rt(u_int imm19, u_int rt)
204{
205 assert(imm19 < 0x80000);
206 assert(rt < 31);
207 return (imm19 << 5) | rt;
208}
209
210static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
211{
212 assert(n < 2);
213 assert(immr < 0x40);
214 assert(imms < 0x40);
215 assert(rn < 32);
216 assert(rd < 32);
217 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
218}
219
220static u_int genjmp(const u_char *addr)
be516ebe 221{
222 intptr_t offset = addr - out;
d1e4ebd9 223 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 224 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 225 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
226 abort();
be516ebe 227 return 0;
228 }
d1e4ebd9 229 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 230}
231
d1e4ebd9 232static u_int genjmpcc(const u_char *addr)
be516ebe 233{
234 intptr_t offset = addr - out;
d1e4ebd9 235 if ((uintptr_t)addr < 3) return 0;
be516ebe 236 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 237 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
238 abort();
239 return 0;
240 }
241 return ((u_int)offset >> 2) & 0x7ffff;
242}
243
244static uint32_t is_mask(u_int value)
245{
246 return value && ((value + 1) & value) == 0;
247}
248
249// This function returns true if the argument contains a
250// non-empty sequence of ones (possibly rotated) with the remainder zero.
251static uint32_t is_rotated_mask(u_int value)
252{
3968e69e 253 if (value == 0 || value == ~0)
be516ebe 254 return 0;
d1e4ebd9 255 if (is_mask((value - 1) | value))
256 return 1;
257 return is_mask((~value - 1) | ~value);
258}
259
260static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
261{
262 int lzeros, tzeros, ones;
263 assert(value != 0);
264 if (is_mask((value - 1) | value)) {
265 lzeros = __builtin_clz(value);
266 tzeros = __builtin_ctz(value);
267 ones = 32 - lzeros - tzeros;
268 *immr = (32 - tzeros) & 31;
269 *imms = ones - 1;
270 return;
be516ebe 271 }
d1e4ebd9 272 value = ~value;
273 if (is_mask((value - 1) | value)) {
274 lzeros = __builtin_clz(value);
275 tzeros = __builtin_ctz(value);
276 ones = 32 - lzeros - tzeros;
3968e69e 277 *immr = lzeros;
d1e4ebd9 278 *imms = 31 - ones;
279 return;
280 }
3968e69e 281 abort();
be516ebe 282}
283
284static void emit_mov(u_int rs, u_int rt)
285{
687b4580 286 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 287 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
288}
289
290static void emit_mov64(u_int rs, u_int rt)
291{
292 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
293 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 294}
295
687b4580 296static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 297{
d1e4ebd9 298 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
299 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 300}
301
a5cd72d0 302static void emit_adds(u_int rs1, u_int rs2, u_int rt)
303{
304 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
305 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
306}
307
d1e4ebd9 308static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 309{
d1e4ebd9 310 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
311 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 312}
313
d1e4ebd9 314static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 315{
3968e69e 316 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 317 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
318}
39b71d9a 319#define emit_adds_ptr emit_adds64
d1e4ebd9 320
a5cd72d0 321static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
322{
323 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
324 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
325}
326
d1e4ebd9 327static void emit_neg(u_int rs, u_int rt)
328{
329 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 331}
332
a5cd72d0 333static void emit_negs(u_int rs, u_int rt)
334{
335 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
337}
338
687b4580 339static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 340{
d1e4ebd9 341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 343}
344
a5cd72d0 345static void emit_subs(u_int rs1, u_int rs2, u_int rt)
346{
347 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
348 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
349}
350
0b1633d7 351static attr_unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
3968e69e 352{
353 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
354 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
355}
356
d1e4ebd9 357static void emit_movz(u_int imm, u_int rt)
be516ebe 358{
d1e4ebd9 359 assem_debug("movz %s,#%#x\n", regname[rt], imm);
360 output_w32(0x52800000 | imm16_rd(imm, rt));
361}
362
363static void emit_movz_lsl16(u_int imm, u_int rt)
364{
365 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
366 output_w32(0x52a00000 | imm16_rd(imm, rt));
367}
368
369static void emit_movn(u_int imm, u_int rt)
370{
371 assem_debug("movn %s,#%#x\n", regname[rt], imm);
372 output_w32(0x12800000 | imm16_rd(imm, rt));
373}
374
375static void emit_movn_lsl16(u_int imm,u_int rt)
376{
377 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
378 output_w32(0x12a00000 | imm16_rd(imm, rt));
379}
380
381static void emit_movk(u_int imm,u_int rt)
382{
383 assem_debug("movk %s,#%#x\n", regname[rt], imm);
384 output_w32(0x72800000 | imm16_rd(imm, rt));
385}
386
387static void emit_movk_lsl16(u_int imm,u_int rt)
388{
389 assert(imm<65536);
3968e69e 390 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 391 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 392}
393
394static void emit_zeroreg(u_int rt)
395{
d1e4ebd9 396 emit_movz(0, rt);
be516ebe 397}
398
be516ebe 399static void emit_movimm(u_int imm, u_int rt)
400{
d1e4ebd9 401 if (imm < 65536)
402 emit_movz(imm, rt);
403 else if ((~imm) < 65536)
404 emit_movn(~imm, rt);
405 else if ((imm&0xffff) == 0)
406 emit_movz_lsl16(imm >> 16, rt);
407 else if (((~imm)&0xffff) == 0)
408 emit_movn_lsl16(~imm >> 16, rt);
409 else if (is_rotated_mask(imm)) {
410 u_int immr, imms;
411 gen_logical_imm(imm, &immr, &imms);
412 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
413 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
414 }
be516ebe 415 else {
d1e4ebd9 416 emit_movz(imm & 0xffff, rt);
417 emit_movk_lsl16(imm >> 16, rt);
be516ebe 418 }
419}
420
aaece508 421static void emit_movimm64(uint64_t imm, u_int rt)
422{
423 u_int shift, op, imm16, insns = 0;
424 for (shift = 0; shift < 4; shift++) {
425 imm16 = (imm >> shift * 16) & 0xffff;
426 if (!imm16)
427 continue;
428 op = insns ? 0xf2800000 : 0xd2800000;
429 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
430 if (shift)
431 assem_debug(",lsl #%u", shift * 16);
432 assem_debug("\n");
433 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
434 insns++;
435 }
436 if (!insns) {
437 assem_debug("movz %s,#0\n", regname64[rt]);
438 output_w32(0xd2800000 | imm16_rd(0, rt));
439 }
440}
441
687b4580 442static void emit_readword(void *addr, u_int rt)
443{
444 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
445 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 446 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 447 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
448 }
449 else
3968e69e 450 abort();
687b4580 451}
452
d1e4ebd9 453static void emit_readdword(void *addr, u_int rt)
454{
455 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
456 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 457 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
d1e4ebd9 458 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
459 }
3968e69e 460 else
461 abort();
462}
39b71d9a 463#define emit_readptr emit_readdword
3968e69e 464
465static void emit_readshword(void *addr, u_int rt)
466{
467 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
468 if (!(offset & 1) && offset <= 8190) {
469 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
470 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
471 }
d1e4ebd9 472 else
473 assert(0);
474}
475
be516ebe 476static void emit_loadreg(u_int r, u_int hr)
477{
d1e4ebd9 478 int is64 = 0;
be516ebe 479 if (r == 0)
480 emit_zeroreg(hr);
481 else {
33788798 482 void *addr;
be516ebe 483 switch (r) {
7c3a5182 484 //case HIREG: addr = &hi; break;
485 //case LOREG: addr = &lo; break;
be516ebe 486 case CCREG: addr = &cycle_count; break;
d1e4ebd9 487 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 488 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 489 default:
490 assert(r < 34);
491 addr = &psxRegs.GPR.r[r];
492 break;
be516ebe 493 }
d1e4ebd9 494 if (is64)
495 emit_readdword(addr, hr);
496 else
497 emit_readword(addr, hr);
be516ebe 498 }
499}
500
687b4580 501static void emit_writeword(u_int rt, void *addr)
502{
503 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
504 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 505 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 506 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
507 }
508 else
509 assert(0);
510}
511
d1e4ebd9 512static void emit_writedword(u_int rt, void *addr)
513{
514 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
515 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 516 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
3968e69e 517 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 518 }
519 else
3968e69e 520 abort();
d1e4ebd9 521}
522
687b4580 523static void emit_storereg(u_int r, u_int hr)
be516ebe 524{
525 assert(r < 64);
a7864494 526 void *addr;
be516ebe 527 switch (r) {
7c3a5182 528 //case HIREG: addr = &hi; break;
529 //case LOREG: addr = &lo; break;
be516ebe 530 case CCREG: addr = &cycle_count; break;
a7864494 531 default: assert(r < 34u); addr = &psxRegs.GPR.r[r]; break;
be516ebe 532 }
687b4580 533 emit_writeword(hr, addr);
be516ebe 534}
535
536static void emit_test(u_int rs, u_int rt)
537{
d1e4ebd9 538 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
539 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 540}
541
d1e4ebd9 542static void emit_testimm(u_int rs, u_int imm)
be516ebe 543{
d1e4ebd9 544 u_int immr, imms;
687b4580 545 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 546 assert(is_rotated_mask(imm)); // good enough for PCSX
547 gen_logical_imm(imm, &immr, &imms);
3968e69e 548 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 549}
550
551static void emit_not(u_int rs,u_int rt)
552{
553 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 554 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 555}
556
be516ebe 557static void emit_and(u_int rs1,u_int rs2,u_int rt)
558{
559 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 560 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 561}
562
563static void emit_or(u_int rs1,u_int rs2,u_int rt)
564{
565 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 566 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 567}
568
3968e69e 569static void emit_bic(u_int rs1,u_int rs2,u_int rt)
570{
571 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
572 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
573}
574
be516ebe 575static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
576{
be516ebe 577 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 578 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 579}
580
581static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
582{
be516ebe 583 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 584 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 585}
586
a5cd72d0 587static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
588{
589 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
590 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
591}
592
3968e69e 593static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
594{
595 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
596 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
597}
598
be516ebe 599static void emit_xor(u_int rs1,u_int rs2,u_int rt)
600{
601 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 602 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 603}
604
3968e69e 605static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
606{
607 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
608 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
609}
610
d1e4ebd9 611static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 612{
0b1633d7 613 attr_unused const char *st = s ? "s" : "";
d1e4ebd9 614 s = s ? 0x20000000 : 0;
615 is64 = is64 ? 0x80000000 : 0;
687b4580 616 if (imm < 4096) {
d1e4ebd9 617 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
618 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 619 }
620 else if (-imm < 4096) {
3968e69e 621 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 622 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
623 }
a5cd72d0 624 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
625 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
626 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
627 if (imm & 0xfff) {
628 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
629 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 630 }
631 }
a5cd72d0 632 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
633 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
634 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
635 if (-imm & 0xfff) {
636 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
637 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
d1e4ebd9 638 }
687b4580 639 }
a5cd72d0 640 else {
641 u_int tmp = rt;
642 assert(!is64);
643 if (rs == rt) {
644 host_tempreg_acquire();
645 tmp = HOST_TEMPREG;
646 }
647 emit_movimm(imm, tmp);
648 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
649 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
650 if (tmp == HOST_TEMPREG)
651 host_tempreg_release();
652 }
be516ebe 653}
654
d1e4ebd9 655static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
656{
9b495f6e 657 if (imm == 0) {
658 emit_mov(rs, rt);
659 return;
660 }
d1e4ebd9 661 emit_addimm_s(0, 0, rs, imm, rt);
662}
663
664static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
665{
666 emit_addimm_s(0, 1, rs, imm, rt);
667}
668
bc7c5acb 669static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
670{
671 emit_addimm64(rs, imm, rt);
672}
673
be516ebe 674static void emit_addimm_and_set_flags(int imm, u_int rt)
675{
d1e4ebd9 676 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 677}
678
a5cd72d0 679static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
680{
681 emit_addimm_s(1, 0, rs, imm, rt);
682}
683
d1e4ebd9 684static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 685{
d1e4ebd9 686 const char *names[] = { "and", "orr", "eor", "ands" };
687 const char *name = names[op];
688 u_int immr, imms;
689 op = op << 29;
690 if (is_rotated_mask(imm)) {
691 gen_logical_imm(imm, &immr, &imms);
692 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
693 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
694 }
695 else {
696 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
697 host_tempreg_acquire();
698 emit_movimm(imm, HOST_TEMPREG);
699 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
700 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
701 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
702 host_tempreg_release();
703 }
704 (void)name;
be516ebe 705}
706
d1e4ebd9 707static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 708{
d1e4ebd9 709 if (imm == 0)
710 emit_zeroreg(rt);
711 else
712 emit_logicop_imm(0, rs, imm, rt);
be516ebe 713}
714
d1e4ebd9 715static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 716{
d1e4ebd9 717 if (imm == 0) {
718 if (rs != rt)
719 emit_mov(rs, rt);
720 }
721 else
722 emit_logicop_imm(1, rs, imm, rt);
be516ebe 723}
724
d1e4ebd9 725static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 726{
d1e4ebd9 727 if (imm == 0) {
728 if (rs != rt)
729 emit_mov(rs, rt);
730 }
731 else
732 emit_logicop_imm(2, rs, imm, rt);
be516ebe 733}
734
d1e4ebd9 735static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 736{
d1e4ebd9 737 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
738 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 739}
740
d1e4ebd9 741static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 742{
d1e4ebd9 743 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
744 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 745}
746
747static void emit_shlimm(u_int rs,u_int imm,u_int rt)
748{
be516ebe 749 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 750 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 751}
752
3968e69e 753static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 754{
3968e69e 755 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
756 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 757}
758
3968e69e 759static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 760{
be516ebe 761 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 762 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 763}
764
765static void emit_sarimm(u_int rs,u_int imm,u_int rt)
766{
be516ebe 767 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 768 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 769}
770
771static void emit_rorimm(u_int rs,u_int imm,u_int rt)
772{
3968e69e 773 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 774 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 775}
776
777static void emit_signextend16(u_int rs, u_int rt)
778{
779 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 780 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 781}
782
d1e4ebd9 783static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 784{
3968e69e 785 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 786 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 787}
788
d1e4ebd9 789static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 790{
d1e4ebd9 791 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
792 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 793}
794
d1e4ebd9 795static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 796{
d1e4ebd9 797 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
798 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 799}
800
d1e4ebd9 801static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 802{
d1e4ebd9 803 if (imm < 4096) {
804 assem_debug("cmp %s,%#x\n", regname[rs], imm);
805 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
806 }
807 else if (-imm < 4096) {
808 assem_debug("cmn %s,%#x\n", regname[rs], imm);
809 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
810 }
811 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 812 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 813 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
814 }
815 else {
816 host_tempreg_acquire();
817 emit_movimm(imm, HOST_TEMPREG);
818 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
819 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
820 host_tempreg_release();
821 }
be516ebe 822}
823
d1e4ebd9 824static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 825{
d1e4ebd9 826 assert(imm == 0 || imm == 1);
827 assert(cond0 < 0x10);
828 assert(cond1 < 0x10);
829 if (imm) {
830 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
831 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
832 } else {
833 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
834 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
835 }
be516ebe 836}
837
d1e4ebd9 838static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 839{
d1e4ebd9 840 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 841}
842
d1e4ebd9 843static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 844{
d1e4ebd9 845 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 846}
847
848static void emit_cmovb_imm(int imm,u_int rt)
849{
d1e4ebd9 850 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 851}
852
3968e69e 853static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 854{
3968e69e 855 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
856 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 857}
858
859static void emit_cmovne_reg(u_int rs,u_int rt)
860{
d1e4ebd9 861 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
862 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 863}
864
865static void emit_cmovl_reg(u_int rs,u_int rt)
866{
d1e4ebd9 867 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
868 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 869}
870
e3c6bdb5 871static void emit_cmovb_reg(u_int rs,u_int rt)
872{
873 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
874 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
875}
876
be516ebe 877static void emit_cmovs_reg(u_int rs,u_int rt)
878{
d1e4ebd9 879 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
880 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 881}
882
3968e69e 883static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
884{
885 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
886 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
887}
888
a5cd72d0 889static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
890{
891 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
892 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
893}
894
be516ebe 895static void emit_slti32(u_int rs,int imm,u_int rt)
896{
897 if(rs!=rt) emit_zeroreg(rt);
898 emit_cmpimm(rs,imm);
899 if(rs==rt) emit_movimm(0,rt);
900 emit_cmovl_imm(1,rt);
901}
902
903static void emit_sltiu32(u_int rs,int imm,u_int rt)
904{
905 if(rs!=rt) emit_zeroreg(rt);
906 emit_cmpimm(rs,imm);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovb_imm(1,rt);
909}
910
911static void emit_cmp(u_int rs,u_int rt)
912{
913 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 914 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 915}
916
882a08fc 917static void emit_cmpcs(u_int rs,u_int rt)
918{
919 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
920 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
921}
922
be516ebe 923static void emit_set_gz32(u_int rs, u_int rt)
924{
925 //assem_debug("set_gz32\n");
926 emit_cmpimm(rs,1);
927 emit_movimm(1,rt);
928 emit_cmovl_imm(0,rt);
929}
930
931static void emit_set_nz32(u_int rs, u_int rt)
932{
933 //assem_debug("set_nz32\n");
d1e4ebd9 934 if(rs!=rt) emit_mov(rs,rt);
935 emit_test(rs,rs);
936 emit_cmovne_imm(1,rt);
be516ebe 937}
938
939static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
940{
941 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
942 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
943 emit_cmp(rs1,rs2);
944 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
945 emit_cmovl_imm(1,rt);
946}
947
948static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
949{
950 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
951 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
952 emit_cmp(rs1,rs2);
953 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
954 emit_cmovb_imm(1,rt);
955}
956
2a014d73 957static int can_jump_or_call(const void *a)
958{
959 intptr_t diff = (u_char *)a - out;
960 return (-134217728 <= diff && diff <= 134217727);
961}
962
d1e4ebd9 963static void emit_call(const void *a)
be516ebe 964{
d1e4ebd9 965 intptr_t diff = (u_char *)a - out;
14c9acee 966 assem_debug("bl %p%s\n", log_addr(a), func_name(a));
687b4580 967 assert(!(diff & 3));
968 if (-134217728 <= diff && diff <= 134217727)
969 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
970 else
3968e69e 971 abort();
be516ebe 972}
973
d1e4ebd9 974static void emit_jmp(const void *a)
be516ebe 975{
14c9acee 976 assem_debug("b %p%s\n", log_addr(a), func_name(a));
d1e4ebd9 977 u_int offset = genjmp(a);
978 output_w32(0x14000000 | offset);
be516ebe 979}
980
d1e4ebd9 981static void emit_jne(const void *a)
be516ebe 982{
14c9acee 983 assem_debug("bne %p\n", log_addr(a));
d1e4ebd9 984 u_int offset = genjmpcc(a);
985 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 986}
987
7c3a5182 988static void emit_jeq(const void *a)
be516ebe 989{
14c9acee 990 assem_debug("beq %p\n", log_addr(a));
d1e4ebd9 991 u_int offset = genjmpcc(a);
992 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 993}
994
7c3a5182 995static void emit_js(const void *a)
be516ebe 996{
14c9acee 997 assem_debug("bmi %p\n", log_addr(a));
d1e4ebd9 998 u_int offset = genjmpcc(a);
999 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 1000}
1001
7c3a5182 1002static void emit_jns(const void *a)
be516ebe 1003{
14c9acee 1004 assem_debug("bpl %p\n", log_addr(a));
d1e4ebd9 1005 u_int offset = genjmpcc(a);
1006 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 1007}
1008
7c3a5182 1009static void emit_jl(const void *a)
be516ebe 1010{
14c9acee 1011 assem_debug("blt %p\n", log_addr(a));
d1e4ebd9 1012 u_int offset = genjmpcc(a);
1013 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 1014}
1015
7c3a5182 1016static void emit_jge(const void *a)
be516ebe 1017{
14c9acee 1018 assem_debug("bge %p\n", log_addr(a));
d1e4ebd9 1019 u_int offset = genjmpcc(a);
1020 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 1021}
1022
a5cd72d0 1023static void emit_jo(const void *a)
1024{
14c9acee 1025 assem_debug("bvs %p\n", log_addr(a));
a5cd72d0 1026 u_int offset = genjmpcc(a);
1027 output_w32(0x54000000 | (offset << 5) | COND_VS);
1028}
1029
7c3a5182 1030static void emit_jno(const void *a)
be516ebe 1031{
14c9acee 1032 assem_debug("bvc %p\n", log_addr(a));
d1e4ebd9 1033 u_int offset = genjmpcc(a);
1034 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 1035}
1036
7c3a5182 1037static void emit_jc(const void *a)
be516ebe 1038{
14c9acee 1039 assem_debug("bcs %p\n", log_addr(a));
d1e4ebd9 1040 u_int offset = genjmpcc(a);
1041 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 1042}
1043
3968e69e 1044static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 1045{
14c9acee 1046 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], log_addr(a));
d1e4ebd9 1047 u_int offset = genjmpcc(a);
3968e69e 1048 is64 = is64 ? 0x80000000 : 0;
1049 isnz = isnz ? 0x01000000 : 0;
1050 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1051}
1052
9b495f6e 1053static void *emit_cbz(u_int r, const void *a)
3968e69e 1054{
9b495f6e 1055 void *ret = out;
3968e69e 1056 emit_cb(0, 0, a, r);
9b495f6e 1057 return ret;
be516ebe 1058}
1059
1060static void emit_jmpreg(u_int r)
1061{
3968e69e 1062 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1063 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1064}
1065
1066static void emit_retreg(u_int r)
1067{
d1e4ebd9 1068 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1069 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1070}
1071
1072static void emit_ret(void)
1073{
1074 emit_retreg(LR);
1075}
1076
d1e4ebd9 1077static void emit_adr(void *addr, u_int rt)
1078{
1079 intptr_t offset = (u_char *)addr - out;
1080 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1081 assert(rt < 31);
d1e4ebd9 1082 assem_debug("adr x%d,#%#lx\n", rt, offset);
1083 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1084}
1085
3968e69e 1086static void emit_adrp(void *addr, u_int rt)
1087{
1088 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1089 assert(-4294967296l <= offset && offset < 4294967296l);
1090 assert(rt < 31);
1091 offset >>= 12;
1092 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1093 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1094}
1095
be516ebe 1096static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1097{
d1e4ebd9 1098 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1101}
1102
1103static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1104{
1105 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1106 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1107}
1108
1109static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1110{
1111 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1112 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1113}
1114
1115static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1116{
1117 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1118 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1119}
1120
1121static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1122{
1123 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1124 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1125}
39b71d9a 1126#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1127
1128static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1129{
1130 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1131 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1132}
1133
1134static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1135{
1136 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1137 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1138}
1139
1140static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1141{
1142 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1143 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1144}
1145
1146static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1147{
1148 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1149 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1150}
1151
1152static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1153{
1154 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1155 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1156}
1157
be516ebe 1158static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1159{
d1e4ebd9 1160 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1161 assert(-256 <= offset && offset < 256);
1162 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1163}
1164
1165static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1166{
d1e4ebd9 1167 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1168 assert(-256 <= offset && offset < 256);
1169 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1170}
1171
1172static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1173{
d1e4ebd9 1174 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1175 assert(-256 <= offset && offset < 256);
1176 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1177}
1178
1179static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1180{
d1e4ebd9 1181 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1182 assert(-256 <= offset && offset < 256);
1183 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1184}
1185
be516ebe 1186static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1187{
3968e69e 1188 if (!(offset & 3) && (u_int)offset <= 16380) {
1189 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1190 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1191 }
1192 else if (-256 <= offset && offset < 256) {
1193 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1194 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1195 }
687b4580 1196 else
1197 assert(0);
be516ebe 1198}
1199
1200static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1201{
3968e69e 1202 if (!(offset & 1) && (u_int)offset <= 8190) {
1203 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1204 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1205 }
1206 else if (-256 <= offset && offset < 256) {
1207 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1208 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1209 }
687b4580 1210 else
1211 assert(0);
be516ebe 1212}
1213
1214static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1215{
3968e69e 1216 if ((u_int)offset < 4096) {
1217 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1218 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1219 }
1220 else if (-256 <= offset && offset < 256) {
1221 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1222 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1223 }
687b4580 1224 else
1225 assert(0);
be516ebe 1226}
1227
3968e69e 1228static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1229{
3968e69e 1230 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1231 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1232}
1233
3968e69e 1234static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1235{
3968e69e 1236 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1237 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1238}
1239
1240static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1241{
1242 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1243 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1244}
1245
1246static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1247{
1248 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1249 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1250}
1251
3968e69e 1252static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1253{
1254 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1255 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1256}
1257
1258static void emit_clz(u_int rs, u_int rt)
be516ebe 1259{
1260 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1261 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1262}
1263
be516ebe 1264// special case for checking invalid_code
9b495f6e 1265static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1266{
1267 emit_shrimm(r, 12, rt);
1268 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1269 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1270}
1271
3968e69e 1272// special for loadlr_assemble, rs2 is destroyed
1273static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1274{
3968e69e 1275 emit_shl(rs2, shift, rs2);
1276 emit_bic(rs1, rs2, rt);
be516ebe 1277}
1278
3968e69e 1279static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1280{
3968e69e 1281 emit_shr(rs2, shift, rs2);
1282 emit_bic(rs1, rs2, rt);
be516ebe 1283}
1284
687b4580 1285static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1286{
687b4580 1287 u_int op = 0xb9000000;
0b1633d7 1288 attr_unused const char *ldst = is_st ? "st" : "ld";
1289 attr_unused char rp = is64 ? 'x' : 'w';
687b4580 1290 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1291 is64 = is64 ? 1 : 0;
1292 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1293 ofs = (ofs >> (2+is64));
687b4580 1294 if (!is_st) op |= 0x00400000;
1295 if (is64) op |= 0x40000000;
d1e4ebd9 1296 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1297}
1298
687b4580 1299static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1300{
687b4580 1301 u_int op = 0x29000000;
0b1633d7 1302 attr_unused const char *ldst = is_st ? "st" : "ld";
1303 attr_unused char rp = is64 ? 'x' : 'w';
687b4580 1304 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1305 is64 = is64 ? 1 : 0;
1306 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1307 ofs = (ofs >> (2+is64));
1308 assert(-64 <= ofs && ofs <= 63);
1309 ofs &= 0x7f;
1310 if (!is_st) op |= 0x00400000;
1311 if (is64) op |= 0x80000000;
d1e4ebd9 1312 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1313}
1314
1315static void save_load_regs_all(int is_store, u_int reglist)
1316{
1317 int ofs = 0, c = 0;
1318 u_int r, pair[2];
1319 for (r = 0; reglist; r++, reglist >>= 1) {
1320 if (reglist & 1)
1321 pair[c++] = r;
1322 if (c == 2) {
1323 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1324 ofs += 8 * 2;
1325 c = 0;
1326 }
1327 }
1328 if (c) {
1329 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1330 ofs += 8;
1331 }
1332 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1333}
1334
1335// Save registers before function call
1336static void save_regs(u_int reglist)
1337{
1338 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1339 save_load_regs_all(1, reglist);
be516ebe 1340}
1341
1342// Restore registers after function call
1343static void restore_regs(u_int reglist)
1344{
1345 reglist &= CALLER_SAVE_REGS;
687b4580 1346 save_load_regs_all(0, reglist);
be516ebe 1347}
1348
1349/* Stubs/epilogue */
1350
1351static void literal_pool(int n)
1352{
1353 (void)literals;
1354}
1355
// Intentionally empty in this backend.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1359
555d3b51 1360// parsed by find_extjump_insn, check_extjump2
104df9d3 1361static void emit_extjump(u_char *addr, u_int target)
be516ebe 1362{
d1e4ebd9 1363 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1364
d1e4ebd9 1365 emit_movz(target & 0xffff, 0);
1366 emit_movk_lsl16(target >> 16, 0);
1367
1368 // addr is in the current recompiled block (max 256k)
1369 // offset shouldn't exceed +/-1MB
1370 emit_adr(addr, 1);
104df9d3 1371 emit_far_jump(dyna_linker);
be516ebe 1372}
1373
d1e4ebd9 1374static void check_extjump2(void *src)
be516ebe 1375{
d1e4ebd9 1376 u_int *ptr = src;
1377 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1378 (void)ptr;
be516ebe 1379}
1380
1381// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1382static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1383{
d1e4ebd9 1384 int diff = rt_val - rs_val;
3968e69e 1385 if ((-4096 < diff && diff < 4096)
1386 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1387 emit_addimm(rs, diff, rt);
3968e69e 1388 else if (rt_val == ~rs_val)
1389 emit_not(rs, rt);
d1e4ebd9 1390 else if (is_rotated_mask(rs_val ^ rt_val))
1391 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1392 else
d1e4ebd9 1393 emit_movimm(rt_val, rt);
be516ebe 1394}
1395
d1e4ebd9 1396// return 1 if the above function can do it's job cheaply
687b4580 1397static int is_similar_value(u_int v1, u_int v2)
be516ebe 1398{
687b4580 1399 int diff = v1 - v2;
3968e69e 1400 return (-4096 < diff && diff < 4096)
1401 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1402 || v1 == ~v2
d1e4ebd9 1403 || is_rotated_mask(v1 ^ v2);
1404}
1405
37387d8b 1406static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1407{
1408 if (rt_val < 0x100000000ull) {
1409 emit_movimm_from(rs_val, rs, rt_val, rt);
1410 return;
1411 }
1412 // just move the whole thing. At least on Linux all addresses
1413 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1414 emit_movimm64(rt_val, rt);
37387d8b 1415}
1416
1417// trashes x2
d1e4ebd9 1418static void pass_args64(u_int a0, u_int a1)
1419{
1420 if(a0==1&&a1==0) {
1421 // must swap
1422 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1423 }
1424 else if(a0!=0&&a1==0) {
1425 emit_mov64(a1,1);
1426 if (a0>=0) emit_mov64(a0,0);
1427 }
1428 else {
1429 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1430 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1431 }
be516ebe 1432}
1433
d1e4ebd9 1434static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1435{
1436 switch(type) {
1437 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1438 case LOADBU_STUB:
1439 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1440 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1441 case LOADHU_STUB:
1442 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1443 case LOADW_STUB:
1444 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1445 default: assert(0);
d1e4ebd9 1446 }
1447}
1448
1449#include "pcsxmem.h"
be516ebe 1450//#include "pcsxmem_inline.c"
1451
1452static void do_readstub(int n)
1453{
1454 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1455 set_jump_target(stubs[n].addr, out);
1456 enum stub_type type = stubs[n].type;
1457 int i = stubs[n].a;
1458 int rs = stubs[n].b;
1459 const struct regstat *i_regs = (void *)stubs[n].c;
7da5c7ad 1460 int adj = (int)stubs[n].d;
d1e4ebd9 1461 u_int reglist = stubs[n].e;
1462 const signed char *i_regmap = i_regs->regmap;
1463 int rt;
a5cd72d0 1464 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1465 rt=get_reg(i_regmap,FTEMP);
1466 }else{
cf95b4f0 1467 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1468 }
1469 assert(rs>=0);
1470 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1471 void *restore_jump = NULL, *handler_jump = NULL;
1472 reglist|=(1<<rs);
1473 for (r = 0; r < HOST_CCREG; r++) {
1474 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1475 temp = r;
1476 break;
1477 }
1478 }
cf95b4f0 1479 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1480 reglist&=~(1<<rt);
1481 if(temp==-1) {
1482 save_regs(reglist);
1483 regs_saved=1;
1484 temp=(rs==0)?2:0;
1485 }
1486 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1487 temp2=1;
1488 emit_readdword(&mem_rtab,temp);
1489 emit_shrimm(rs,12,temp2);
1490 emit_readdword_dualindexedx8(temp,temp2,temp2);
1491 emit_adds64(temp2,temp2,temp2);
1492 handler_jump=out;
1493 emit_jc(0);
a5cd72d0 1494 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1495 switch(type) {
1496 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1497 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1498 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1499 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1500 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1501 default: assert(0);
d1e4ebd9 1502 }
1503 }
1504 if(regs_saved) {
1505 restore_jump=out;
1506 emit_jmp(0); // jump to reg restore
1507 }
1508 else
1509 emit_jmp(stubs[n].retaddr); // return address
1510 set_jump_target(handler_jump, out);
1511
1512 if(!regs_saved)
1513 save_regs(reglist);
1514 void *handler=NULL;
1515 if(type==LOADB_STUB||type==LOADBU_STUB)
1516 handler=jump_handler_read8;
1517 if(type==LOADH_STUB||type==LOADHU_STUB)
1518 handler=jump_handler_read16;
1519 if(type==LOADW_STUB)
1520 handler=jump_handler_read32;
1521 assert(handler);
1522 pass_args64(rs,temp2);
7da5c7ad 1523 int cc, cc_use;
1524 cc = cc_use = get_reg(i_regmap, CCREG);
1525 if (cc < 0)
1526 emit_loadreg(CCREG, (cc_use = 2));
1527 emit_addimm(cc_use, adj, 2);
1528
2a014d73 1529 emit_far_call(handler);
7da5c7ad 1530
1531#if 0
1532 // cycle reload for read32 only (value in w2 both in and out)
1533 if (type == LOADW_STUB) {
1534 emit_addimm(2, -adj, cc_use);
1535 if (cc < 0)
1536 emit_storereg(CCREG, cc_use);
1537 }
1538#endif
a5cd72d0 1539 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1540 loadstore_extend(type,0,rt);
1541 }
1542 if(restore_jump)
1543 set_jump_target(restore_jump, out);
1544 restore_regs(reglist);
1545 emit_jmp(stubs[n].retaddr);
be516ebe 1546}
1547
81dbbf4c 1548static void inline_readstub(enum stub_type type, int i, u_int addr,
1549 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1550{
277718fa 1551 int ra = cinfo[i].addr;
1552 int rt = get_reg(regmap, target);
1553 assert(ra >= 0);
d1e4ebd9 1554 u_int is_dynamic=0;
1555 uintptr_t host_addr = 0;
1556 void *handler;
7da5c7ad 1557 int cc, cc_use;
1558 cc = cc_use = get_reg(regmap, CCREG);
277718fa 1559 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
d1e4ebd9 1560 // return;
1561 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1562 if (handler == NULL) {
cf95b4f0 1563 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1564 return;
37387d8b 1565 if (addr != host_addr)
277718fa 1566 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1567 switch(type) {
277718fa 1568 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1569 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1570 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1571 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1572 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
d1e4ebd9 1573 default: assert(0);
1574 }
1575 return;
1576 }
37387d8b 1577 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1578 if (is_dynamic) {
d1e4ebd9 1579 if(type==LOADB_STUB||type==LOADBU_STUB)
1580 handler=jump_handler_read8;
1581 if(type==LOADH_STUB||type==LOADHU_STUB)
1582 handler=jump_handler_read16;
1583 if(type==LOADW_STUB)
1584 handler=jump_handler_read32;
1585 }
1586
1587 // call a memhandler
cf95b4f0 1588 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1589 reglist&=~(1<<rt);
1590 save_regs(reglist);
1591 if(target==0)
1592 emit_movimm(addr,0);
277718fa 1593 else if(ra!=0)
1594 emit_mov(ra,0);
7da5c7ad 1595 if (cc < 0)
1596 emit_loadreg(CCREG, (cc_use = 2));
1597 emit_addimm(cc_use, adj, 2);
3968e69e 1598 if(is_dynamic) {
1599 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
aaece508 1600 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1601 if (-4294967296l <= offset && offset < 4294967296l) {
1602 emit_adrp((void *)l1, 1);
1603 emit_addimm64(1, l1 & 0xfff, 1);
1604 }
1605 else
1606 emit_movimm64(l1, 1);
3968e69e 1607 }
d1e4ebd9 1608 else
2a014d73 1609 emit_far_call(do_memhandler_pre);
d1e4ebd9 1610
2a014d73 1611 emit_far_call(handler);
d1e4ebd9 1612
7da5c7ad 1613#if 0
1614 // cycle reload for read32 only (value in w2 both in and out)
1615 if (type == LOADW_STUB) {
1616 if (!is_dynamic)
1617 emit_far_call(do_memhandler_post);
1618 emit_addimm(2, -adj, cc_use);
1619 if (cc < 0)
1620 emit_storereg(CCREG, cc_use);
1621 }
1622#endif
cf95b4f0 1623 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1624 loadstore_extend(type, 0, rt);
1625 restore_regs(reglist);
be516ebe 1626}
1627
1628static void do_writestub(int n)
1629{
1630 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1631 set_jump_target(stubs[n].addr, out);
1632 enum stub_type type=stubs[n].type;
1633 int i=stubs[n].a;
1634 int rs=stubs[n].b;
1635 struct regstat *i_regs=(struct regstat *)stubs[n].c;
7da5c7ad 1636 int adj = (int)stubs[n].d;
d1e4ebd9 1637 u_int reglist=stubs[n].e;
1638 signed char *i_regmap=i_regs->regmap;
1639 int rt,r;
a5cd72d0 1640 if(dops[i].itype==C2LS) {
d1e4ebd9 1641 rt=get_reg(i_regmap,r=FTEMP);
1642 }else{
cf95b4f0 1643 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1644 }
1645 assert(rs>=0);
1646 assert(rt>=0);
1647 int rtmp,temp=-1,temp2,regs_saved=0;
1648 void *restore_jump = NULL, *handler_jump = NULL;
1649 int reglist2=reglist|(1<<rs)|(1<<rt);
1650 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1651 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1652 temp = rtmp;
1653 break;
1654 }
1655 }
1656 if(temp==-1) {
1657 save_regs(reglist);
1658 regs_saved=1;
1659 for(rtmp=0;rtmp<=3;rtmp++)
1660 if(rtmp!=rs&&rtmp!=rt)
1661 {temp=rtmp;break;}
1662 }
1663 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1664 temp2=3;
1665 else {
1666 host_tempreg_acquire();
1667 temp2=HOST_TEMPREG;
1668 }
1669 emit_readdword(&mem_wtab,temp);
1670 emit_shrimm(rs,12,temp2);
1671 emit_readdword_dualindexedx8(temp,temp2,temp2);
1672 emit_adds64(temp2,temp2,temp2);
1673 handler_jump=out;
1674 emit_jc(0);
1675 switch(type) {
1676 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1677 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1678 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1679 default: assert(0);
1680 }
1681 if(regs_saved) {
1682 restore_jump=out;
1683 emit_jmp(0); // jump to reg restore
1684 }
1685 else
1686 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1687 set_jump_target(handler_jump, out);
1688
d1e4ebd9 1689 if(!regs_saved)
1690 save_regs(reglist);
1691 void *handler=NULL;
1692 switch(type) {
1693 case STOREB_STUB: handler=jump_handler_write8; break;
1694 case STOREH_STUB: handler=jump_handler_write16; break;
1695 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1696 default: assert(0);
d1e4ebd9 1697 }
1698 assert(handler);
1699 pass_args(rs,rt);
1700 if(temp2!=3) {
1701 emit_mov64(temp2,3);
1702 host_tempreg_release();
1703 }
7da5c7ad 1704 int cc, cc_use;
1705 cc = cc_use = get_reg(i_regmap, CCREG);
1706 if (cc < 0)
1707 emit_loadreg(CCREG, (cc_use = 2));
1708 emit_addimm(cc_use, adj, 2);
1709
2a014d73 1710 emit_far_call(handler);
7da5c7ad 1711
1712 // new cycle_count returned in x2
1713 emit_addimm(2, -adj, cc_use);
1714 if (cc < 0)
1715 emit_storereg(CCREG, cc_use);
1716 if (restore_jump)
d1e4ebd9 1717 set_jump_target(restore_jump, out);
1718 restore_regs(reglist);
1719 emit_jmp(stubs[n].retaddr);
be516ebe 1720}
1721
81dbbf4c 1722static void inline_writestub(enum stub_type type, int i, u_int addr,
1723 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1724{
277718fa 1725 int ra = cinfo[i].addr;
687b4580 1726 int rt = get_reg(regmap,target);
277718fa 1727 assert(ra >= 0);
687b4580 1728 assert(rt >= 0);
1729 uintptr_t host_addr = 0;
1730 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1731 if (handler == NULL) {
37387d8b 1732 if (addr != host_addr)
277718fa 1733 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1734 switch (type) {
277718fa 1735 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1736 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1737 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
687b4580 1738 default: assert(0);
1739 }
1740 return;
1741 }
1742
1743 // call a memhandler
1744 save_regs(reglist);
277718fa 1745 emit_writeword(ra, &address); // some handlers still need it
d1e4ebd9 1746 loadstore_extend(type, rt, 0);
1747 int cc, cc_use;
1748 cc = cc_use = get_reg(regmap, CCREG);
1749 if (cc < 0)
1750 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1751 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1752
2a014d73 1753 emit_far_call(do_memhandler_pre);
1754 emit_far_call(handler);
1755 emit_far_call(do_memhandler_post);
7da5c7ad 1756 emit_addimm(2, -adj, cc_use);
d1e4ebd9 1757 if (cc < 0)
1758 emit_storereg(CCREG, cc_use);
687b4580 1759 restore_regs(reglist);
be516ebe 1760}
1761
3968e69e 1762/* Special assem */
1763
81dbbf4c 1764static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1765{
1766 save_load_regs_all(1, reglist);
32631e6a 1767 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1768#ifdef PCNT
1769 emit_movimm(op, 0);
2a014d73 1770 emit_far_call(pcnt_gte_start);
3968e69e 1771#endif
1772 // pointer to cop2 regs
1773 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1774}
1775
1776static void c2op_epilogue(u_int op,u_int reglist)
1777{
1778#ifdef PCNT
1779 emit_movimm(op, 0);
2a014d73 1780 emit_far_call(pcnt_gte_end);
3968e69e 1781#endif
1782 save_load_regs_all(0, reglist);
be516ebe 1783}
1784
81dbbf4c 1785static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1786{
3968e69e 1787 u_int c2op=source[i]&0x3f;
1788 u_int hr,reglist_full=0,reglist;
1789 int need_flags,need_ir;
1790 for(hr=0;hr<HOST_REGS;hr++) {
1791 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1792 }
1793 reglist=reglist_full&CALLER_SAVE_REGS;
1794
1795 if (gte_handlers[c2op]!=NULL) {
1796 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1797 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1798 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1799 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1800 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1801 need_flags=0;
1802 //int shift = (source[i] >> 19) & 1;
1803 //int lm = (source[i] >> 10) & 1;
1804 switch(c2op) {
1805 default:
1806 (void)need_ir;
81dbbf4c 1807 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1808 emit_movimm(source[i],1); // opcode
1809 emit_writeword(1,&psxRegs.code);
2a014d73 1810 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1811 break;
1812 }
1813 c2op_epilogue(c2op,reglist);
1814 }
1815}
1816
1817static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1818{
1819 //value = value & 0x7ffff000;
1820 //if (value & 0x7f87e000) value |= 0x80000000;
1821 emit_andimm(sl, 0x7fffe000, temp);
1822 emit_testimm(temp, 0xff87ffff);
1823 emit_andimm(sl, 0x7ffff000, temp);
1824 host_tempreg_acquire();
1825 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1826 emit_cmovne_reg(HOST_TEMPREG, temp);
1827 host_tempreg_release();
1828 assert(0); // testing needed
1829}
1830
1831static void do_mfc2_31_one(u_int copr,signed char temp)
1832{
1833 emit_readshword(&reg_cop2d[copr],temp);
1834 emit_bicsar_imm(temp,31,temp);
1835 emit_cmpimm(temp,0xf80);
1836 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1837 emit_andimm(temp,0xf80,temp);
1838}
1839
1840static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1841{
1842 if (temp < 0) {
1843 host_tempreg_acquire();
1844 temp = HOST_TEMPREG;
1845 }
1846 do_mfc2_31_one(9,temp);
1847 emit_shrimm(temp,7,tl);
1848 do_mfc2_31_one(10,temp);
1849 emit_orrshr_imm(temp,2,tl);
1850 do_mfc2_31_one(11,temp);
1851 emit_orrshl_imm(temp,3,tl);
1852 emit_writeword(tl,&reg_cop2d[29]);
1853
1854 if (temp == HOST_TEMPREG)
1855 host_tempreg_release();
be516ebe 1856}
1857
2330734f 1858static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1859{
3968e69e 1860 // case 0x18: MULT
1861 // case 0x19: MULTU
1862 // case 0x1A: DIV
1863 // case 0x1B: DIVU
cf95b4f0 1864 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1865 {
cf95b4f0 1866 switch(dops[i].opcode2)
3968e69e 1867 {
1868 case 0x18: // MULT
1869 case 0x19: // MULTU
1870 {
cf95b4f0 1871 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1872 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1873 signed char hi=get_reg(i_regs->regmap,HIREG);
1874 signed char lo=get_reg(i_regs->regmap,LOREG);
1875 assert(m1>=0);
1876 assert(m2>=0);
1877 assert(hi>=0);
1878 assert(lo>=0);
1879
cf95b4f0 1880 if(dops[i].opcode2==0x18) // MULT
3968e69e 1881 emit_smull(m1,m2,hi);
1882 else // MULTU
1883 emit_umull(m1,m2,hi);
1884
1885 emit_mov(hi,lo);
1886 emit_shrimm64(hi,32,hi);
1887 break;
1888 }
1889 case 0x1A: // DIV
1890 case 0x1B: // DIVU
1891 {
cf95b4f0 1892 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1893 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1894 signed char quotient=get_reg(i_regs->regmap,LOREG);
1895 signed char remainder=get_reg(i_regs->regmap,HIREG);
1896 assert(numerator>=0);
1897 assert(denominator>=0);
1898 assert(quotient>=0);
1899 assert(remainder>=0);
1900
cf95b4f0 1901 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1902 emit_sdiv(numerator,denominator,quotient);
1903 else // DIVU
1904 emit_udiv(numerator,denominator,quotient);
1905 emit_msub(quotient,denominator,numerator,remainder);
1906
1907 // div 0 quotient (remainder is already correct)
1908 host_tempreg_acquire();
a5cd72d0 1909 if (dops[i].opcode2 == 0x1A) { // DIV
1910 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1911 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1912 }
3968e69e 1913 else
1914 emit_movimm(~0,HOST_TEMPREG);
1915 emit_test(denominator,denominator);
1916 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1917 host_tempreg_release();
1918 break;
1919 }
1920 default:
1921 assert(0);
1922 }
1923 }
1924 else
1925 {
1926 signed char hr=get_reg(i_regs->regmap,HIREG);
1927 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1928 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1929 {
cf95b4f0 1930 if (dops[i].rs1) {
1931 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1932 assert(numerator >= 0);
1933 if (hr >= 0)
1934 emit_mov(numerator,hr);
1935 if (lr >= 0) {
a5cd72d0 1936 if (dops[i].opcode2 == 0x1A) { // DIV
1937 emit_add_lsrimm(WZR,numerator,31,lr);
1938 emit_orn_asrimm(lr,numerator,31,lr);
1939 }
3968e69e 1940 else
1941 emit_movimm(~0,lr);
1942 }
1943 }
1944 else {
1945 if (hr >= 0) emit_zeroreg(hr);
1946 if (lr >= 0) emit_movimm(~0,lr);
1947 }
1948 }
a5cd72d0 1949 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1950 {
1951 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1952 assert(denominator >= 0);
1953 if (hr >= 0) emit_zeroreg(hr);
1954 if (lr >= 0) {
1955 emit_zeroreg(lr);
1956 emit_test(denominator, denominator);
1957 emit_csinvne_reg(lr, lr, lr);
1958 }
1959 }
3968e69e 1960 else
1961 {
1962 // Multiply by zero is zero.
1963 if (hr >= 0) emit_zeroreg(hr);
1964 if (lr >= 0) emit_zeroreg(lr);
1965 }
1966 }
be516ebe 1967}
1968#define multdiv_assemble multdiv_assemble_arm64
1969
a22ccd6a 1970// wb_dirtys making use of stp when possible
1971static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
1972{
1973 signed char mregs[34+1];
1974 int r, hr;
1975 memset(mregs, -1, sizeof(mregs));
1976 for (hr = 0; hr < HOST_REGS; hr++) {
1977 r = i_regmap[hr];
1978 if (hr == EXCLUDE_REG || r <= 0 || r == CCREG)
1979 continue;
1980 if (!((i_dirty >> hr) & 1))
1981 continue;
1982 assert(r < 34u);
1983 mregs[r] = hr;
1984 }
1985 for (r = 1; r < 34; r++) {
1986 if (mregs[r] < 0)
1987 continue;
1988 if (mregs[r+1] >= 0) {
1989 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
1990 emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset);
1991 r++;
1992 }
1993 else
1994 emit_storereg(r, mregs[r]);
1995 }
1996}
1997#define wb_dirtys wb_dirtys
1998
1999static void load_all_regs(const signed char i_regmap[])
2000{
2001 signed char mregs[34+1];
2002 int r, hr;
2003 memset(mregs, -1, sizeof(mregs));
2004 for (hr = 0; hr < HOST_REGS; hr++) {
2005 r = i_regmap[hr];
2006 if (hr == EXCLUDE_REG || r < 0 || r == CCREG)
2007 continue;
2008 if ((u_int)r < 34u)
2009 mregs[r] = hr;
2010 else if (r < TEMPREG)
2011 emit_loadreg(r, hr);
2012 }
2013 if (mregs[0] >= 0)
2014 emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc
2015 for (r = 1; r < 34; r++) {
2016 if (mregs[r] < 0)
2017 continue;
2018 if (mregs[r+1] >= 0) {
2019 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
2020 emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset);
2021 r++;
2022 }
2023 else
2024 emit_loadreg(r, mregs[r]);
2025 }
2026}
2027#define load_all_regs load_all_regs
2028
d1e4ebd9 2029static void do_jump_vaddr(u_int rs)
2030{
2031 if (rs != 0)
2032 emit_mov(rs, 0);
ea5c2d78 2033 emit_readptr(&hash_table_ptr, 1);
104df9d3 2034 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 2035 emit_jmpreg(0);
2036}
2037
be516ebe 2038static void do_preload_rhash(u_int r) {
2039 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2040 // register. On ARM the hash can be done with a single instruction (below)
2041}
2042
2043static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 2044 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 2045}
2046
2047static void do_rhash(u_int rs,u_int rh) {
2048 emit_andimm(rs, 0xf8, rh);
2049}
2050
d1e4ebd9 2051static void do_miniht_load(int ht, u_int rh) {
2052 emit_add64(ht, rh, ht);
2053 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2054}
2055
d1e4ebd9 2056static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2057 emit_cmp(rh, rs);
2058 void *jaddr = out;
2059 emit_jeq(0);
2060 do_jump_vaddr(rs);
2061
2062 set_jump_target(jaddr, out);
2063 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2064 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2065 emit_jmpreg(ht);
be516ebe 2066}
2067
d1e4ebd9 2068// parsed by set_jump_target?
be516ebe 2069static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2070 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2071 emit_movk(return_address&0xffff,rt);
2072 add_to_linker(out,return_address,1);
2073 emit_adr(out,temp);
2074 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2075 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2076}
2077
0b1633d7 2078static attr_unused void clear_cache_arm64(char *start, char *end)
be516ebe 2079{
919981d0 2080 // Don't rely on GCC's __clear_cache implementation, as it caches
2081 // icache/dcache cache line sizes, that can vary between cores on
2082 // big.LITTLE architectures.
2083 uint64_t addr, ctr_el0;
2084 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2085 size_t isize, dsize;
2086
2087 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2088 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2089 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2090
2091 // use the global minimum cache line size
2092 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2093 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2094
2095 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2096 not required for instruction to data coherence. */
2097 if ((ctr_el0 & (1 << 28)) == 0x0) {
2098 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2099 for (; addr < (uint64_t)end; addr += dsize)
2100 // use "civac" instead of "cvau", as this is the suggested workaround for
2101 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2102 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 2103 }
919981d0 2104 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2105
919981d0 2106 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2107 Unification is not required for instruction to data coherence. */
2108 if ((ctr_el0 & (1 << 29)) == 0x0) {
2109 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2110 for (; addr < (uint64_t)end; addr += isize)
2111 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2112
2113 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2114 }
919981d0 2115
2116 __asm__ volatile("isb" : : : "memory");
be516ebe 2117}
2118
2119// CPU-architecture-specific initialization
2a014d73 2120static void arch_init(void)
2121{
2122 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 2123 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 2124 size_t i;
2125 assert(!(diff & 3));
d9e2b173 2126 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 2127 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 2128 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2129 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 2130 }
2131 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2132}
2133
2134// vim:shiftwidth=2:expandtab