1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
27 static void set_jump_target(void *addr, void *target)
29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
30 intptr_t offset = (u_char *)target - (u_char *)addr;
32 if ((*ptr&0xFC000000) == 0x14000000) { // b
33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
40 // should only happen when jumping to an already compiled block (see add_jump_out)
41 // a workaround would be to do a trampoline jump via a stub at the end of the block
42 assert(-1048576 <= offset && offset < 1048576);
43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
45 else if((*ptr&0x9f000000)==0x10000000) { // adr
46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
51 abort(); // should not happen
// From a pointer to an external jump stub (as produced by emit_extjump2),
// find where the jumping insn is.
//
// The third 32-bit word of the stub must be an `adr`; its PC-relative
// immediate (immhi in bits 23:5, immlo in bits 30:29) is decoded and applied
// to the address of that `adr` word. The decode is done on an unsigned copy
// of the instruction with explicit sign-extension, so no signed value is
// ever left-shifted (which would be undefined behaviour).
// Returns the address the `adr` refers to.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  const uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr

  int32_t immhi = (int32_t)((insn >> 5) & 0x7ffff);
  if (immhi & 0x40000)
    immhi -= 0x80000; // sign-extend the 19-bit field

  int offset = immhi * 4 + (int)((insn >> 29) & 0x3); // byte offset
  return ptr + offset / 4;
}
65 // find where external branch is linked to using addr of its stub:
66 // get address that the stub loads (dyna_linker arg1),
67 // treat it as a pointer to branch insn,
68 // return addr where that branch jumps to
69 static void *get_pointer(void *stub)
71 int *i_ptr = find_extjump_insn(stub);
72 if ((*i_ptr&0xfc000000) == 0x14000000) // b
73 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
74 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
75 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
76 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
82 // Allocate a specific ARM register.
83 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
103 // Alloc cycle count into dedicated register
104 static void alloc_cc(struct regstat *cur, int i)
106 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
109 static void alloc_cc_optional(struct regstat *cur, int i)
111 if (cur->regmap[HOST_CCREG] < 0) {
112 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
113 cur->noevict &= ~(1u << HOST_CCREG);
122 static unused const char *regname[32] = {
123 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
124 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
125 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
126 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
129 static unused const char *regname64[32] = {
130 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
131 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
132 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
133 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
137 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
138 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
141 static unused const char *condname[16] = {
142 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
143 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
146 static void output_w32(u_int word)
148 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
152 static u_int rn_rd(u_int rn, u_int rd)
156 return (rn << 5) | rd;
159 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
164 return (rm << 16) | (rn << 5) | rd;
167 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
170 return rm_rn_rd(rm, rn, rd) | (ra << 10);
173 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
179 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
182 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
185 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
188 static u_int imm16_rd(u_int imm16, u_int rd)
190 assert(imm16 < 0x10000);
192 return (imm16 << 5) | rd;
195 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
197 assert(imm12 < 0x1000);
200 return (imm12 << 10) | (rn << 5) | rd;
203 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
205 assert(imm9 < 0x200);
208 return (imm9 << 12) | (rn << 5) | rd;
211 static u_int imm19_rt(u_int imm19, u_int rt)
213 assert(imm19 < 0x80000);
215 return (imm19 << 5) | rt;
218 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
225 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
228 static u_int genjmp(const u_char *addr)
230 intptr_t offset = addr - out;
231 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
232 if (offset < -134217728 || offset > 134217727) {
233 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
237 return ((u_int)offset >> 2) & 0x03ffffff;
240 static u_int genjmpcc(const u_char *addr)
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0;
244 if (offset < -1048576 || offset > 1048572) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
249 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of one bits starting at bit 0
// (i.e. value + 1 is a power of two, or wraps to 0), and 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
257 // This function returns true if the argument contains a
258 // non-empty sequence of ones (possibly rotated) with the remainder zero.
259 static uint32_t is_rotated_mask(u_int value)
261 if (value == 0 || value == ~0)
263 if (is_mask((value - 1) | value))
265 return is_mask((~value - 1) | ~value);
268 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
270 int lzeros, tzeros, ones;
272 if (is_mask((value - 1) | value)) {
273 lzeros = __builtin_clz(value);
274 tzeros = __builtin_ctz(value);
275 ones = 32 - lzeros - tzeros;
276 *immr = (32 - tzeros) & 31;
281 if (is_mask((value - 1) | value)) {
282 lzeros = __builtin_clz(value);
283 tzeros = __builtin_ctz(value);
284 ones = 32 - lzeros - tzeros;
292 static void emit_mov(u_int rs, u_int rt)
294 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
295 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
298 static void emit_mov64(u_int rs, u_int rt)
300 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
301 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
304 static void emit_add(u_int rs1, u_int rs2, u_int rt)
306 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
307 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
310 static void emit_adds(u_int rs1, u_int rs2, u_int rt)
312 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
313 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
316 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
318 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
319 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
322 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
324 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
325 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
327 #define emit_adds_ptr emit_adds64
329 static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
331 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
332 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
335 static void emit_neg(u_int rs, u_int rt)
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
341 static void emit_negs(u_int rs, u_int rt)
343 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
344 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
347 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
349 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
350 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
353 static void emit_subs(u_int rs1, u_int rs2, u_int rt)
355 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
356 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
359 static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
361 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
362 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
365 static void emit_movz(u_int imm, u_int rt)
367 assem_debug("movz %s,#%#x\n", regname[rt], imm);
368 output_w32(0x52800000 | imm16_rd(imm, rt));
371 static void emit_movz_lsl16(u_int imm, u_int rt)
373 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x52a00000 | imm16_rd(imm, rt));
377 static void emit_movn(u_int imm, u_int rt)
379 assem_debug("movn %s,#%#x\n", regname[rt], imm);
380 output_w32(0x12800000 | imm16_rd(imm, rt));
383 static void emit_movn_lsl16(u_int imm,u_int rt)
385 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
386 output_w32(0x12a00000 | imm16_rd(imm, rt));
389 static void emit_movk(u_int imm,u_int rt)
391 assem_debug("movk %s,#%#x\n", regname[rt], imm);
392 output_w32(0x72800000 | imm16_rd(imm, rt));
395 static void emit_movk_lsl16(u_int imm,u_int rt)
398 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
399 output_w32(0x72a00000 | imm16_rd(imm, rt));
402 static void emit_zeroreg(u_int rt)
407 static void emit_movimm(u_int imm, u_int rt)
411 else if ((~imm) < 65536)
413 else if ((imm&0xffff) == 0)
414 emit_movz_lsl16(imm >> 16, rt);
415 else if (((~imm)&0xffff) == 0)
416 emit_movn_lsl16(~imm >> 16, rt);
417 else if (is_rotated_mask(imm)) {
419 gen_logical_imm(imm, &immr, &imms);
420 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
421 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
424 emit_movz(imm & 0xffff, rt);
425 emit_movk_lsl16(imm >> 16, rt);
429 static void emit_movimm64(uint64_t imm, u_int rt)
431 u_int shift, op, imm16, insns = 0;
432 for (shift = 0; shift < 4; shift++) {
433 imm16 = (imm >> shift * 16) & 0xffff;
436 op = insns ? 0xf2800000 : 0xd2800000;
437 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
439 assem_debug(",lsl #%u", shift * 16);
441 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
445 assem_debug("movz %s,#0\n", regname64[rt]);
446 output_w32(0xd2800000 | imm16_rd(0, rt));
450 static void emit_readword(void *addr, u_int rt)
452 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
453 if (!(offset & 3) && offset <= 16380) {
454 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
455 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
461 static void emit_readdword(void *addr, u_int rt)
463 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
464 if (!(offset & 7) && offset <= 32760) {
465 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
466 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
471 #define emit_readptr emit_readdword
473 static void emit_readshword(void *addr, u_int rt)
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 1) && offset <= 8190) {
477 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
478 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
484 static void emit_loadreg(u_int r, u_int hr)
492 //case HIREG: addr = &hi; break;
493 //case LOREG: addr = &lo; break;
494 case CCREG: addr = &cycle_count; break;
495 case INVCP: addr = &invc_ptr; is64 = 1; break;
496 case ROREG: addr = &ram_offset; is64 = 1; break;
499 addr = &psxRegs.GPR.r[r];
503 emit_readdword(addr, hr);
505 emit_readword(addr, hr);
509 static void emit_writeword(u_int rt, void *addr)
511 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
512 if (!(offset & 3) && offset <= 16380) {
513 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
514 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
520 static void emit_writedword(u_int rt, void *addr)
522 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
523 if (!(offset & 7) && offset <= 32760) {
524 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
525 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
531 static void emit_storereg(u_int r, u_int hr)
534 void *addr = &psxRegs.GPR.r[r];
536 //case HIREG: addr = &hi; break;
537 //case LOREG: addr = &lo; break;
538 case CCREG: addr = &cycle_count; break;
539 default: assert(r < 34); break;
541 emit_writeword(hr, addr);
544 static void emit_test(u_int rs, u_int rt)
546 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
547 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
550 static void emit_testimm(u_int rs, u_int imm)
553 assem_debug("tst %s,#%#x\n", regname[rs], imm);
554 assert(is_rotated_mask(imm)); // good enough for PCSX
555 gen_logical_imm(imm, &immr, &imms);
556 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
559 static void emit_not(u_int rs,u_int rt)
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
565 static void emit_and(u_int rs1,u_int rs2,u_int rt)
567 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
568 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
571 static void emit_or(u_int rs1,u_int rs2,u_int rt)
573 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
574 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
577 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
579 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
580 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
583 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
589 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
595 static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
597 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
598 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
601 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
603 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
604 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
607 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
609 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
610 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
613 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
615 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
616 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
619 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
621 unused const char *st = s ? "s" : "";
622 s = s ? 0x20000000 : 0;
623 is64 = is64 ? 0x80000000 : 0;
625 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
626 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
628 else if (-imm < 4096) {
629 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
630 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
632 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
633 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
634 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
636 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
637 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
640 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
641 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
642 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
644 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
645 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
652 host_tempreg_acquire();
655 emit_movimm(imm, tmp);
656 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
657 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
658 if (tmp == HOST_TEMPREG)
659 host_tempreg_release();
663 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
669 emit_addimm_s(0, 0, rs, imm, rt);
672 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
674 emit_addimm_s(0, 1, rs, imm, rt);
677 static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
679 emit_addimm64(rs, imm, rt);
682 static void emit_addimm_and_set_flags(int imm, u_int rt)
684 emit_addimm_s(1, 0, rt, imm, rt);
687 static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
689 emit_addimm_s(1, 0, rs, imm, rt);
692 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
694 const char *names[] = { "and", "orr", "eor", "ands" };
695 const char *name = names[op];
698 if (is_rotated_mask(imm)) {
699 gen_logical_imm(imm, &immr, &imms);
700 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
701 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
704 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
705 host_tempreg_acquire();
706 emit_movimm(imm, HOST_TEMPREG);
707 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
708 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
709 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
710 host_tempreg_release();
715 static void emit_andimm(u_int rs, u_int imm, u_int rt)
720 emit_logicop_imm(0, rs, imm, rt);
723 static void emit_orimm(u_int rs, u_int imm, u_int rt)
730 emit_logicop_imm(1, rs, imm, rt);
733 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
740 emit_logicop_imm(2, rs, imm, rt);
743 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
745 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
746 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
749 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
751 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
752 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
755 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
757 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
758 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
761 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
763 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
764 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
767 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
769 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
770 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
773 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
775 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
776 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
779 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
781 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
785 static void emit_signextend16(u_int rs, u_int rt)
787 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
788 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
791 static void emit_shl(u_int rs,u_int rshift,u_int rt)
793 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
794 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
797 static void emit_shr(u_int rs,u_int rshift,u_int rt)
799 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
800 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
803 static void emit_sar(u_int rs,u_int rshift,u_int rt)
805 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
806 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
809 static void emit_cmpimm(u_int rs, u_int imm)
812 assem_debug("cmp %s,%#x\n", regname[rs], imm);
813 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
815 else if (-imm < 4096) {
816 assem_debug("cmn %s,%#x\n", regname[rs], imm);
817 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
819 else if (imm < 16777216 && !(imm & 0xfff)) {
820 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
821 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
824 host_tempreg_acquire();
825 emit_movimm(imm, HOST_TEMPREG);
826 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
827 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
828 host_tempreg_release();
832 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
834 assert(imm == 0 || imm == 1);
835 assert(cond0 < 0x10);
836 assert(cond1 < 0x10);
838 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
839 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
841 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
842 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
846 static void emit_cmovne_imm(u_int imm,u_int rt)
848 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
851 static void emit_cmovl_imm(u_int imm,u_int rt)
853 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
856 static void emit_cmovb_imm(int imm,u_int rt)
858 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
861 static void emit_cmoveq_reg(u_int rs,u_int rt)
863 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
864 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
867 static void emit_cmovne_reg(u_int rs,u_int rt)
869 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
870 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
873 static void emit_cmovl_reg(u_int rs,u_int rt)
875 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
876 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
879 static void emit_cmovb_reg(u_int rs,u_int rt)
881 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
882 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
885 static void emit_cmovs_reg(u_int rs,u_int rt)
887 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
888 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
891 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
893 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
897 static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
899 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
903 static void emit_slti32(u_int rs,int imm,u_int rt)
905 if(rs!=rt) emit_zeroreg(rt);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovl_imm(1,rt);
911 static void emit_sltiu32(u_int rs,int imm,u_int rt)
913 if(rs!=rt) emit_zeroreg(rt);
915 if(rs==rt) emit_movimm(0,rt);
916 emit_cmovb_imm(1,rt);
919 static void emit_cmp(u_int rs,u_int rt)
921 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
922 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
925 static void emit_cmpcs(u_int rs,u_int rt)
927 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
928 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
931 static void emit_set_gz32(u_int rs, u_int rt)
933 //assem_debug("set_gz32\n");
936 emit_cmovl_imm(0,rt);
939 static void emit_set_nz32(u_int rs, u_int rt)
941 //assem_debug("set_nz32\n");
942 if(rs!=rt) emit_mov(rs,rt);
944 emit_cmovne_imm(1,rt);
947 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
949 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
950 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
952 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
953 emit_cmovl_imm(1,rt);
956 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
958 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
959 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
961 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
962 emit_cmovb_imm(1,rt);
965 static int can_jump_or_call(const void *a)
967 intptr_t diff = (u_char *)a - out;
968 return (-134217728 <= diff && diff <= 134217727);
971 static void emit_call(const void *a)
973 intptr_t diff = (u_char *)a - out;
974 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
976 if (-134217728 <= diff && diff <= 134217727)
977 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
982 static void emit_jmp(const void *a)
984 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
985 u_int offset = genjmp(a);
986 output_w32(0x14000000 | offset);
989 static void emit_jne(const void *a)
991 assem_debug("bne %p\n", a);
992 u_int offset = genjmpcc(a);
993 output_w32(0x54000000 | (offset << 5) | COND_NE);
996 static void emit_jeq(const void *a)
998 assem_debug("beq %p\n", a);
999 u_int offset = genjmpcc(a);
1000 output_w32(0x54000000 | (offset << 5) | COND_EQ);
1003 static void emit_js(const void *a)
1005 assem_debug("bmi %p\n", a);
1006 u_int offset = genjmpcc(a);
1007 output_w32(0x54000000 | (offset << 5) | COND_MI);
1010 static void emit_jns(const void *a)
1012 assem_debug("bpl %p\n", a);
1013 u_int offset = genjmpcc(a);
1014 output_w32(0x54000000 | (offset << 5) | COND_PL);
1017 static void emit_jl(const void *a)
1019 assem_debug("blt %p\n", a);
1020 u_int offset = genjmpcc(a);
1021 output_w32(0x54000000 | (offset << 5) | COND_LT);
1024 static void emit_jge(const void *a)
1026 assem_debug("bge %p\n", a);
1027 u_int offset = genjmpcc(a);
1028 output_w32(0x54000000 | (offset << 5) | COND_GE);
1031 static void emit_jo(const void *a)
1033 assem_debug("bvs %p\n", a);
1034 u_int offset = genjmpcc(a);
1035 output_w32(0x54000000 | (offset << 5) | COND_VS);
1038 static void emit_jno(const void *a)
1040 assem_debug("bvc %p\n", a);
1041 u_int offset = genjmpcc(a);
1042 output_w32(0x54000000 | (offset << 5) | COND_VC);
1045 static void emit_jc(const void *a)
1047 assem_debug("bcs %p\n", a);
1048 u_int offset = genjmpcc(a);
1049 output_w32(0x54000000 | (offset << 5) | COND_CS);
1052 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
1054 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
1055 u_int offset = genjmpcc(a);
1056 is64 = is64 ? 0x80000000 : 0;
1057 isnz = isnz ? 0x01000000 : 0;
1058 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1061 static void *emit_cbz(u_int r, const void *a)
1064 emit_cb(0, 0, a, r);
1068 static void emit_jmpreg(u_int r)
1070 assem_debug("br %s\n", regname64[r]);
1071 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1074 static void emit_retreg(u_int r)
1076 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1077 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1080 static void emit_ret(void)
1085 static void emit_adr(void *addr, u_int rt)
1087 intptr_t offset = (u_char *)addr - out;
1088 assert(-1048576 <= offset && offset < 1048576);
1090 assem_debug("adr x%d,#%#lx\n", rt, offset);
1091 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1094 static void emit_adrp(void *addr, u_int rt)
1096 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1097 assert(-4294967296l <= offset && offset < 4294967296l);
1100 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1101 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1104 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1106 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1107 assert(-256 <= offset && offset < 256);
1108 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1111 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1113 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1114 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1117 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1119 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1120 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1123 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1125 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1126 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1129 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1131 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1132 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1134 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1136 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1138 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1139 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1142 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1144 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1145 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1148 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1150 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1151 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1154 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1156 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1157 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1160 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1162 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1163 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1166 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1168 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1169 assert(-256 <= offset && offset < 256);
1170 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1173 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1175 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1176 assert(-256 <= offset && offset < 256);
1177 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1180 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1182 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1183 assert(-256 <= offset && offset < 256);
1184 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1187 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1189 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1190 assert(-256 <= offset && offset < 256);
1191 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1194 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1196 if (!(offset & 3) && (u_int)offset <= 16380) {
1197 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1198 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1200 else if (-256 <= offset && offset < 256) {
1201 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1202 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1208 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1210 if (!(offset & 1) && (u_int)offset <= 8190) {
1211 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1212 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1214 else if (-256 <= offset && offset < 256) {
1215 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1216 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1222 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1224 if ((u_int)offset < 4096) {
1225 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1226 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1228 else if (-256 <= offset && offset < 256) {
1229 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1230 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1236 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1238 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1239 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1242 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1244 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1245 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1248 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1250 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1251 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1254 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1256 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1257 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1260 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1262 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1263 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1266 static void emit_clz(u_int rs, u_int rt)
1268 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1269 output_w32(0x5ac01000 | rn_rd(rs, rt));
1272 // special case for checking invalid_code
1273 static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1275 emit_shrimm(r, 12, rt);
1276 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1277 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1280 // special for loadlr_assemble, rs2 is destroyed
1281 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1283 emit_shl(rs2, shift, rs2);
1284 emit_bic(rs1, rs2, rt);
1287 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1289 emit_shr(rs2, shift, rs2);
1290 emit_bic(rs1, rs2, rt);
1293 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1295 u_int op = 0xb9000000;
1296 unused const char *ldst = is_st ? "st" : "ld";
1297 unused char rp = is64 ? 'x' : 'w';
1298 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1299 is64 = is64 ? 1 : 0;
1300 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1301 ofs = (ofs >> (2+is64));
1302 if (!is_st) op |= 0x00400000;
1303 if (is64) op |= 0x40000000;
1304 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1307 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1309 u_int op = 0x29000000;
1310 unused const char *ldst = is_st ? "st" : "ld";
1311 unused char rp = is64 ? 'x' : 'w';
1312 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1313 is64 = is64 ? 1 : 0;
1314 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1315 ofs = (ofs >> (2+is64));
1316 assert(-64 <= ofs && ofs <= 63);
1318 if (!is_st) op |= 0x00400000;
1319 if (is64) op |= 0x80000000;
1320 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load the host registers whose bits are set in
// reglist, using 64-bit accesses relative to SP at SSP_CALLEE_REGS + ofs.
// Registers are moved in pairs with ldp/stp where possible and singly with
// ldr/str otherwise.
// NOTE(review): the pairing logic and the `ofs` bookkeeping are not visible
// in this listing - confirm against the full source.
1323 static void save_load_regs_all(int is_store, u_int reglist)
// walk reglist bit by bit; r is the register number of the current bit
1327 for (r = 0; reglist; r++, reglist >>= 1) {
1331 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1337 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the save area must not run past SSP_CALLER_REGS
1340 assert(ofs <= SSP_CALLER_REGS);
1343 // Save registers before function call
1344 static void save_regs(u_int reglist)
1346 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1347 save_load_regs_all(1, reglist);
1350 // Restore registers after function call
1351 static void restore_regs(u_int reglist)
1353 reglist &= CALLER_SAVE_REGS;
1354 save_load_regs_all(0, reglist);
1357 /* Stubs/epilogue */
1359 static void literal_pool(int n)
1364 static void literal_pool_jumpover(int n)
// Emit the stub used for a jump that leaves the current block: it loads the
// 32-bit guest `target` into register 0 (movz + movk) and far-jumps to
// dyna_linker. `addr` is the host address of the branch being redirected
// and must be a "b" or "b.cond" instruction.
// The emitted layout is decoded elsewhere, so it must not change.
// NOTE(review): the instruction that passes `addr` on is not visible in
// this listing.
1368 // parsed by get_pointer, find_extjump_insn
1369 static void emit_extjump(u_char *addr, u_int target)
// addr[3] is the most significant byte of the little-endian instruction word
1371 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1373 emit_movz(target & 0xffff, 0);
1374 emit_movk_lsl16(target >> 16, 0);
1376 // addr is in the current recompiled block (max 256k)
1377 // offset shouldn't exceed +/-1MB
1379 emit_far_jump(dyna_linker);
// Sanity-check that `src` points at an extjump stub.
// NOTE(review): only the first check is visible in this listing.
1382 static void check_extjump2(void *src)
// the first word must be a 32-bit movz into register 0 (any immediate)
1385 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
// Cheapest-first selection of a single instruction that derives rt_val from
// the known content rs_val of register rs:
//   1. add/sub immediate (12-bit, or 12-bit shifted left by 12),
//   2. bitwise NOT of rs,
//   3. xor with an immediate encodable as a rotated mask,
//   4. otherwise a plain immediate load of rt_val.
// NOTE(review): the statement for case 2 is not visible in this listing.
1389 // put rt_val into rt, potentially making use of rs with value rs_val
1390 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1392 int diff = rt_val - rs_val;
1393 if ((-4096 < diff && diff < 4096)
1394 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1395 emit_addimm(rs, diff, rt);
1396 else if (rt_val == ~rs_val)
1398 else if (is_rotated_mask(rs_val ^ rt_val))
1399 emit_xorimm(rs, rs_val ^ rt_val, rt);
1401 emit_movimm(rt_val, rt);
1404 // return 1 if the above function can do it's job cheaply
1405 static int is_similar_value(u_int v1, u_int v2)
1408 return (-4096 < diff && diff < 4096)
1409 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1411 || is_rotated_mask(v1 ^ v2);
1414 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1416 if (rt_val < 0x100000000ull) {
1417 emit_movimm_from(rs_val, rs, rt_val, rt);
1420 // just move the whole thing. At least on Linux all addresses
1421 // seem to be 48bit, so 3 insns - not great not terrible
1422 emit_movimm64(rt_val, rt);
// Move host registers a0 and a1 into argument registers 0 and 1 using
// 64-bit moves, taking care of the cases where the sources overlap the
// destinations.
// NOTE(review): the branch conditions are only partly visible in this
// listing; the `>=0` checks compare u_int values and are therefore always
// true as written - confirm intent against the full source.
1426 static void pass_args64(u_int a0, u_int a1)
// full swap: go through register 2 as scratch
1430 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1432 else if(a0!=0&&a1==0) {
1434 if (a0>=0) emit_mov64(a0,0);
1437 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1438 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt while extending it according to the access type of the stub:
// sbfm for signed byte/halfword loads, ubfm for byte/halfword stores, and a
// plain move (skipped when rs == rt) for word stores.
// NOTE(review): some cases are not visible in this listing.
1442 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1445 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1447 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1448 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1450 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1452 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1457 #include "pcsxmem.h"
1458 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for the load described by stubs[n].
// Visible steps:
//   - patch the branch at stubs[n].addr so that it lands here,
//   - look the address up through mem_rtab and, when the entry is a direct
//     mapping, perform the load inline and return to stubs[n].retaddr,
//   - otherwise call jump_handler_read8/16/32 with the cycle count
//     (adjusted by `adj`) in register 2, extend the result into rt,
//     restore the saved registers and return.
// NOTE(review): a number of lines (declarations, register saving, the
// conditional branches) are not visible in this listing.
1460 static void do_readstub(int n)
1462 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1463 set_jump_target(stubs[n].addr, out);
1464 enum stub_type type = stubs[n].type;
1466 int rs = stubs[n].b;
1467 const struct regstat *i_regs = (void *)stubs[n].c;
1468 int adj = (int)stubs[n].d;
1469 u_int reglist = stubs[n].e;
1470 const signed char *i_regmap = i_regs->regmap;
// C2LS and LOADLR deliver their data through FTEMP rather than rt1
1472 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1473 rt=get_reg(i_regmap,FTEMP);
1475 rt=get_reg(i_regmap,dops[i].rt1);
1478 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1479 void *restore_jump = NULL, *handler_jump = NULL;
// search for a host register that is not in reglist
1481 for (r = 0; r < HOST_CCREG; r++) {
1482 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1487 if(rt>=0&&dops[i].rt1!=0)
1494 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then doubled with flags set
// (presumably the top bit marks a handler entry - TODO confirm)
1496 emit_readdword(&mem_rtab,temp);
1497 emit_shrimm(rs,12,temp2);
1498 emit_readdword_dualindexedx8(temp,temp2,temp2);
1499 emit_adds64(temp2,temp2,temp2);
1502 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1504 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1505 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1506 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1507 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1508 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1514 emit_jmp(0); // jump to reg restore
1517 emit_jmp(stubs[n].retaddr); // return address
1518 set_jump_target(handler_jump, out);
// pick the handler by access width
1523 if(type==LOADB_STUB||type==LOADBU_STUB)
1524 handler=jump_handler_read8;
1525 if(type==LOADH_STUB||type==LOADHU_STUB)
1526 handler=jump_handler_read16;
1527 if(type==LOADW_STUB)
1528 handler=jump_handler_read32;
1530 pass_args64(rs,temp2);
1532 cc = cc_use = get_reg(i_regmap, CCREG);
1534 emit_loadreg(CCREG, (cc_use = 2));
1535 emit_addimm(cc_use, adj, 2);
1537 emit_far_call(handler);
1540 // cycle reload for read32 only (value in w2 both in and out)
1541 if (type == LOADW_STUB) {
1542 emit_addimm(2, -adj, cc_use);
1544 emit_storereg(CCREG, cc_use);
// the handler result is taken from register 0
1547 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1548 loadstore_extend(type,0,rt);
1551 set_jump_target(restore_jump, out);
1552 restore_regs(reglist);
1553 emit_jmp(stubs[n].retaddr);
// Emit a load for instruction i whose guest address `addr` is known at
// compile time.
// When get_direct_memhandler() returns NULL the access is done inline
// (rebasing ra to the host address first when it differs from addr);
// otherwise a memory handler is called, with the cycle count adjusted by
// `adj` passed in register 2 and the result taken from register 0.
// NOTE(review): several lines (declarations, register saving, early
// returns, the is_dynamic branches) are not visible in this listing.
1556 static void inline_readstub(enum stub_type type, int i, u_int addr,
1557 const signed char regmap[], int target, int adj, u_int reglist)
1559 int ra = cinfo[i].addr;
1560 int rt = get_reg(regmap, target);
1563 uintptr_t host_addr = 0;
1566 cc = cc_use = get_reg(regmap, CCREG);
1567 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
1569 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1570 if (handler == NULL) {
1571 if(rt<0||dops[i].rt1==0)
1573 if (addr != host_addr)
1574 emit_movimm_from64(addr, ra, host_addr, ra);
1576 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1577 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1578 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1579 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1580 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
1585 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1587 if(type==LOADB_STUB||type==LOADBU_STUB)
1588 handler=jump_handler_read8;
1589 if(type==LOADH_STUB||type==LOADHU_STUB)
1590 handler=jump_handler_read16;
1591 if(type==LOADW_STUB)
1592 handler=jump_handler_read32;
1595 // call a memhandler
1596 if(rt>=0&&dops[i].rt1!=0)
1600 emit_movimm(addr,0);
1604 emit_loadreg(CCREG, (cc_use = 2));
1605 emit_addimm(cc_use, adj, 2);
// l1 is the mem_rtab entry for this page, shifted left by one
1607 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
// use adrp + add when l1 is within +/-4GB of the output position
1608 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1609 if (-4294967296l <= offset && offset < 4294967296l) {
1610 emit_adrp((void *)l1, 1);
1611 emit_addimm64(1, l1 & 0xfff, 1);
1614 emit_movimm64(l1, 1);
1617 emit_far_call(do_memhandler_pre);
1619 emit_far_call(handler);
1622 // cycle reload for read32 only (value in w2 both in and out)
1623 if (type == LOADW_STUB) {
1625 emit_far_call(do_memhandler_post);
1626 emit_addimm(2, -adj, cc_use);
1628 emit_storereg(CCREG, cc_use);
1631 if(rt>=0&&dops[i].rt1!=0)
1632 loadstore_extend(type, 0, rt);
1633 restore_regs(reglist);
// Emit the out-of-line slow path for the store described by stubs[n].
// Visible steps:
//   - patch the branch at stubs[n].addr so that it lands here,
//   - look the address up through mem_wtab and, when the entry is a direct
//     mapping, perform the store inline and return to stubs[n].retaddr,
//   - otherwise call jump_handler_write8/16/32 with the cycle count
//     (adjusted by `adj`) in register 2, take the new cycle count back from
//     register 2, restore the saved registers and return.
// NOTE(review): a number of lines (declarations, register saving, argument
// setup, the conditional branches) are not visible in this listing.
1636 static void do_writestub(int n)
1638 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1639 set_jump_target(stubs[n].addr, out);
1640 enum stub_type type=stubs[n].type;
1643 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1644 int adj = (int)stubs[n].d;
1645 u_int reglist=stubs[n].e;
1646 signed char *i_regmap=i_regs->regmap;
// C2LS stores take their data from FTEMP, everything else from rs2
1648 if(dops[i].itype==C2LS) {
1649 rt=get_reg(i_regmap,r=FTEMP);
1651 rt=get_reg(i_regmap,r=dops[i].rs2);
1655 int rtmp,temp=-1,temp2,regs_saved=0;
1656 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally protects the address and data registers
1657 int reglist2=reglist|(1<<rs)|(1<<rt);
1658 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1659 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1667 for(rtmp=0;rtmp<=3;rtmp++)
1668 if(rtmp!=rs&&rtmp!=rt)
1671 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1674 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled with flags set
// (presumably the top bit marks a handler entry - TODO confirm)
1677 emit_readdword(&mem_wtab,temp);
1678 emit_shrimm(rs,12,temp2);
1679 emit_readdword_dualindexedx8(temp,temp2,temp2);
1680 emit_adds64(temp2,temp2,temp2);
1684 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1685 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1686 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1691 emit_jmp(0); // jump to reg restore
1694 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1695 set_jump_target(handler_jump, out);
1701 case STOREB_STUB: handler=jump_handler_write8; break;
1702 case STOREH_STUB: handler=jump_handler_write16; break;
1703 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry travels to the handler in register 3
1709 emit_mov64(temp2,3);
1710 host_tempreg_release();
1713 cc = cc_use = get_reg(i_regmap, CCREG);
1715 emit_loadreg(CCREG, (cc_use = 2));
1716 emit_addimm(cc_use, adj, 2);
1718 emit_far_call(handler);
1720 // new cycle_count returned in x2
1721 emit_addimm(2, -adj, cc_use);
1723 emit_storereg(CCREG, cc_use);
1725 set_jump_target(restore_jump, out);
1726 restore_regs(reglist);
1727 emit_jmp(stubs[n].retaddr);
// Emit a store for instruction i whose guest address `addr` is known at
// compile time.
// When get_direct_memhandler() returns NULL the store is done inline
// (rebasing ra to the host address first when it differs from addr);
// otherwise the address is written to `address`, the data is extended into
// register 0 and the handler is called between do_memhandler_pre and
// do_memhandler_post, with the cycle count adjusted by `adj` in register 2.
// NOTE(review): several lines (declarations, register saving, early
// returns) are not visible in this listing.
1730 static void inline_writestub(enum stub_type type, int i, u_int addr,
1731 const signed char regmap[], int target, int adj, u_int reglist)
1733 int ra = cinfo[i].addr;
1734 int rt = get_reg(regmap,target);
1737 uintptr_t host_addr = 0;
1738 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1739 if (handler == NULL) {
1740 if (addr != host_addr)
1741 emit_movimm_from64(addr, ra, host_addr, ra);
1743 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1744 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1745 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
1751 // call a memhandler
1753 emit_writeword(ra, &address); // some handlers still need it
1754 loadstore_extend(type, rt, 0);
1756 cc = cc_use = get_reg(regmap, CCREG);
1758 emit_loadreg(CCREG, (cc_use = 2));
1759 emit_addimm(cc_use, adj, 2);
1761 emit_far_call(do_memhandler_pre);
1762 emit_far_call(handler);
1763 emit_far_call(do_memhandler_post);
// undo the cycle adjustment on the value left in register 2
1764 emit_addimm(2, -adj, cc_use);
1766 emit_storereg(CCREG, cc_use);
1767 restore_regs(reglist);
// Common code emitted before a GTE (cop2) operation: save the registers in
// reglist, emit the cop2 stall check, and leave a pointer to the cop2 data
// registers in register 0 (FP plus the offset of psxRegs.CP2D.r[0] from
// dynarec_local).
// NOTE(review): the pcnt_gte_start call is presumably inside a conditional
// compilation block that is not visible in this listing.
1772 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1774 save_load_regs_all(1, reglist);
1775 cop2_do_stall_check(op, i, i_regs, 0);
1778 emit_far_call(pcnt_gte_start);
1780 // pointer to cop2 regs
1781 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Common code emitted after a GTE (cop2) operation: reload the registers in
// reglist that were saved before the call.
// NOTE(review): the pcnt_gte_end call is presumably inside a conditional
// compilation block that is not visible in this listing.
1784 static void c2op_epilogue(u_int op,u_int reglist)
1788 emit_far_call(pcnt_gte_end);
1790 save_load_regs_all(0, reglist);
// Assemble the GTE operation encoded in the low 6 bits of source[i].
// When a handler exists for the op, the emitted code saves the live
// caller-saved registers, stores the opcode to psxRegs.code, calls either
// the flag-computing handler or its no-flags variant (depending on whether
// the flags are still needed afterwards), and restores the registers.
// NOTE(review): some lines are not visible in this listing.
1793 static void c2op_assemble(int i, const struct regstat *i_regs)
1795 u_int c2op=source[i]&0x3f;
1796 u_int hr,reglist_full=0,reglist;
1797 int need_flags,need_ir;
// collect every host register that currently holds a guest register
1798 for(hr=0;hr<HOST_REGS;hr++) {
1799 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1801 reglist=reglist_full&CALLER_SAVE_REGS;
1803 if (gte_handlers[c2op]!=NULL) {
1804 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1805 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1806 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1807 source[i],gte_unneeded[i+1],need_flags,need_ir);
1808 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1810 //int shift = (source[i] >> 19) & 1;
1811 //int lm = (source[i] >> 10) & 1;
1815 c2op_prologue(c2op, i, i_regs, reglist);
1816 emit_movimm(source[i],1); // opcode
1817 emit_writeword(1,&psxRegs.code);
1818 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1821 c2op_epilogue(c2op,reglist);
// Emit the value fix-up described by the two commented-out lines below:
// temp = sl & 0x7ffff000, with bit 31 set when any bit of 0x7f87e000 is set.
// The test mask is built in two steps: 0x7fffe000 & 0xff87ffff == 0x7f87e000.
// HOST_TEMPREG holds the variant with bit 31 set and is selected on "ne".
// The trailing assert(0) marks this path as not yet tested.
1825 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1827 //value = value & 0x7ffff000;
1828 //if (value & 0x7f87e000) value |= 0x80000000;
1829 emit_andimm(sl, 0x7fffe000, temp);
1830 emit_testimm(temp, 0xff87ffff);
// presumably the and below leaves the flags from the test intact - TODO confirm
1831 emit_andimm(sl, 0x7ffff000, temp);
1832 host_tempreg_acquire();
1833 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1834 emit_cmovne_reg(HOST_TEMPREG, temp);
1835 host_tempreg_release();
1836 assert(0); // testing needed
1839 static void do_mfc2_31_one(u_int copr,signed char temp)
1841 emit_readshword(®_cop2d[copr],temp);
1842 emit_bicsar_imm(temp,31,temp);
1843 emit_cmpimm(temp,0xf80);
1844 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1845 emit_andimm(temp,0xf80,temp);
1848 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1851 host_tempreg_acquire();
1852 temp = HOST_TEMPREG;
1854 do_mfc2_31_one(9,temp);
1855 emit_shrimm(temp,7,tl);
1856 do_mfc2_31_one(10,temp);
1857 emit_orrshr_imm(temp,2,tl);
1858 do_mfc2_31_one(11,temp);
1859 emit_orrshl_imm(temp,3,tl);
1860 emit_writeword(tl,®_cop2d[29]);
1862 if (temp == HOST_TEMPREG)
1863 host_tempreg_release();
// Assemble MULT/MULTU/DIV/DIVU for instruction i.
// With both source registers non-zero: multiplies use smull/umull into the
// 64-bit `hi` register and shift it right by 32 to obtain HI; divisions use
// sdiv/udiv for LO and msub for HI, then patch LO for a zero divisor.
// With a constant-zero operand the results are emitted directly.
// NOTE(review): the switch/case labels and several statements are not
// visible in this listing.
1866 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1872 if(dops[i].rs1&&dops[i].rs2)
1874 switch(dops[i].opcode2)
1879 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1880 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1881 signed char hi=get_reg(i_regs->regmap,HIREG);
1882 signed char lo=get_reg(i_regs->regmap,LOREG);
1888 if(dops[i].opcode2==0x18) // MULT
1889 emit_smull(m1,m2,hi);
1891 emit_umull(m1,m2,hi);
1894 emit_shrimm64(hi,32,hi);
1900 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1901 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1902 signed char quotient=get_reg(i_regs->regmap,LOREG);
1903 signed char remainder=get_reg(i_regs->regmap,HIREG);
1904 assert(numerator>=0);
1905 assert(denominator>=0);
1906 assert(quotient>=0);
1907 assert(remainder>=0);
1909 if (dops[i].opcode2 == 0x1A) // DIV
1910 emit_sdiv(numerator,denominator,quotient);
1912 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1913 emit_msub(quotient,denominator,numerator,remainder);
1915 // div 0 quotient (remainder is already correct)
1916 host_tempreg_acquire();
1917 if (dops[i].opcode2 == 0x1A) { // DIV
// presumably yields 1 for a negative numerator and -1 otherwise - TODO confirm
1918 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1919 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1922 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient only when the denominator is zero
1923 emit_test(denominator,denominator);
1924 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1925 host_tempreg_release();
1934 signed char hr=get_reg(i_regs->regmap,HIREG);
1935 signed char lr=get_reg(i_regs->regmap,LOREG);
1936 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1939 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1940 assert(numerator >= 0);
// HI receives the numerator unchanged
1942 emit_mov(numerator,hr);
1944 if (dops[i].opcode2 == 0x1A) { // DIV
1945 emit_add_lsrimm(WZR,numerator,31,lr);
1946 emit_orn_asrimm(lr,numerator,31,lr);
1953 if (hr >= 0) emit_zeroreg(hr);
1954 if (lr >= 0) emit_movimm(~0,lr);
1957 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1959 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1960 assert(denominator >= 0);
1961 if (hr >= 0) emit_zeroreg(hr);
1964 emit_test(denominator, denominator);
1965 emit_csinvne_reg(lr, lr, lr);
1970 // Multiply by zero is zero.
1971 if (hr >= 0) emit_zeroreg(hr);
1972 if (lr >= 0) emit_zeroreg(lr);
1976 #define multdiv_assemble multdiv_assemble_arm64
// Write back the dirty guest registers held in host registers.
// mregs[] is indexed by guest register number and starts out as all -1;
// guest registers r and r+1 that are both present are stored with a single
// 32-bit stp relative to FP, the others with emit_storereg.
// NOTE(review): the code filling mregs[] and part of the second loop are
// not visible in this listing.
1978 // wb_dirtys making use of stp when possible
1979 static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
1981 signed char mregs[34+1];
1983 memset(mregs, -1, sizeof(mregs));
1984 for (hr = 0; hr < HOST_REGS; hr++) {
// skip unmapped host registers, guest r0 and the cycle counter
1986 if (hr == EXCLUDE_REG || r <= 0 || r == CCREG)
// skip host registers whose dirty bit is clear
1988 if (!((i_dirty >> hr) & 1))
1993 for (r = 1; r < 34; r++) {
1996 if (mregs[r+1] >= 0) {
1997 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
1998 emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset);
2002 emit_storereg(r, mregs[r]);
2005 #define wb_dirtys wb_dirtys
// Load every guest register that i_regmap assigns to a host register.
// mregs[] is indexed by guest register number and starts out as all -1;
// guest registers r and r+1 that are both present are loaded with a single
// 32-bit ldp relative to FP, the others with emit_loadreg.
// NOTE(review): the code filling mregs[] and some conditions are not
// visible in this listing.
2007 static void load_all_regs(const signed char i_regmap[])
2009 signed char mregs[34+1];
2011 memset(mregs, -1, sizeof(mregs));
2012 for (hr = 0; hr < HOST_REGS; hr++) {
2014 if (hr == EXCLUDE_REG || r < 0 || r == CCREG)
2018 else if (r < TEMPREG)
2019 emit_loadreg(r, hr);
2022 emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc
2023 for (r = 1; r < 34; r++) {
2026 if (mregs[r+1] >= 0) {
2027 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
2028 emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset);
2032 emit_loadreg(r, mregs[r]);
2035 #define load_all_regs load_all_regs
// Emit a call to ndrc_get_addr_ht as part of an indirect jump through the
// guest address held in host register rs.
// NOTE(review): argument setup and the final jump are not visible in this
// listing.
2037 static void do_jump_vaddr(u_int rs)
2041 emit_far_call(ndrc_get_addr_ht);
2045 static void do_preload_rhash(u_int r) {
2046 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2047 // register. On ARM the hash can be done with a single instruction (below)
2050 static void do_preload_rhtbl(u_int ht) {
2051 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2054 static void do_rhash(u_int rs,u_int rh) {
2055 emit_andimm(rs, 0xf8, rh);
2058 static void do_miniht_load(int ht, u_int rh) {
2059 emit_add64(ht, rh, ht);
2060 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash table lookup.
// The visible part resolves a forward branch (jaddr) to the current output
// position and then loads the 64-bit value at [ht + 8] back into ht.
// NOTE(review): the compare, the miss path and the final branch are not
// visible in this listing.
2063 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2069 set_jump_target(jaddr, out);
2070 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// imm12 is scaled by 8 for the 64-bit ldr, hence 8 >> 3
2071 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that inserts return_address into the mini hash table entry
// selected by (return_address & 0xFF) >> 3: rt receives the 32-bit guest
// address (movz + movk) and is stored to slot [0]; the 64-bit value in temp
// is stored to slot [1]. The current output position is registered with the
// linker for return_address.
// NOTE(review): the instruction that fills temp is not visible in this
// listing.
2075 // parsed by set_jump_target?
2076 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2077 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2078 emit_movk(return_address&0xffff,rt);
2079 add_to_linker(out,return_address,1);
2081 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2082 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2085 static unused void clear_cache_arm64(char *start, char *end)
2087 // Don't rely on GCC's __clear_cache implementation, as it caches
2088 // icache/dcache cache line sizes, that can vary between cores on
2089 // big.LITTLE architectures.
2090 uint64_t addr, ctr_el0;
2091 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2092 size_t isize, dsize;
2094 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2095 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2096 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2098 // use the global minimum cache line size
2099 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2100 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2102 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2103 not required for instruction to data coherence. */
2104 if ((ctr_el0 & (1 << 28)) == 0x0) {
2105 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2106 for (; addr < (uint64_t)end; addr += dsize)
2107 // use "civac" instead of "cvau", as this is the suggested workaround for
2108 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2109 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2111 __asm__ volatile("dsb ish" : : : "memory");
2113 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2114 Unification is not required for instruction to data coherence. */
2115 if ((ctr_el0 & (1 << 29)) == 0x0) {
2116 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2117 for (; addr < (uint64_t)end; addr += isize)
2118 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2120 __asm__ volatile("dsb ish" : : : "memory");
2123 __asm__ volatile("isb" : : : "memory");
2126 // CPU-architecture-specific initialization
2127 static void arch_init(void)
2129 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2130 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2132 assert(!(diff & 3));
2133 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2134 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2135 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2136 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2138 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2141 // vim:shiftwidth=2:expandtab