1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
27 static void set_jump_target_far1(u_int *insn_, void *target)
29 u_int *insn = NDRC_WRITE_OFFSET(insn_);
30 u_int in = *insn & 0xfc000000;
31 intptr_t offset = (u_char *)target - (u_char *)insn_;
32 assert(in == 0x14000000);
33 assert(-134217728 <= offset && offset < 134217728);
34 in |= (offset >> 2) & 0x3ffffff;
38 static void set_jump_target(void *addr, void *target)
40 u_int *ptr = NDRC_WRITE_OFFSET(addr);
41 intptr_t offset = (u_char *)target - (u_char *)addr;
43 if ((*ptr&0xFC000000) == 0x14000000) { // b
44 set_jump_target_far1(addr, target);
46 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
47 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
48 // Conditional branch are limited to +/- 1MB
49 // block max size is 256k so branching beyond the +/- 1MB limit
50 // should only happen when jumping to an already compiled block (see add_jump_out)
51 // a workaround would be to do a trampoline jump via a stub at the end of the block
52 assert(-1048576 <= offset && offset < 1048576);
53 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
55 else if((*ptr&0x9f000000)==0x10000000) { // adr
56 // generated by do_miniht_insert
57 assert(offset>=-1048576LL&&offset<1048576LL);
58 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
61 abort(); // should not happen
/*
 * Given a pointer to an external jump stub (as produced by emit_extjump2),
 * find the instruction that jumps to it.
 *
 * The third word of the stub must be an adr; the address it encodes,
 * relative to the adr itself, is the location of the jumping instruction.
 * Returns a pointer to that instruction.
 */
static void *find_extjump_insn(void *stub)
{
  uint32_t *adr = (uint32_t *)stub + 2;
  uint32_t insn = *adr;

  assert((insn & 0x9f000000) == 0x10000000); // adr

  // Rebuild the 21-bit immediate: immhi is bits 23..5, immlo is bits 30..29.
  // All shifting is done on unsigned values; shifting a negative signed
  // value left is undefined behaviour in C.
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from bit 20
  int32_t offset = (int32_t)(imm21 ^ 0x100000) - 0x100000;

  return adr + offset / 4;
}
74 // Allocate a specific ARM register.
// Works on the allocation state in cur: guest register reg ends up associated
// with host register hr.
75 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
80 // see if it's already allocated (and dealloc it)
81 for(n=0;n<HOST_REGS;n++)
// EXCLUDE_REG is skipped; the dirty bit of the old mapping is remembered
83 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
84 dirty=(cur->dirty>>n)&1;
// carry the remembered dirty bit over to hr
91 cur->dirty|=dirty<<hr;
// hr is no longer marked as holding a constant
92 cur->isconst&=~(1<<hr);
95 // Alloc cycle count into dedicated register
96 static void alloc_cc(struct regstat *cur, int i)
98 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
101 static void alloc_cc_optional(struct regstat *cur, int i)
103 if (cur->regmap[HOST_CCREG] < 0) {
104 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
105 cur->noevict &= ~(1u << HOST_CCREG);
114 static attr_unused const char *regname[32] = {
115 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
116 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
117 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
118 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
121 static attr_unused const char *regname64[32] = {
122 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
123 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
124 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
125 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition codes, listed in the same order as the condname[] strings.
// NOTE(review): presumably numbered from 0 so the values can be placed
// directly into instruction condition fields - confirm against the enum header.
129 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
130 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
133 static attr_unused const char *condname[16] = {
134 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
135 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
138 static void output_w32(u_int word)
140 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
// Packs two register numbers into an instruction word: rn at bit 5, rd at bit 0.
144 static u_int rn_rd(u_int rn, u_int rd)
148 return (rn << 5) | rd;
// Packs three register numbers into an instruction word:
// rm at bit 16, rn at bit 5, rd at bit 0.
151 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
156 return (rm << 16) | (rn << 5) | rd;
// Same as rm_rn_rd() with a fourth register number, ra, placed at bit 10.
159 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
162 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Packs imm7 at bit 15, rt2 at bit 10, rn at bit 5 and rt at bit 0.
// NOTE(review): presumably for register-pair load/store encodings - confirm at callers.
165 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
171 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Same as rm_rn_rd() with a 6-bit immediate (e.g. a shift amount) placed at bit 10.
174 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
177 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Packs a 16-bit immediate at bit 5 together with rd at bit 0
// (the layout used by the movz/movn/movk emitters).
180 static u_int imm16_rd(u_int imm16, u_int rd)
// the immediate must fit in 16 bits
182 assert(imm16 < 0x10000);
184 return (imm16 << 5) | rd;
// Packs a 12-bit immediate at bit 10 together with rn at bit 5 and rd at bit 0.
187 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
// the immediate must fit in 12 bits
189 assert(imm12 < 0x1000);
192 return (imm12 << 10) | (rn << 5) | rd;
// Packs a 9-bit immediate at bit 12 together with rn at bit 5 and the
// transfer register at bit 0.  Callers mask signed offsets with 0x1ff first.
195 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
// the (already masked) immediate must fit in 9 bits
197 assert(imm9 < 0x200);
200 return (imm9 << 12) | (rn << 5) | rd;
// Packs a 19-bit immediate at bit 5 together with rt at bit 0
// (used by emit_cb for the cbz/cbnz branch offset).
203 static u_int imm19_rt(u_int imm19, u_int rt)
// the immediate must fit in 19 bits
205 assert(imm19 < 0x80000);
207 return (imm19 << 5) | rt;
// Packs n at bit 22, immr at bit 16, imms at bit 10, rn at bit 5 and rd at bit 0
// (the layout used by the bitfield and logical-immediate emitters).
210 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
217 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Computes the 26-bit word-offset field for an unconditional branch from the
// current output position to addr.
220 static u_int genjmp(const u_char *addr)
222 intptr_t offset = addr - out;
223 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
// the field can only express -128MB .. +128MB-1
224 if (offset < -134217728 || offset > 134217727) {
225 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
// byte offset -> word offset, truncated to the 26-bit field
229 return ((u_int)offset >> 2) & 0x03ffffff;
// Computes the 19-bit word-offset field for a conditional or compare-and-branch
// instruction from the current output position to addr.
232 static u_int genjmpcc(const u_char *addr)
234 intptr_t offset = addr - out;
// tiny "addresses" are placeholders; the caller gets a zero field
235 if ((uintptr_t)addr < 3) return 0;
// the field can only express -1MB .. +1MB-4
236 if (offset < -1048576 || offset > 1048572) {
237 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
// byte offset -> word offset, truncated to the 19-bit field
241 return ((u_int)offset >> 2) & 0x7ffff;
244 static uint32_t is_mask(u_int value)
246 return value && ((value + 1) & value) == 0;
249 // This function returns true if the argument contains a
250 // non-empty sequence of ones (possibly rotated) with the remainder zero.
251 static uint32_t is_rotated_mask(u_int value)
253 if (value == 0 || value == ~0)
255 if (is_mask((value - 1) | value))
257 return is_mask((~value - 1) | ~value);
// Derives the immr/imms fields of a 32-bit logical-immediate encoding for value.
// Callers check is_rotated_mask(value) before calling.
260 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
262 int lzeros, tzeros, ones;
// filling the trailing zeros gives a low mask: the ones are one contiguous run
264 if (is_mask((value - 1) | value)) {
265 lzeros = __builtin_clz(value);
266 tzeros = __builtin_ctz(value);
267 ones = 32 - lzeros - tzeros;
// rotation that moves a low run of ones up to its position in value
268 *immr = (32 - tzeros) & 31;
// NOTE(review): textually the same test as above; presumably value has been
// complemented by this point to handle runs that wrap around - confirm.
273 if (is_mask((value - 1) | value)) {
274 lzeros = __builtin_clz(value);
275 tzeros = __builtin_ctz(value);
276 ones = 32 - lzeros - tzeros;
284 static void emit_mov(u_int rs, u_int rt)
286 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
287 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
290 static void emit_mov64(u_int rs, u_int rt)
292 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
293 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
296 static void emit_add(u_int rs1, u_int rs2, u_int rt)
298 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
299 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
302 static void emit_adds(u_int rs1, u_int rs2, u_int rt)
304 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
305 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
308 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
310 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
311 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
314 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
316 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
317 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
319 #define emit_adds_ptr emit_adds64
321 static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
323 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
324 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
327 static void emit_neg(u_int rs, u_int rt)
329 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
333 static void emit_negs(u_int rs, u_int rt)
335 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
339 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345 static void emit_subs(u_int rs1, u_int rs2, u_int rt)
347 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
348 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
351 static attr_unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
353 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
354 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
357 static void emit_movz(u_int imm, u_int rt)
359 assem_debug("movz %s,#%#x\n", regname[rt], imm);
360 output_w32(0x52800000 | imm16_rd(imm, rt));
363 static void emit_movz_lsl16(u_int imm, u_int rt)
365 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
366 output_w32(0x52a00000 | imm16_rd(imm, rt));
369 static void emit_movn(u_int imm, u_int rt)
371 assem_debug("movn %s,#%#x\n", regname[rt], imm);
372 output_w32(0x12800000 | imm16_rd(imm, rt));
375 static void emit_movn_lsl16(u_int imm,u_int rt)
377 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
378 output_w32(0x12a00000 | imm16_rd(imm, rt));
381 static void emit_movk(u_int imm,u_int rt)
383 assem_debug("movk %s,#%#x\n", regname[rt], imm);
384 output_w32(0x72800000 | imm16_rd(imm, rt));
// Emits `movk wt,#imm,lsl #16`.  emit_movimm() uses it after emit_movz() to
// supply the upper 16 bits of a constant.
387 static void emit_movk_lsl16(u_int imm,u_int rt)
390 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
391 output_w32(0x72a00000 | imm16_rd(imm, rt));
// Emits code that clears host register rt.
// NOTE(review): the set-on-condition helpers use this and emit_movimm(0, rt)
// interchangeably, so it presumably loads zero - confirm against the body.
394 static void emit_zeroreg(u_int rt)
// Loads the 32-bit constant imm into rt, picking the shortest encoding that fits.
399 static void emit_movimm(u_int imm, u_int rt)
// complement fits in 16 bits
403 else if ((~imm) < 65536)
// only the upper half is non-zero
405 else if ((imm&0xffff) == 0)
406 emit_movz_lsl16(imm >> 16, rt);
// lower half is all ones
407 else if (((~imm)&0xffff) == 0)
408 emit_movn_lsl16(~imm >> 16, rt);
// a (rotated) run of ones can be materialised with orr against wzr
409 else if (is_rotated_mask(imm)) {
411 gen_logical_imm(imm, &immr, &imms);
412 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
413 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
// general case: two instructions, low half then high half
416 emit_movz(imm & 0xffff, rt);
417 emit_movk_lsl16(imm >> 16, rt);
// Loads the 64-bit constant imm into xt by walking its four 16-bit chunks.
421 static void emit_movimm64(uint64_t imm, u_int rt)
423 u_int shift, op, imm16, insns = 0;
424 for (shift = 0; shift < 4; shift++) {
425 imm16 = (imm >> shift * 16) & 0xffff;
// the first instruction emitted is movz, every later one is movk
428 op = insns ? 0xf2800000 : 0xd2800000;
429 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
431 assem_debug(",lsl #%u", shift * 16);
// the chunk index goes into bits 22:21
433 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
// explicit `movz xt,#0`
// NOTE(review): presumably only reached when no chunk was emitted (imm == 0) - confirm.
437 assem_debug("movz %s,#0\n", regname64[rt]);
438 output_w32(0xd2800000 | imm16_rd(0, rt));
// Emits a 32-bit load of the variable at addr, addressed relative to FP;
// the offset is taken from the start of dynarec_local.
442 static void emit_readword(void *addr, u_int rt)
444 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
// the scaled 12-bit offset needs 4-byte alignment and at most 4095 words
445 if (!(offset & 3) && offset <= 16380) {
446 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
447 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit load of the variable at addr, addressed relative to FP;
// the offset is taken from the start of dynarec_local.
453 static void emit_readdword(void *addr, u_int rt)
455 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
// the scaled 12-bit offset needs 8-byte alignment and at most 4095 doublewords
456 if (!(offset & 7) && offset <= 32760) {
457 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
458 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// pointer-sized reads use the 64-bit load
463 #define emit_readptr emit_readdword
// Emits a sign-extending 16-bit load (ldrsh) of the variable at addr,
// addressed relative to FP; the offset is taken from the start of dynarec_local.
465 static void emit_readshword(void *addr, u_int rt)
467 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
// the scaled 12-bit offset needs 2-byte alignment and at most 4095 halfwords
468 if (!(offset & 1) && offset <= 8190) {
469 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
470 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Emits a load of guest register r from its backing variable into host register hr.
476 static void emit_loadreg(u_int r, u_int hr)
484 //case HIREG: addr = &hi; break;
485 //case LOREG: addr = &lo; break;
486 case CCREG: addr = &cycle_count; break;
// these two are flagged is64 and therefore go through the 64-bit load
487 case INVCP: addr = &invc_ptr; is64 = 1; break;
488 case ROREG: addr = &ram_offset; is64 = 1; break;
// remaining register numbers index the guest GPR array
491 addr = &psxRegs.GPR.r[r];
495 emit_readdword(addr, hr);
497 emit_readword(addr, hr);
// Emits a 32-bit store of rt to the variable at addr, addressed relative to FP;
// the offset is taken from the start of dynarec_local.
501 static void emit_writeword(u_int rt, void *addr)
503 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
// the scaled 12-bit offset needs 4-byte alignment and at most 4095 words
504 if (!(offset & 3) && offset <= 16380) {
505 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
506 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit store of rt to the variable at addr, addressed relative to FP;
// the offset is taken from the start of dynarec_local.
512 static void emit_writedword(u_int rt, void *addr)
514 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
// the scaled 12-bit offset needs 8-byte alignment and at most 4095 doublewords
515 if (!(offset & 7) && offset <= 32760) {
516 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
517 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emits a 32-bit store of host register hr back to the variable backing guest register r.
523 static void emit_storereg(u_int r, u_int hr)
528 //case HIREG: addr = &hi; break;
529 //case LOREG: addr = &lo; break;
530 case CCREG: addr = &cycle_count; break;
// any other register number must index the guest GPR array (r < 34)
531 default: assert(r < 34u); addr = &psxRegs.GPR.r[r]; break;
533 emit_writeword(hr, addr);
536 static void emit_test(u_int rs, u_int rt)
538 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
539 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
542 static void emit_testimm(u_int rs, u_int imm)
545 assem_debug("tst %s,#%#x\n", regname[rs], imm);
546 assert(is_rotated_mask(imm)); // good enough for PCSX
547 gen_logical_imm(imm, &immr, &imms);
548 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
551 static void emit_not(u_int rs,u_int rt)
553 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
554 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
557 static void emit_and(u_int rs1,u_int rs2,u_int rt)
559 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
560 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
563 static void emit_or(u_int rs1,u_int rs2,u_int rt)
565 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
566 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
569 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
571 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
572 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
575 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
577 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
578 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
581 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
583 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
584 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
587 static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
589 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
590 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
593 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
595 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
596 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
599 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
601 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
602 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
605 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
607 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
608 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Emits rt = rs + imm.  s selects the flag-setting forms, is64 the 64-bit forms.
// imm is treated as a two's-complement value: small negative values are
// emitted as sub with the negated immediate.
611 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
613 attr_unused const char *st = s ? "s" : "";
// turn the booleans into the opcode bits they control
614 s = s ? 0x20000000 : 0;
615 is64 = is64 ? 0x80000000 : 0;
// single add with a 12-bit immediate
617 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
618 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
// single sub with the negated 12-bit immediate
620 else if (-imm < 4096) {
621 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
622 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
// up to 24 bits: add the upper 12 bits (shifted form), then the lower 12 bits.
// The two-instruction split is not taken when flags are requested.
624 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
625 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
626 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
628 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
629 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
// same two-step scheme for negative values, using sub
632 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
633 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
634 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
636 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
637 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
// fallback: load the constant into a register and use the register add.
// NOTE(review): the opcode below does not include is64 - confirm that
// 64-bit callers can never reach this path.
644 host_tempreg_acquire();
647 emit_movimm(imm, tmp);
648 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
649 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
// only release the temp register if it was the one used
650 if (tmp == HOST_TEMPREG)
651 host_tempreg_release();
// 32-bit rt = rs + imm without touching the flags.
655 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
// general case: 32-bit, non-flag-setting add
661 emit_addimm_s(0, 0, rs, imm, rt);
664 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
666 emit_addimm_s(0, 1, rs, imm, rt);
669 static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
671 emit_addimm64(rs, imm, rt);
674 static void emit_addimm_and_set_flags(int imm, u_int rt)
676 emit_addimm_s(1, 0, rt, imm, rt);
679 static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
681 emit_addimm_s(1, 0, rs, imm, rt);
// Emits rt = rs <op> imm for op 0..3 (and, orr, eor, ands).
684 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
686 const char *names[] = { "and", "orr", "eor", "ands" };
687 const char *name = names[op];
// a (rotated) run of ones can be encoded directly as a logical immediate
690 if (is_rotated_mask(imm)) {
691 gen_logical_imm(imm, &immr, &imms);
692 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
693 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
// otherwise the constant goes through HOST_TEMPREG and the register form.
// The acquire/release pair is skipped only when rt itself is HOST_TEMPREG
// and rs is not.
696 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
697 host_tempreg_acquire();
698 emit_movimm(imm, HOST_TEMPREG);
699 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
700 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
701 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
702 host_tempreg_release();
// rt = rs & imm.
707 static void emit_andimm(u_int rs, u_int imm, u_int rt)
// op 0 selects "and" in emit_logicop_imm
712 emit_logicop_imm(0, rs, imm, rt);
// rt = rs | imm.
715 static void emit_orimm(u_int rs, u_int imm, u_int rt)
// op 1 selects "orr" in emit_logicop_imm
722 emit_logicop_imm(1, rs, imm, rt);
// rt = rs ^ imm.
725 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
// op 2 selects "eor" in emit_logicop_imm
732 emit_logicop_imm(2, rs, imm, rt);
735 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
737 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
738 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
741 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
743 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
744 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
747 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
749 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
750 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
753 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
755 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
756 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
759 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
761 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
762 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
765 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
767 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
768 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
771 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
773 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
777 static void emit_signextend16(u_int rs, u_int rt)
779 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
780 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
783 static void emit_shl(u_int rs,u_int rshift,u_int rt)
785 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
786 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
789 static void emit_shr(u_int rs,u_int rshift,u_int rt)
791 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
792 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
795 static void emit_sar(u_int rs,u_int rshift,u_int rt)
797 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
798 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
801 static void emit_cmpimm(u_int rs, u_int imm)
804 assem_debug("cmp %s,%#x\n", regname[rs], imm);
805 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
807 else if (-imm < 4096) {
808 assem_debug("cmn %s,%#x\n", regname[rs], imm);
809 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
811 else if (imm < 16777216 && !(imm & 0xfff)) {
812 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
813 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
816 host_tempreg_acquire();
817 emit_movimm(imm, HOST_TEMPREG);
818 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
819 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
820 host_tempreg_release();
824 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
826 assert(imm == 0 || imm == 1);
827 assert(cond0 < 0x10);
828 assert(cond1 < 0x10);
830 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
831 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
833 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
834 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
838 static void emit_cmovne_imm(u_int imm,u_int rt)
840 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
843 static void emit_cmovl_imm(u_int imm,u_int rt)
845 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
848 static void emit_cmovb_imm(int imm,u_int rt)
850 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
853 static void emit_cmoveq_reg(u_int rs,u_int rt)
855 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
856 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
859 static void emit_cmovne_reg(u_int rs,u_int rt)
861 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
862 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
865 static void emit_cmovl_reg(u_int rs,u_int rt)
867 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
868 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
871 static void emit_cmovb_reg(u_int rs,u_int rt)
873 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
874 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
877 static void emit_cmovs_reg(u_int rs,u_int rt)
879 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
880 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
883 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
885 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
886 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
889 static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
891 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
892 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
895 static void emit_slti32(u_int rs,int imm,u_int rt)
897 if(rs!=rt) emit_zeroreg(rt);
899 if(rs==rt) emit_movimm(0,rt);
900 emit_cmovl_imm(1,rt);
903 static void emit_sltiu32(u_int rs,int imm,u_int rt)
905 if(rs!=rt) emit_zeroreg(rt);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovb_imm(1,rt);
911 static void emit_cmp(u_int rs,u_int rt)
913 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
914 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
917 static void emit_cmpcs(u_int rs,u_int rt)
919 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
920 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
// NOTE(review): going by the name, presumably sets rt to 1 when rs > 0 and
// to 0 otherwise - confirm against the full body.
923 static void emit_set_gz32(u_int rs, u_int rt)
925 //assem_debug("set_gz32\n");
// final step: rt becomes 0 when the preceding compare yielded "less than"
928 emit_cmovl_imm(0,rt);
// NOTE(review): going by the name, presumably leaves rt = 1 when rs is
// non-zero and 0 otherwise - confirm against the full body.
931 static void emit_set_nz32(u_int rs, u_int rt)
933 //assem_debug("set_nz32\n");
// start from a copy of rs: a zero input is then already the right answer
934 if(rs!=rt) emit_mov(rs,rt);
// "not equal" replaces rt with 1
936 emit_cmovne_imm(1,rt);
939 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
941 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
942 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
944 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
945 emit_cmovl_imm(1,rt);
948 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
950 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
951 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
953 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
954 emit_cmovb_imm(1,rt);
957 static int can_jump_or_call(const void *a)
959 intptr_t diff = (u_char *)a - out;
960 return (-134217728 <= diff && diff <= 134217727);
// Emits `bl` to the function at a, relative to the current output position.
963 static void emit_call(const void *a)
965 intptr_t diff = (u_char *)a - out;
966 assem_debug("bl %p%s\n", log_addr(a), func_name(a));
// only targets within +/-128MB can be encoded in the 26-bit word offset
968 if (-134217728 <= diff && diff <= 134217727)
969 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
974 static void emit_jmp(const void *a)
976 assem_debug("b %p%s\n", log_addr(a), func_name(a));
977 u_int offset = genjmp(a);
978 output_w32(0x14000000 | offset);
981 static void emit_jne(const void *a)
983 assem_debug("bne %p\n", log_addr(a));
984 u_int offset = genjmpcc(a);
985 output_w32(0x54000000 | (offset << 5) | COND_NE);
988 static void emit_jeq(const void *a)
990 assem_debug("beq %p\n", log_addr(a));
991 u_int offset = genjmpcc(a);
992 output_w32(0x54000000 | (offset << 5) | COND_EQ);
995 static void emit_js(const void *a)
997 assem_debug("bmi %p\n", log_addr(a));
998 u_int offset = genjmpcc(a);
999 output_w32(0x54000000 | (offset << 5) | COND_MI);
1002 static void emit_jns(const void *a)
1004 assem_debug("bpl %p\n", log_addr(a));
1005 u_int offset = genjmpcc(a);
1006 output_w32(0x54000000 | (offset << 5) | COND_PL);
1009 static void emit_jl(const void *a)
1011 assem_debug("blt %p\n", log_addr(a));
1012 u_int offset = genjmpcc(a);
1013 output_w32(0x54000000 | (offset << 5) | COND_LT);
1016 static void emit_jge(const void *a)
1018 assem_debug("bge %p\n", log_addr(a));
1019 u_int offset = genjmpcc(a);
1020 output_w32(0x54000000 | (offset << 5) | COND_GE);
1023 static void emit_jo(const void *a)
1025 assem_debug("bvs %p\n", log_addr(a));
1026 u_int offset = genjmpcc(a);
1027 output_w32(0x54000000 | (offset << 5) | COND_VS);
1030 static void emit_jno(const void *a)
1032 assem_debug("bvc %p\n", log_addr(a));
1033 u_int offset = genjmpcc(a);
1034 output_w32(0x54000000 | (offset << 5) | COND_VC);
1037 static void emit_jc(const void *a)
1039 assem_debug("bcs %p\n", log_addr(a));
1040 u_int offset = genjmpcc(a);
1041 output_w32(0x54000000 | (offset << 5) | COND_CS);
1044 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
1046 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], log_addr(a));
1047 u_int offset = genjmpcc(a);
1048 is64 = is64 ? 0x80000000 : 0;
1049 isnz = isnz ? 0x01000000 : 0;
1050 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// Emits a 32-bit cbz on register r targeting a.
// NOTE(review): the returned pointer is presumably the address of the emitted
// instruction, for later patching - confirm against the full body.
1053 static void *emit_cbz(u_int r, const void *a)
// isnz = 0, is64 = 0
1056 emit_cb(0, 0, a, r);
1060 static void emit_jmpreg(u_int r)
1062 assem_debug("br %s\n", regname64[r]);
1063 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1066 static void emit_retreg(u_int r)
1068 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1069 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// Emits a return instruction.
// NOTE(review): presumably equivalent to emit_retreg(LR) - confirm against the body.
1072 static void emit_ret(void)
// Emits `adr xt, addr`: loads the pc-relative address addr into register rt.
1077 static void emit_adr(void *addr, u_int rt)
1079 intptr_t offset = (u_char *)addr - out;
// adr reaches +/-1MB from the current output position
1080 assert(-1048576 <= offset && offset < 1048576);
1082 assem_debug("adr x%d,#%#lx\n", rt, offset);
// the low two offset bits go to bits 30:29, the remaining 19 bits to bits 23:5
1083 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// Emits `adrp xt, addr`: loads the 4K page address of addr, relative to the
// page of the current output position, into register rt.
1086 static void emit_adrp(void *addr, u_int rt)
// difference between the two page-aligned addresses
1088 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
// adrp reaches +/-4GB
1089 assert(-4294967296l <= offset && offset < 4294967296l);
// NOTE(review): the "(000)" in the debug text and the field packing below suggest
// offset has been converted to a page count by this point - confirm.
1092 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1093 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1096 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1098 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1103 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1105 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1106 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1109 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1111 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1112 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1115 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1117 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1118 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1121 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1123 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1124 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1126 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1128 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1130 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1131 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1134 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1136 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1137 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1140 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1142 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1143 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1146 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1148 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1149 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1152 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1154 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1155 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1158 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1160 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1161 assert(-256 <= offset && offset < 256);
1162 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1165 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1167 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1168 assert(-256 <= offset && offset < 256);
1169 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1172 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1174 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1175 assert(-256 <= offset && offset < 256);
1176 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1179 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1181 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1182 assert(-256 <= offset && offset < 256);
1183 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emits a 32-bit store of rt to [rs + offset].
// NOTE(review): the debug text prints the base register with regname[] while
// the halfword and byte variants use regname64[] - confirm which is intended.
1186 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
// aligned, non-negative offsets up to 16380 use the scaled 12-bit form
1188 if (!(offset & 3) && (u_int)offset <= 16380) {
1189 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1190 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
// small signed offsets use the unscaled 9-bit form
1192 else if (-256 <= offset && offset < 256) {
1193 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1194 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1200 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1202 if (!(offset & 1) && (u_int)offset <= 8190) {
1203 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1204 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1206 else if (-256 <= offset && offset < 256) {
1207 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1208 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1214 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1216 if ((u_int)offset < 4096) {
1217 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1218 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1220 else if (-256 <= offset && offset < 256) {
1221 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1222 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1228 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1230 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1231 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1234 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1236 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1237 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1240 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1242 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1243 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1246 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1248 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1249 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1252 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1254 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1255 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1258 static void emit_clz(u_int rs, u_int rt)
1260 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1261 output_w32(0x5ac01000 | rn_rd(rs, rt));
1264 // special case for checking invalid_code
1265 static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1267 emit_shrimm(r, 12, rt);
1268 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1269 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1272 // special for loadlr_assemble, rs2 is destroyed
1273 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1275 emit_shl(rs2, shift, rs2);
1276 emit_bic(rs1, rs2, rt);
1279 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1281 emit_shr(rs2, shift, rs2);
1282 emit_bic(rs1, rs2, rt);
1285 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1287 u_int op = 0xb9000000;
1288 attr_unused const char *ldst = is_st ? "st" : "ld";
1289 attr_unused char rp = is64 ? 'x' : 'w';
1290 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1291 is64 = is64 ? 1 : 0;
1292 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1293 ofs = (ofs >> (2+is64));
1294 if (!is_st) op |= 0x00400000;
1295 if (is64) op |= 0x40000000;
1296 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1299 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1301 u_int op = 0x29000000;
1302 attr_unused const char *ldst = is_st ? "st" : "ld";
1303 attr_unused char rp = is64 ? 'x' : 'w';
1304 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1305 is64 = is64 ? 1 : 0;
1306 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1307 ofs = (ofs >> (2+is64));
1308 assert(-64 <= ofs && ofs <= 63);
1310 if (!is_st) op |= 0x00400000;
1311 if (is64) op |= 0x80000000;
1312 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load (is_store == 0) the host registers named in
// the reglist bitmask to/from the stack area starting at SP + SSP_CALLEE_REGS.
// All transfers are 64-bit; both a pair form (emit_ldstp) and a single
// register form (emit_ldst) are used.
1315 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask one bit at a time; r is the register number of the low bit
1319 for (r = 0; reglist; r++, reglist >>= 1) {
// transfer two collected registers with one pair instruction
1323 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
// transfer a single register
1329 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// sanity check on the amount of stack used
// NOTE(review): presumably guards against running into the area at SSP_CALLER_REGS - confirm
1332 assert(ofs <= SSP_CALLER_REGS);
1335 // Save registers before function call
1336 static void save_regs(u_int reglist)
1338 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1339 save_load_regs_all(1, reglist);
1342 // Restore registers after function call
1343 static void restore_regs(u_int reglist)
1345 reglist &= CALLER_SAVE_REGS;
1346 save_load_regs_all(0, reglist);
1349 /* Stubs/epilogue */
// Literal pool hook taking a count n.
// NOTE(review): presumably nothing needs to be emitted here on arm64 - confirm
1351 static void literal_pool(int n)
// Companion hook of literal_pool taking a count n.
// NOTE(review): presumably a no-op on arm64 as well - confirm
1356 static void literal_pool_jumpover(int n)
// Emit the stub for a jump that leaves the current block: the 32-bit target
// is loaded into register 0 with a movz/movk pair, then control goes to
// dyna_linker. addr is the branch instruction this stub belongs to.
1360 // parsed by find_extjump_insn, check_extjump2
1361 static void emit_extjump(u_char *addr, u_int target)
// the top byte of the instruction word at addr must be a b / b.cond opcode
1363 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
// w0 = target, low half first, then the high half
1365 emit_movz(target & 0xffff, 0);
1366 emit_movk_lsl16(target >> 16, 0);
1368 // addr is in the current recompiled block (max 256k)
1369 // offset shouldn't exceed +/-1MB
1371 emit_far_jump(dyna_linker);
// Sanity check on an extjump stub at src: its first instruction word must be
// the "movz r0, #val" that emit_extjump starts with.
1374 static void check_extjump2(void *src)
// the mask keeps the opcode and destination register and ignores the imm16 field
1377 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1381 // put rt_val into rt, potentially making use of rs with value rs_val
// Tries the cheapest derivation first: add/sub of the difference, bitwise
// complement, xor with an encodable mask, and finally a plain immediate move.
1382 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1384 int diff = rt_val - rs_val;
// difference below 4096 in magnitude, or below 2^24 with the low 12 bits clear
1385 if ((-4096 < diff && diff < 4096)
1386 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1387 emit_addimm(rs, diff, rt);
// rt_val is the bitwise complement of rs_val
1388 else if (rt_val == ~rs_val)
// the differing bits are accepted by is_rotated_mask, so one xor does it
1390 else if (is_rotated_mask(rs_val ^ rt_val))
1391 emit_xorimm(rs, rs_val ^ rt_val, rt);
// nothing cheap applies: load the constant from scratch
1393 emit_movimm(rt_val, rt);
1396 // return 1 if the above function can do its job cheaply
// (the visible conditions mirror the add/sub range checks and the
// is_rotated_mask check of emit_movimm_from)
1397 static int is_similar_value(u_int v1, u_int v2)
1400 return (-4096 < diff && diff < 4096)
1401 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1403 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from: put rt_val into rt, where rs holds the
// 32-bit value rs_val.
1406 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
// a target that fits in 32 bits can go through the 32-bit helper
1408 if (rt_val < 0x100000000ull) {
1409 emit_movimm_from(rs_val, rs, rt_val, rt);
1412 // just move the whole thing. At least on Linux all addresses
1413 // seem to be 48bit, so 3 insns - not great not terrible
1414 emit_movimm64(rt_val, rt);
// Move the values in host registers a0 and a1 into argument registers 0 and 1
// with 64-bit moves, ordering the moves so that neither source is overwritten
// before it has been read.
1418 static void pass_args64(u_int a0, u_int a1)
// full swap case in the visible code: go through register 2 as scratch
1422 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
// a1 currently lives in register 0
1424 else if(a0!=0&&a1==0) {
1426 if (a0>=0) emit_mov64(a0,0);
// general case: moves are skipped when the value is already in place
1429 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1430 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt while adjusting the value to the access width of the given
// stub type: sbfm for the signed loads, ubfm for the store cases shown, and a
// plain move (skipped when rs == rt) for word stores.
1434 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// the second argument is the index of the top bit that is kept (7 or 15)
1437 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1439 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1440 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1442 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1444 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1449 #include "pcsxmem.h"
1450 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for the load described by stubs[n].
// The emitted code looks the address up through mem_rtab and loads directly
// when possible; otherwise it calls one of the jump_handler_read* routines
// and finally jumps back to stubs[n].retaddr.
1452 static void do_readstub(int n)
1454 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// point the branch recorded for this stub at the code emitted from here on
1455 set_jump_target(stubs[n].addr, out);
1456 enum stub_type type = stubs[n].type;
// stub fields: b = host register with the address, c = register state,
// d = cycle adjustment, e = mask of live host registers
1458 int rs = stubs[n].b;
1459 const struct regstat *i_regs = (void *)stubs[n].c;
1460 int adj = (int)stubs[n].d;
1461 u_int reglist = stubs[n].e;
1462 const signed char *i_regmap = i_regs->regmap;
// C2LS and LOADLR deliver into the register mapped to FTEMP,
// everything else into the register mapped to the instruction's rt1
1464 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1465 rt=get_reg(i_regmap,FTEMP);
1467 rt=get_reg(i_regmap,dops[i].rt1);
1470 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1471 void *restore_jump = NULL, *handler_jump = NULL;
// look for a host register below HOST_CCREG that is not in reglist
1473 for (r = 0; r < HOST_CCREG; r++) {
1474 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1479 if(rt>=0&&dops[i].rt1!=0)
1486 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// table lookup: temp = mem_rtab, temp2 = entry for page (rs >> 12),
// then temp2 is added to itself with the flags updated
1488 emit_readdword(&mem_rtab,temp);
1489 emit_shrimm(rs,12,temp2);
1490 emit_readdword_dualindexedx8(temp,temp2,temp2);
1491 emit_adds64(temp2,temp2,temp2);
// direct access: load through the looked-up base, but only when a result is wanted
1494 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1496 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1497 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1498 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1499 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1500 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1506 emit_jmp(0); // jump to reg restore
1509 emit_jmp(stubs[n].retaddr); // return address
// the handler path starts here
1510 set_jump_target(handler_jump, out);
// pick the read handler matching the access width
1515 if(type==LOADB_STUB||type==LOADBU_STUB)
1516 handler=jump_handler_read8;
1517 if(type==LOADH_STUB||type==LOADHU_STUB)
1518 handler=jump_handler_read16;
1519 if(type==LOADW_STUB)
1520 handler=jump_handler_read32;
// address and table entry go to argument registers 0 and 1
1522 pass_args64(rs,temp2);
// cycle count: take it from the register mapped to CCREG or load it into
// register 2; either way register 2 receives count + adj
1524 cc = cc_use = get_reg(i_regmap, CCREG);
1526 emit_loadreg(CCREG, (cc_use = 2));
1527 emit_addimm(cc_use, adj, 2);
1529 emit_far_call(handler);
1532 // cycle reload for read32 only (value in w2 both in and out)
1533 if (type == LOADW_STUB) {
1534 emit_addimm(2, -adj, cc_use);
1536 emit_storereg(CCREG, cc_use);
// move the handler's result from register 0 into rt with the proper extension
1539 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1540 loadstore_extend(type,0,rt);
// the direct-access path rejoins here to restore registers
1543 set_jump_target(restore_jump, out);
1544 restore_regs(reglist);
1545 emit_jmp(stubs[n].retaddr);
// Emit a load for instruction i whose guest address addr is known at compile
// time. If get_direct_memhandler reports no handler, the value is read
// straight from host memory; otherwise a handler call is emitted in place.
1548 static void inline_readstub(enum stub_type type, int i, u_int addr,
1549 const signed char regmap[], int target, int adj, u_int reglist)
1551 int ra = cinfo[i].addr;
1552 int rt = get_reg(regmap, target);
1555 uintptr_t host_addr = 0;
1558 cc = cc_use = get_reg(regmap, CCREG);
1559 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
1561 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// no handler: plain memory, reachable at host_addr
1562 if (handler == NULL) {
1563 if(rt<0||dops[i].rt1==0)
// turn the guest address held in ra into the host address
1565 if (addr != host_addr)
1566 emit_movimm_from64(addr, ra, host_addr, ra);
1568 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1569 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1570 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1571 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1572 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
1577 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// pick the generic read handler by access width
1579 if(type==LOADB_STUB||type==LOADBU_STUB)
1580 handler=jump_handler_read8;
1581 if(type==LOADH_STUB||type==LOADHU_STUB)
1582 handler=jump_handler_read16;
1583 if(type==LOADW_STUB)
1584 handler=jump_handler_read32;
1587 // call a memhandler
1588 if(rt>=0&&dops[i].rt1!=0)
// argument 0 = guest address
1592 emit_movimm(addr,0);
// register 2 = cycle count + adj
1596 emit_loadreg(CCREG, (cc_use = 2));
1597 emit_addimm(cc_use, adj, 2);
// l1 is the mem_rtab entry for this page shifted left by one; put it into
// register 1 with adrp+add when the page offset from `out` passes the range
// check below, otherwise with a full 64-bit immediate move
1599 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1600 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1601 if (-4294967296l <= offset && offset < 4294967296l) {
1602 emit_adrp((void *)l1, 1);
1603 emit_addimm64(1, l1 & 0xfff, 1);
1606 emit_movimm64(l1, 1);
1609 emit_far_call(do_memhandler_pre);
1611 emit_far_call(handler);
1614 // cycle reload for read32 only (value in w2 both in and out)
1615 if (type == LOADW_STUB) {
1617 emit_far_call(do_memhandler_post);
1618 emit_addimm(2, -adj, cc_use);
1620 emit_storereg(CCREG, cc_use);
// move the result from register 0 into rt with the proper extension
1623 if(rt>=0&&dops[i].rt1!=0)
1624 loadstore_extend(type, 0, rt);
1625 restore_regs(reglist);
// Emit the out-of-line slow path for the store described by stubs[n].
// The emitted code looks the address up through mem_wtab and stores directly
// when possible; otherwise it calls one of the jump_handler_write* routines
// and finally jumps back to stubs[n].retaddr.
1628 static void do_writestub(int n)
1630 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
// point the branch recorded for this stub at the code emitted from here on
1631 set_jump_target(stubs[n].addr, out);
1632 enum stub_type type=stubs[n].type;
// stub fields: c = register state, d = cycle adjustment, e = mask of live host registers
1635 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1636 int adj = (int)stubs[n].d;
1637 u_int reglist=stubs[n].e;
1638 signed char *i_regmap=i_regs->regmap;
// the value to store comes from the FTEMP mapping for C2LS, else from rs2's mapping
1640 if(dops[i].itype==C2LS) {
1641 rt=get_reg(i_regmap,r=FTEMP);
1643 rt=get_reg(i_regmap,r=dops[i].rs2);
1647 int rtmp,temp=-1,temp2,regs_saved=0;
1648 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally protects the address and data registers
1649 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a host register below HOST_CCREG that is not in reglist
1650 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1651 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
// fallback search among registers 0..3, avoiding the address and data registers
1659 for(rtmp=0;rtmp<=3;rtmp++)
1660 if(rtmp!=rs&&rtmp!=rt)
1663 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1666 host_tempreg_acquire();
// table lookup: temp = mem_wtab, temp2 = entry for page (rs >> 12),
// then temp2 is added to itself with the flags updated
1669 emit_readdword(&mem_wtab,temp);
1670 emit_shrimm(rs,12,temp2);
1671 emit_readdword_dualindexedx8(temp,temp2,temp2);
1672 emit_adds64(temp2,temp2,temp2);
// direct access: store through the looked-up base
1676 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1677 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1678 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1683 emit_jmp(0); // jump to reg restore
1686 emit_jmp(stubs[n].retaddr); // return address (invcode check)
// the handler path starts here
1687 set_jump_target(handler_jump, out);
// pick the write handler matching the access width
1693 case STOREB_STUB: handler=jump_handler_write8; break;
1694 case STOREH_STUB: handler=jump_handler_write16; break;
1695 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry is handed over in register 3
1701 emit_mov64(temp2,3);
1702 host_tempreg_release();
// cycle count: take it from the register mapped to CCREG or load it into
// register 2; either way register 2 receives count + adj
1705 cc = cc_use = get_reg(i_regmap, CCREG);
1707 emit_loadreg(CCREG, (cc_use = 2));
1708 emit_addimm(cc_use, adj, 2);
1710 emit_far_call(handler);
1712 // new cycle_count returned in x2
1713 emit_addimm(2, -adj, cc_use);
1715 emit_storereg(CCREG, cc_use);
// the direct-access path rejoins here to restore registers
1717 set_jump_target(restore_jump, out);
1718 restore_regs(reglist);
1719 emit_jmp(stubs[n].retaddr);
// Emit a store for instruction i whose guest address addr is known at compile
// time. If get_direct_memhandler reports no handler, the value is written
// straight to host memory; otherwise a handler call is emitted in place.
1722 static void inline_writestub(enum stub_type type, int i, u_int addr,
1723 const signed char regmap[], int target, int adj, u_int reglist)
1725 int ra = cinfo[i].addr;
1726 int rt = get_reg(regmap,target);
1729 uintptr_t host_addr = 0;
1730 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
// no handler: plain memory, reachable at host_addr
1731 if (handler == NULL) {
// turn the guest address held in ra into the host address
1732 if (addr != host_addr)
1733 emit_movimm_from64(addr, ra, host_addr, ra);
1735 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1736 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1737 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
1743 // call a memhandler
1745 emit_writeword(ra, &address); // some handlers still need it
// argument 0 = the value, narrowed to the access width
1746 loadstore_extend(type, rt, 0);
// register 2 = cycle count + adj
1748 cc = cc_use = get_reg(regmap, CCREG);
1750 emit_loadreg(CCREG, (cc_use = 2));
1751 emit_addimm(cc_use, adj, 2);
1753 emit_far_call(do_memhandler_pre);
1754 emit_far_call(handler);
1755 emit_far_call(do_memhandler_post);
// register 2 holds the cycle count after the call; undo the adjustment
1756 emit_addimm(2, -adj, cc_use);
1758 emit_storereg(CCREG, cc_use);
1759 restore_regs(reglist);
// Code emitted before a GTE (cop2) handler call: save the registers in
// reglist, run the cop2 stall check, and set up argument register 0.
1764 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// reglist is saved as given, without masking by CALLER_SAVE_REGS
1766 save_load_regs_all(1, reglist);
1767 cop2_do_stall_check(op, i, i_regs, 0);
// NOTE(review): pcnt_* looks like optional profiling instrumentation - confirm
1770 emit_far_call(pcnt_gte_start);
1772 // pointer to cop2 regs
// computed as FP plus the offset of psxRegs.CP2D.r[0] from dynarec_local
1773 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Code emitted after a GTE (cop2) handler call: counterpart of c2op_prologue
// that reloads the registers in reglist.
1776 static void c2op_epilogue(u_int op,u_int reglist)
// NOTE(review): pcnt_* looks like optional profiling instrumentation - confirm
1780 emit_far_call(pcnt_gte_end);
1782 save_load_regs_all(0, reglist);
// Assemble the GTE (cop2) operation at instruction i as a call to the
// matching entry of gte_handlers / gte_handlers_nf.
1785 static void c2op_assemble(int i, const struct regstat *i_regs)
// the low 6 bits of the opcode word select the GTE operation
1787 u_int c2op=source[i]&0x3f;
1788 u_int hr,reglist_full=0,reglist;
1789 int need_flags,need_ir;
// collect every host register that currently has a mapping
1790 for(hr=0;hr<HOST_REGS;hr++) {
1791 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
// only caller-saved registers need saving around the call
1793 reglist=reglist_full&CALLER_SAVE_REGS;
1795 if (gte_handlers[c2op]!=NULL) {
// bit 63 of gte_unneeded tracks the flags, bits 9..11 (mask 0xe00) the IR registers
1796 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1797 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1798 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1799 source[i],gte_unneeded[i+1],need_flags,need_ir);
1800 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1802 //int shift = (source[i] >> 19) & 1;
1803 //int lm = (source[i] >> 10) & 1;
1807 c2op_prologue(c2op, i, i_regs, reglist);
// the handlers read the opcode from psxRegs.code
1808 emit_movimm(source[i],1); // opcode
1809 emit_writeword(1,&psxRegs.code);
// the "nf" handler table is used when the flags are not needed
1810 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1813 c2op_epilogue(c2op,reglist);
1817 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1819 //value = value & 0x7ffff000;
1820 //if (value & 0x7f87e000) value |= 0x80000000;
1821 emit_andimm(sl, 0x7fffe000, temp);
1822 emit_testimm(temp, 0xff87ffff);
1823 emit_andimm(sl, 0x7ffff000, temp);
1824 host_tempreg_acquire();
1825 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1826 emit_cmovne_reg(HOST_TEMPREG, temp);
1827 host_tempreg_release();
1828 assert(0); // testing needed
1831 static void do_mfc2_31_one(u_int copr,signed char temp)
1833 emit_readshword(®_cop2d[copr],temp);
1834 emit_bicsar_imm(temp,31,temp);
1835 emit_cmpimm(temp,0xf80);
1836 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1837 emit_andimm(temp,0xf80,temp);
1840 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1843 host_tempreg_acquire();
1844 temp = HOST_TEMPREG;
1846 do_mfc2_31_one(9,temp);
1847 emit_shrimm(temp,7,tl);
1848 do_mfc2_31_one(10,temp);
1849 emit_orrshr_imm(temp,2,tl);
1850 do_mfc2_31_one(11,temp);
1851 emit_orrshl_imm(temp,3,tl);
1852 emit_writeword(tl,®_cop2d[29]);
1854 if (temp == HOST_TEMPREG)
1855 host_tempreg_release();
// Assemble MULT/MULTU/DIV/DIVU for instruction i, leaving the results in the
// host registers mapped to HIREG and LOREG. The first part handles two real
// source registers; the second part handles a source that is guest register 0.
1858 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
// both sources are real registers
1864 if(dops[i].rs1&&dops[i].rs2)
1866 switch(dops[i].opcode2)
1871 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1872 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1873 signed char hi=get_reg(i_regs->regmap,HIREG);
1874 signed char lo=get_reg(i_regs->regmap,LOREG);
// the full 64-bit product is produced in the register mapped to HIREG
1880 if(dops[i].opcode2==0x18) // MULT
1881 emit_smull(m1,m2,hi);
1883 emit_umull(m1,m2,hi);
// move the upper half of the product down into the low 32 bits of hi
1886 emit_shrimm64(hi,32,hi);
1892 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1893 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1894 signed char quotient=get_reg(i_regs->regmap,LOREG);
1895 signed char remainder=get_reg(i_regs->regmap,HIREG);
1896 assert(numerator>=0);
1897 assert(denominator>=0);
1898 assert(quotient>=0);
1899 assert(remainder>=0);
1901 if (dops[i].opcode2 == 0x1A) // DIV
1902 emit_sdiv(numerator,denominator,quotient);
1904 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1905 emit_msub(quotient,denominator,numerator,remainder);
1907 // div 0 quotient (remainder is already correct)
1908 host_tempreg_acquire();
// signed case: (numerator lsr 31) | ~(numerator asr 31), i.e. 1 for a
// negative numerator and -1 otherwise
1909 if (dops[i].opcode2 == 0x1A) { // DIV
1910 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1911 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1914 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient by the special value when the denominator is zero
1915 emit_test(denominator,denominator);
1916 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1917 host_tempreg_release();
// at least one source is guest register 0
1926 signed char hr=get_reg(i_regs->regmap,HIREG);
1927 signed char lr=get_reg(i_regs->regmap,LOREG);
1928 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1931 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1932 assert(numerator >= 0);
// the remainder of a division by zero is the numerator itself
1934 emit_mov(numerator,hr);
// same signed special quotient as computed in the two-register case
1936 if (dops[i].opcode2 == 0x1A) { // DIV
1937 emit_add_lsrimm(WZR,numerator,31,lr);
1938 emit_orn_asrimm(lr,numerator,31,lr);
1945 if (hr >= 0) emit_zeroreg(hr);
1946 if (lr >= 0) emit_movimm(~0,lr);
// zero numerator: the remainder is zero, the quotient depends on the denominator
1949 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1951 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1952 assert(denominator >= 0);
1953 if (hr >= 0) emit_zeroreg(hr);
// lr is kept when the denominator is non-zero and inverted otherwise
1956 emit_test(denominator, denominator);
1957 emit_csinvne_reg(lr, lr, lr);
1962 // Multiply by zero is zero.
1963 if (hr >= 0) emit_zeroreg(hr);
1964 if (lr >= 0) emit_zeroreg(lr);
1968 #define multdiv_assemble multdiv_assemble_arm64
1970 // wb_dirtys making use of stp when possible
// Writes back the guest registers held in dirty host registers (per the
// i_dirty bitmask), combining two adjacent guest registers into one pair store.
1971 static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
// mregs has one slot per guest register plus a spare one, so that
// mregs[r+1] can be read for the last r of the loop below
1973 signed char mregs[34+1];
1975 memset(mregs, -1, sizeof(mregs));
1976 for (hr = 0; hr < HOST_REGS; hr++) {
// skip the excluded host register, unmapped slots, guest register 0 and CCREG
1978 if (hr == EXCLUDE_REG || r <= 0 || r == CCREG)
// skip host registers that are not dirty
1980 if (!((i_dirty >> hr) & 1))
1985 for (r = 1; r < 34; r++) {
// the next guest register needs writing too: store both with one 32-bit pair store
1988 if (mregs[r+1] >= 0) {
1989 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
1990 emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset);
// single store
1994 emit_storereg(r, mregs[r]);
1997 #define wb_dirtys wb_dirtys
// Loads the guest registers named in i_regmap into their host registers,
// combining two adjacent guest registers into one pair load where possible.
1999 static void load_all_regs(const signed char i_regmap[])
// mregs has one slot per guest register plus a spare one, so that
// mregs[r+1] can be read for the last r of the loop below
2001 signed char mregs[34+1];
2003 memset(mregs, -1, sizeof(mregs));
2004 for (hr = 0; hr < HOST_REGS; hr++) {
// skip the excluded host register, unmapped slots and CCREG
2006 if (hr == EXCLUDE_REG || r < 0 || r == CCREG)
// other mappings below TEMPREG are loaded one by one
2010 else if (r < TEMPREG)
2011 emit_loadreg(r, hr);
// guest register 0 always reads as zero
2014 emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc
2015 for (r = 1; r < 34; r++) {
// the next guest register is wanted too: load both with one 32-bit pair load
2018 if (mregs[r+1] >= 0) {
2019 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
2020 emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset);
// single load
2024 emit_loadreg(r, mregs[r]);
2027 #define load_all_regs load_all_regs
// Emit a jump to the guest address held in host register rs, resolved at
// run time through ndrc_get_addr_ht.
2029 static void do_jump_vaddr(u_int rs)
// argument 1 = the pointer value stored in hash_table_ptr
2033 emit_readptr(&hash_table_ptr, 1);
2034 emit_far_call(ndrc_get_addr_ht);
2038 static void do_preload_rhash(u_int r) {
2039 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2040 // register. On ARM the hash can be done with a single instruction (below)
2043 static void do_preload_rhtbl(u_int ht) {
2044 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2047 static void do_rhash(u_int rs,u_int rh) {
2048 emit_andimm(rs, 0xf8, rh);
2051 static void do_miniht_load(int ht, u_int rh) {
2052 emit_add64(ht, rh, ht);
2053 emit_ldst(0, 0, rh, ht, 0);
// Emit the mini hash table dispatch for the guest address in rs; rh holds the
// word loaded by do_miniht_load and ht points at the selected entry.
2056 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
// the branch recorded in jaddr lands here
2062 set_jump_target(jaddr, out);
// 64-bit load of the second slot of the entry (byte offset 8) into ht
2063 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2064 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2068 // parsed by set_jump_target?
// Emit code that records return_address in the mini hash table: rt receives
// the 32-bit guest address, and the table entry selected by
// (return_address & 0xFF) >> 3 is filled from rt and temp.
2069 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// rt = return_address, high half first, then the low half
2070 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2071 emit_movk(return_address&0xffff,rt);
// register the current output position with the linker for return_address
2072 add_to_linker(out,return_address,1);
// slot [1] takes the 64-bit value in temp, slot [0] the guest address
2074 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2075 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make code written to [start, end) visible to instruction fetch: clean the
// data cache lines and invalidate the instruction cache lines covering the
// range, as far as CTR_EL0 says that is needed, then issue the barriers.
2078 static attr_unused void clear_cache_arm64(char *start, char *end)
2080 // Don't rely on GCC's __clear_cache implementation, as it caches
2081 // icache/dcache cache line sizes, that can vary between cores on
2082 // big.LITTLE architectures.
2083 uint64_t addr, ctr_el0;
// smallest line sizes seen so far over all calls; 0xffff means "none seen yet"
2084 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2085 size_t isize, dsize;
// the line sizes of the current core are 4 bytes shifted left by the 4-bit
// fields at bits 0..3 (instruction) and 16..19 (data) of CTR_EL0
2087 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2088 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2089 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2091 // use the global minimum cache line size
2092 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2093 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2095 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2096 not required for instruction to data coherence. */
// bit 28 is IDC
2097 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the line containing `start` and step one line at a time
2098 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2099 for (; addr < (uint64_t)end; addr += dsize)
2100 // use "civac" instead of "cvau", as this is the suggested workaround for
2101 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2102 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2104 __asm__ volatile("dsb ish" : : : "memory");
2106 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2107 Unification is not required for instruction to data coherence. */
// bit 29 is DIC
2108 if ((ctr_el0 & (1 << 29)) == 0x0) {
2109 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2110 for (; addr < (uint64_t)end; addr += isize)
2111 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2113 __asm__ volatile("dsb ish" : : : "memory");
// finish with an instruction synchronization barrier
2116 __asm__ volatile("isb" : : : "memory");
2119 // CPU-architecture-specific initialization
2120 static void arch_init(void)
2122 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2123 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2125 assert(!(diff & 3));
2126 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2127 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2128 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2129 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2131 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2134 // vim:shiftwidth=2:expandtab