1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 void do_memhandler_pre();
27 void do_memhandler_post();
30 static void set_jump_target(void *addr, void *target)
32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
33 intptr_t offset = (u_char *)target - (u_char *)addr;
35 if ((*ptr&0xFC000000) == 0x14000000) { // b
36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
43 // should only happen when jumping to an already compiled block (see add_jump_out)
44 // a workaround would be to do a trampoline jump via a stub at the end of the block
45 assert(-1048576 <= offset && offset < 1048576);
46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
48 else if((*ptr&0x9f000000)==0x10000000) { // adr
49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
54 abort(); // should not happen
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third 32-bit word of the stub must be an adr instruction; its
// immediate (byte offset relative to that word) is decoded and the
// resulting address is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // Decode on an unsigned copy: left-shifting a negative int is undefined.
  const unsigned int insn = (unsigned int)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr

  // immhi is the 19-bit field at bits 23..5; sign-extend it by hand.
  int immhi = (int)((insn >> 5) & 0x7ffff);
  if (immhi & 0x40000)
    immhi -= 0x80000;
  // immlo is the 2-bit field at bits 30..29.
  const int immlo = (int)((insn >> 29) & 0x3);

  const int offset = immhi * 4 + immlo;
  return ptr + offset / 4;
}
68 // find where external branch is linked to using addr of its stub:
69 // get address that the stub loads (dyna_linker arg1),
70 // treat it as a pointer to branch insn,
71 // return addr where that branch jumps to
72 static void *get_pointer(void *stub)
74 int *i_ptr = find_extjump_insn(stub);
75 if ((*i_ptr&0xfc000000) == 0x14000000) // b
76 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
77 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
78 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
79 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
85 // Allocate a specific ARM register.
86 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
106 // Alloc cycle count into dedicated register
107 static void alloc_cc(struct regstat *cur,int i)
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
117 static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124 static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
132 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
133 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
136 static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141 static void output_w32(u_int word)
143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
147 static u_int rn_rd(u_int rn, u_int rd)
151 return (rn << 5) | rd;
154 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
159 return (rm << 16) | (rn << 5) | rd;
162 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
165 return rm_rn_rd(rm, rn, rd) | (ra << 10);
168 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
174 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
177 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
180 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
183 static u_int imm16_rd(u_int imm16, u_int rd)
185 assert(imm16 < 0x10000);
187 return (imm16 << 5) | rd;
190 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
192 assert(imm12 < 0x1000);
195 return (imm12 << 10) | (rn << 5) | rd;
198 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
200 assert(imm9 < 0x200);
203 return (imm9 << 12) | (rn << 5) | rd;
206 static u_int imm19_rt(u_int imm19, u_int rt)
208 assert(imm19 < 0x80000);
210 return (imm19 << 5) | rt;
213 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
220 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
223 static u_int genjmp(const u_char *addr)
225 intptr_t offset = addr - out;
226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
227 if (offset < -134217728 || offset > 134217727) {
228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
232 return ((u_int)offset >> 2) & 0x03ffffff;
235 static u_int genjmpcc(const u_char *addr)
237 intptr_t offset = addr - out;
238 if ((uintptr_t)addr < 3) return 0;
239 if (offset < -1048576 || offset > 1048572) {
240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
244 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. 2^n - 1 for some n >= 1, including all-ones), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // Adding one to a low-bit mask carries out of the run and clears every
  // bit the mask had set (unsigned wrap-around covers the all-ones case).
  return (value & (value + 1)) == 0;
}
252 // This function returns true if the argument contains a
253 // non-empty sequence of ones (possibly rotated) with the remainder zero.
254 static uint32_t is_rotated_mask(u_int value)
256 if (value == 0 || value == ~0)
258 if (is_mask((value - 1) | value))
260 return is_mask((~value - 1) | ~value);
263 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
265 int lzeros, tzeros, ones;
267 if (is_mask((value - 1) | value)) {
268 lzeros = __builtin_clz(value);
269 tzeros = __builtin_ctz(value);
270 ones = 32 - lzeros - tzeros;
271 *immr = (32 - tzeros) & 31;
276 if (is_mask((value - 1) | value)) {
277 lzeros = __builtin_clz(value);
278 tzeros = __builtin_ctz(value);
279 ones = 32 - lzeros - tzeros;
287 static void emit_mov(u_int rs, u_int rt)
289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
293 static void emit_mov64(u_int rs, u_int rt)
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
299 static void emit_add(u_int rs1, u_int rs2, u_int rt)
301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
305 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
307 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
308 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
311 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
313 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
314 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
316 #define emit_adds_ptr emit_adds64
318 static void emit_neg(u_int rs, u_int rt)
320 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
321 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
324 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
326 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
327 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
330 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
332 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
333 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
336 static void emit_movz(u_int imm, u_int rt)
338 assem_debug("movz %s,#%#x\n", regname[rt], imm);
339 output_w32(0x52800000 | imm16_rd(imm, rt));
342 static void emit_movz_lsl16(u_int imm, u_int rt)
344 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
345 output_w32(0x52a00000 | imm16_rd(imm, rt));
348 static void emit_movn(u_int imm, u_int rt)
350 assem_debug("movn %s,#%#x\n", regname[rt], imm);
351 output_w32(0x12800000 | imm16_rd(imm, rt));
354 static void emit_movn_lsl16(u_int imm,u_int rt)
356 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
357 output_w32(0x12a00000 | imm16_rd(imm, rt));
360 static void emit_movk(u_int imm,u_int rt)
362 assem_debug("movk %s,#%#x\n", regname[rt], imm);
363 output_w32(0x72800000 | imm16_rd(imm, rt));
366 static void emit_movk_lsl16(u_int imm,u_int rt)
369 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
370 output_w32(0x72a00000 | imm16_rd(imm, rt));
373 static void emit_zeroreg(u_int rt)
378 static void emit_movimm(u_int imm, u_int rt)
382 else if ((~imm) < 65536)
384 else if ((imm&0xffff) == 0)
385 emit_movz_lsl16(imm >> 16, rt);
386 else if (((~imm)&0xffff) == 0)
387 emit_movn_lsl16(~imm >> 16, rt);
388 else if (is_rotated_mask(imm)) {
390 gen_logical_imm(imm, &immr, &imms);
391 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
392 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
395 emit_movz(imm & 0xffff, rt);
396 emit_movk_lsl16(imm >> 16, rt);
400 static void emit_movimm64(uint64_t imm, u_int rt)
402 u_int shift, op, imm16, insns = 0;
403 for (shift = 0; shift < 4; shift++) {
404 imm16 = (imm >> shift * 16) & 0xffff;
407 op = insns ? 0xf2800000 : 0xd2800000;
408 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
410 assem_debug(",lsl #%u", shift * 16);
412 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
416 assem_debug("movz %s,#0\n", regname64[rt]);
417 output_w32(0xd2800000 | imm16_rd(0, rt));
421 static void emit_readword(void *addr, u_int rt)
423 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
424 if (!(offset & 3) && offset <= 16380) {
425 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
426 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
432 static void emit_readdword(void *addr, u_int rt)
434 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
435 if (!(offset & 7) && offset <= 32760) {
436 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
437 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
442 #define emit_readptr emit_readdword
444 static void emit_readshword(void *addr, u_int rt)
446 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
447 if (!(offset & 1) && offset <= 8190) {
448 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
449 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
455 static void emit_loadreg(u_int r, u_int hr)
463 //case HIREG: addr = &hi; break;
464 //case LOREG: addr = &lo; break;
465 case CCREG: addr = &cycle_count; break;
466 case CSREG: addr = &psxRegs.CP0.n.Status; break;
467 case INVCP: addr = &invc_ptr; is64 = 1; break;
468 case ROREG: addr = &ram_offset; is64 = 1; break;
471 addr = &psxRegs.GPR.r[r];
475 emit_readdword(addr, hr);
477 emit_readword(addr, hr);
481 static void emit_writeword(u_int rt, void *addr)
483 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
484 if (!(offset & 3) && offset <= 16380) {
485 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
486 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
492 static void emit_writedword(u_int rt, void *addr)
494 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
495 if (!(offset & 7) && offset <= 32760) {
496 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
497 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
503 static void emit_storereg(u_int r, u_int hr)
506 void *addr = &psxRegs.GPR.r[r];
508 //case HIREG: addr = &hi; break;
509 //case LOREG: addr = &lo; break;
510 case CCREG: addr = &cycle_count; break;
511 default: assert(r < 34); break;
513 emit_writeword(hr, addr);
516 static void emit_test(u_int rs, u_int rt)
518 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
519 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
522 static void emit_testimm(u_int rs, u_int imm)
525 assem_debug("tst %s,#%#x\n", regname[rs], imm);
526 assert(is_rotated_mask(imm)); // good enough for PCSX
527 gen_logical_imm(imm, &immr, &imms);
528 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
531 static void emit_not(u_int rs,u_int rt)
533 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
534 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
537 static void emit_and(u_int rs1,u_int rs2,u_int rt)
539 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
540 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
543 static void emit_or(u_int rs1,u_int rs2,u_int rt)
545 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
546 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
549 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
551 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
555 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
557 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
558 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
561 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
563 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
564 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
567 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
569 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
570 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
573 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
575 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
576 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
579 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
581 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
582 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
585 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
587 unused const char *st = s ? "s" : "";
588 s = s ? 0x20000000 : 0;
589 is64 = is64 ? 0x80000000 : 0;
591 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
592 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
594 else if (-imm < 4096) {
595 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
596 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
598 else if (imm < 16777216) {
599 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
600 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
603 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
606 else if (-imm < 16777216) {
607 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
608 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
609 if ((imm & 0xfff) || s) {
610 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
611 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
618 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
624 emit_addimm_s(0, 0, rs, imm, rt);
627 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
629 emit_addimm_s(0, 1, rs, imm, rt);
632 static void emit_addimm_and_set_flags(int imm, u_int rt)
634 emit_addimm_s(1, 0, rt, imm, rt);
637 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
639 const char *names[] = { "and", "orr", "eor", "ands" };
640 const char *name = names[op];
643 if (is_rotated_mask(imm)) {
644 gen_logical_imm(imm, &immr, &imms);
645 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
646 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
649 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
650 host_tempreg_acquire();
651 emit_movimm(imm, HOST_TEMPREG);
652 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
653 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_release();
660 static void emit_andimm(u_int rs, u_int imm, u_int rt)
665 emit_logicop_imm(0, rs, imm, rt);
668 static void emit_orimm(u_int rs, u_int imm, u_int rt)
675 emit_logicop_imm(1, rs, imm, rt);
678 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
685 emit_logicop_imm(2, rs, imm, rt);
688 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
690 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
694 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
696 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
700 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
702 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
706 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
712 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
714 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
718 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
720 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
724 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
726 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
727 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
730 static void emit_signextend16(u_int rs, u_int rt)
732 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
733 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
736 static void emit_shl(u_int rs,u_int rshift,u_int rt)
738 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
742 static void emit_shr(u_int rs,u_int rshift,u_int rt)
744 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
748 static void emit_sar(u_int rs,u_int rshift,u_int rt)
750 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
751 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
754 static void emit_cmpimm(u_int rs, u_int imm)
757 assem_debug("cmp %s,%#x\n", regname[rs], imm);
758 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
760 else if (-imm < 4096) {
761 assem_debug("cmn %s,%#x\n", regname[rs], imm);
762 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
764 else if (imm < 16777216 && !(imm & 0xfff)) {
765 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
766 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
769 host_tempreg_acquire();
770 emit_movimm(imm, HOST_TEMPREG);
771 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
772 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
773 host_tempreg_release();
777 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
779 assert(imm == 0 || imm == 1);
780 assert(cond0 < 0x10);
781 assert(cond1 < 0x10);
783 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
784 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
786 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
787 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
791 static void emit_cmovne_imm(u_int imm,u_int rt)
793 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
796 static void emit_cmovl_imm(u_int imm,u_int rt)
798 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
801 static void emit_cmovb_imm(int imm,u_int rt)
803 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
806 static void emit_cmoveq_reg(u_int rs,u_int rt)
808 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
812 static void emit_cmovne_reg(u_int rs,u_int rt)
814 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
818 static void emit_cmovl_reg(u_int rs,u_int rt)
820 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
824 static void emit_cmovb_reg(u_int rs,u_int rt)
826 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
830 static void emit_cmovs_reg(u_int rs,u_int rt)
832 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
833 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
836 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
838 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
839 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
842 static void emit_slti32(u_int rs,int imm,u_int rt)
844 if(rs!=rt) emit_zeroreg(rt);
846 if(rs==rt) emit_movimm(0,rt);
847 emit_cmovl_imm(1,rt);
850 static void emit_sltiu32(u_int rs,int imm,u_int rt)
852 if(rs!=rt) emit_zeroreg(rt);
854 if(rs==rt) emit_movimm(0,rt);
855 emit_cmovb_imm(1,rt);
858 static void emit_cmp(u_int rs,u_int rt)
860 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
861 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
864 static void emit_cmpcs(u_int rs,u_int rt)
866 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
867 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
870 static void emit_set_gz32(u_int rs, u_int rt)
872 //assem_debug("set_gz32\n");
875 emit_cmovl_imm(0,rt);
878 static void emit_set_nz32(u_int rs, u_int rt)
880 //assem_debug("set_nz32\n");
881 if(rs!=rt) emit_mov(rs,rt);
883 emit_cmovne_imm(1,rt);
886 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
888 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
889 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
891 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
892 emit_cmovl_imm(1,rt);
895 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
897 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
898 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
900 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
901 emit_cmovb_imm(1,rt);
904 static int can_jump_or_call(const void *a)
906 intptr_t diff = (u_char *)a - out;
907 return (-134217728 <= diff && diff <= 134217727);
910 static void emit_call(const void *a)
912 intptr_t diff = (u_char *)a - out;
913 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
915 if (-134217728 <= diff && diff <= 134217727)
916 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
921 static void emit_jmp(const void *a)
923 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
924 u_int offset = genjmp(a);
925 output_w32(0x14000000 | offset);
928 static void emit_jne(const void *a)
930 assem_debug("bne %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_NE);
935 static void emit_jeq(const void *a)
937 assem_debug("beq %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_EQ);
942 static void emit_js(const void *a)
944 assem_debug("bmi %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_MI);
949 static void emit_jns(const void *a)
951 assem_debug("bpl %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_PL);
956 static void emit_jl(const void *a)
958 assem_debug("blt %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_LT);
963 static void emit_jge(const void *a)
965 assem_debug("bge %p\n", a);
966 u_int offset = genjmpcc(a);
967 output_w32(0x54000000 | (offset << 5) | COND_GE);
970 static void emit_jno(const void *a)
972 assem_debug("bvc %p\n", a);
973 u_int offset = genjmpcc(a);
974 output_w32(0x54000000 | (offset << 5) | COND_VC);
977 static void emit_jc(const void *a)
979 assem_debug("bcs %p\n", a);
980 u_int offset = genjmpcc(a);
981 output_w32(0x54000000 | (offset << 5) | COND_CS);
984 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
986 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
987 u_int offset = genjmpcc(a);
988 is64 = is64 ? 0x80000000 : 0;
989 isnz = isnz ? 0x01000000 : 0;
990 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
993 static void *emit_cbz(u_int r, const void *a)
1000 static void emit_jmpreg(u_int r)
1002 assem_debug("br %s\n", regname64[r]);
1003 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1006 static void emit_retreg(u_int r)
1008 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1009 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1012 static void emit_ret(void)
1017 static void emit_adr(void *addr, u_int rt)
1019 intptr_t offset = (u_char *)addr - out;
1020 assert(-1048576 <= offset && offset < 1048576);
1022 assem_debug("adr x%d,#%#lx\n", rt, offset);
1023 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1026 static void emit_adrp(void *addr, u_int rt)
1028 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1029 assert(-4294967296l <= offset && offset < 4294967296l);
1032 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1033 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1036 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1038 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1039 assert(-256 <= offset && offset < 256);
1040 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1043 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1045 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1049 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1051 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1052 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1055 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1057 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1058 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1061 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1063 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1064 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1066 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1068 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1071 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1074 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1076 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1077 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1080 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1082 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1083 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1086 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1088 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1089 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1092 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1094 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1095 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1098 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1100 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1105 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1107 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1112 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1114 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1115 assert(-256 <= offset && offset < 256);
1116 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1119 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1121 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1122 assert(-256 <= offset && offset < 256);
1123 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1126 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1128 if (!(offset & 3) && (u_int)offset <= 16380) {
1129 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1130 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1134 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1142 if (!(offset & 1) && (u_int)offset <= 8190) {
1143 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1154 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1156 if ((u_int)offset < 4096) {
1157 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1158 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1160 else if (-256 <= offset && offset < 256) {
1161 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1162 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1168 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1170 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1174 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1176 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1180 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1182 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1183 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1186 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1188 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
// Emit "udiv rt, rs1, rs2" on 32-bit registers: rs1 is the dividend (rn field),
// rs2 the divisor (rm field) and rt receives the quotient.
1192 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1194 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
// Emit "clz rt, rs" (count leading zeros) on 32-bit registers.
1198 static void emit_clz(u_int rs, u_int rt)
1200 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1201 output_w32(0x5ac01000 | rn_rd(rs, rt));
1204 // special case for checking invalid_code
// Emits two instructions: a shift of r right by 12 into rt (via emit_shrimm),
// then "ldrb rt, [rbase, rt, uxtw]", i.e. a byte load from rbase indexed by the
// shifted value. rt serves as both the index and the destination, so the
// shifted index does not survive the load.
1205 static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1207 emit_shrimm(r, 12, rt);
1208 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1209 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1212 // special for loadlr_assemble, rs2 is destroyed
// Two-step sequence: rs2 is shifted left in place through emit_shl (which is
// why its old value is lost), then emit_bic(rs1, rs2, rt) is emitted.
// NOTE(review): `shift` is handed straight to emit_shl; whether it names a
// register or an immediate is defined by that helper - confirm there.
1213 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1215 emit_shl(rs2, shift, rs2);
1216 emit_bic(rs1, rs2, rt);
// Two-step sequence: rs2 is shifted right in place through emit_shr (so its
// old value is lost), then emit_bic(rs1, rs2, rt) is emitted.
// NOTE(review): `shift` is handed straight to emit_shr; whether it names a
// register or an immediate is defined by that helper - confirm there.
1219 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1221 emit_shr(rs2, shift, rs2);
1222 emit_bic(rs1, rs2, rt);
// Emit a single-register load or store using a scaled, unsigned immediate
// offset.
//   is_st - nonzero emits a store, zero a load
//   is64  - nonzero selects a 64-bit access, zero a 32-bit one
//   rt    - index of the register transferred
//   rn    - index of the base register
//   ofs   - byte offset; must be a multiple of the access size (asserted)
1225 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1227 u_int op = 0xb9000000;
1228 unused const char *ldst = is_st ? "st" : "ld";
1229 unused char rp = is64 ? 'x' : 'w';
1230 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
// normalise to 0/1 so it can be used in the shift amounts below
1231 is64 = is64 ? 1 : 0;
// offset must be aligned to 4 bytes (32-bit) or 8 bytes (64-bit)
1232 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// the immediate field holds the offset in units of the access size
1233 ofs = (ofs >> (2+is64));
// opcode bits: 0x00400000 marks a load, 0x40000000 the 64-bit variant
1234 if (!is_st) op |= 0x00400000;
1235 if (is64) op |= 0x40000000;
1236 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store (ldp/stp) with a scaled, signed immediate
// offset.
//   is_st    - nonzero emits a store, zero a load
//   is64     - nonzero selects 64-bit registers, zero 32-bit ones
//   rt1, rt2 - indices of the two registers transferred
//   rn       - index of the base register
//   ofs      - byte offset; must be a multiple of the register size and, once
//              scaled, fit in -64..63 (both asserted)
1239 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1241 u_int op = 0x29000000;
1242 unused const char *ldst = is_st ? "st" : "ld";
1243 unused char rp = is64 ? 'x' : 'w';
1244 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalise to 0/1 so it can be used in the shift amounts below
1245 is64 = is64 ? 1 : 0;
// offset must be aligned to 4 bytes (32-bit) or 8 bytes (64-bit)
1246 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// the immediate field holds the offset in units of the register size
1247 ofs = (ofs >> (2+is64));
// range of the 7-bit signed immediate
1248 assert(-64 <= ofs && ofs <= 63);
// opcode bits: 0x00400000 marks a load, 0x80000000 the 64-bit variant
1250 if (!is_st) op |= 0x00400000;
1251 if (is64) op |= 0x80000000;
1252 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store nonzero) or load (is_store zero) the registers selected by
// the reglist bitmask to/from stack slots at SP + SSP_CALLEE_REGS + ofs, using
// 64-bit accesses.
// NOTE(review): the local declarations and the logic that groups registers are
// missing from this excerpt. From the surviving calls it appears registers are
// transferred two at a time with emit_ldstp and a leftover one with emit_ldst -
// confirm against the full file.
1255 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask from bit 0 upwards until no bits remain
1259 for (r = 0; reglist; r++, reglist >>= 1) {
1263 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1269 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// sanity check on the amount of stack space consumed
1272 assert(ofs <= SSP_CALLER_REGS);
1275 // Save registers before function call
// reglist is a bitmask of host registers; it is masked with CALLER_SAVE_REGS
// and handed to save_load_regs_all in store mode.
1276 static void save_regs(u_int reglist)
1278 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1279 save_load_regs_all(1, reglist);
1282 // Restore registers after function call
// Counterpart of save_regs: applies the same CALLER_SAVE_REGS mask and calls
// save_load_regs_all in load mode.
1283 static void restore_regs(u_int reglist)
1285 reglist &= CALLER_SAVE_REGS;
1286 save_load_regs_all(0, reglist);
1289 /* Stubs/epilogue */
// NOTE(review): only the signature of this function survives in this excerpt;
// its body is not visible - consult the full file for its behaviour.
1291 static void literal_pool(int n)
// NOTE(review): only the signature of this function survives in this excerpt;
// its body is not visible - consult the full file for its behaviour.
1296 static void literal_pool_jumpover(int n)
1300 // parsed by get_pointer, find_extjump_insn
// Emit the sequence used for a jump that leaves the current block.
//   addr   - location of the branch instruction inside the current block
//   target - 32-bit value loaded into register 0 for the linker
// Visible steps: check that addr holds a "b" or "b.cond", load target into
// register 0 with a movz/movk pair, then far-jump to dyna_linker.
// NOTE(review): some lines are missing from this excerpt (including whatever
// passes addr on to the linker) - confirm against the full file.
1301 static void emit_extjump(u_char *addr, u_int target)
// addr[3] is the instruction's most significant byte (little-endian word)
1303 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
// low 16 bits first, then the upper 16 bits
1305 emit_movz(target & 0xffff, 0);
1306 emit_movk_lsl16(target >> 16, 0);
1308 // addr is in the current recompiled block (max 256k)
1309 // offset shouldn't exceed +/-1MB
1311 emit_far_jump(dyna_linker);
// Sanity check on an emitted external-jump sequence at src.
// NOTE(review): only one assertion survives in this excerpt; the declaration of
// `ptr` and any further checks are missing - confirm against the full file.
1314 static void check_extjump2(void *src)
// the mask keeps the opcode and destination register and ignores the imm16
1317 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1321 // put rt_val into rt, potentially making use of rs with value rs_val
// Strategy, tried in order:
//   1. the difference fits an add/sub immediate (|diff| < 4096, or
//      |diff| < 16777216 with the low 12 bits clear) -> emit_addimm from rs
//   2. rt_val is the bitwise complement of rs_val
//      NOTE(review): the instruction emitted for this case is missing from this
//      excerpt
//   3. rs_val ^ rt_val passes is_rotated_mask() -> emit_xorimm from rs
//   4. otherwise load the constant directly with emit_movimm (the branch
//      keyword itself is missing from this excerpt)
1322 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
// unsigned subtraction, reinterpreted as a signed distance
1324 int diff = rt_val - rs_val;
1325 if ((-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1327 emit_addimm(rs, diff, rt);
1328 else if (rt_val == ~rs_val)
1330 else if (is_rotated_mask(rs_val ^ rt_val))
1331 emit_xorimm(rs, rs_val ^ rt_val, rt);
1333 emit_movimm(rt_val, rt);
1336 // return 1 if the above function can do its job cheaply
// The visible conditions are the same add-immediate range checks and
// rotated-mask test that emit_movimm_from uses.
// NOTE(review): the declaration of `diff` and one term of the expression are
// missing from this excerpt - confirm against the full file.
1337 static int is_similar_value(u_int v1, u_int v2)
1340 return (-4096 < diff && diff < 4096)
1341 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1343 || is_rotated_mask(v1 ^ v2);
// 64-bit-capable variant of emit_movimm_from: put rt_val into rt, reusing rs
// (known to hold rs_val) when that helps.
// Values below 2^32 are delegated to emit_movimm_from; larger ones are loaded
// in full with emit_movimm64.
1346 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1348 if (rt_val < 0x100000000ull) {
1349 emit_movimm_from(rs_val, rs, rt_val, rt);
1352 // just move the whole thing. At least on Linux all addresses
1353 // seem to be 48bit, so 3 insns - not great not terrible
1354 emit_movimm64(rt_val, rt);
// Move host registers a0 and a1 into argument registers 0 and 1 using 64-bit
// moves, ordering the moves so that neither source is overwritten before it is
// read (the swap case goes through register 2).
// NOTE(review): several lines, including the first condition, are missing from
// this excerpt.
// NOTE(review): a0 and a1 are declared u_int, so the ">= 0" tests below are
// always true as written - confirm whether negative "no register" values are
// expected from callers.
1358 static void pass_args64(u_int a0, u_int a1)
// three-move swap through register 2
1362 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1364 else if(a0!=0&&a1==0) {
1366 if (a0>=0) emit_mov64(a0,0);
// skip moves whose source is already the destination register
1369 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1370 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt with the extension that matches the access type: signed
// bitfield extract (emit_sbfm) for signed loads, unsigned (emit_ubfm) for
// stores, with the top bit index 7 for bytes and 15 for halfwords; STOREW_STUB
// is a plain move that is skipped when rs == rt.
// NOTE(review): the switch header and some case labels are missing from this
// excerpt - confirm the remaining cases in the full file.
1374 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1377 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1379 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1380 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1382 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1384 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1389 #include "pcsxmem.h"
1390 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for load stub number n.
// Visible outline: patch the branch recorded in stubs[n].addr so it lands here,
// look the address up through mem_rtab, emit a direct load of the right width,
// otherwise call jump_handler_read8/16/32, and finally restore registers and
// jump back to stubs[n].retaddr.
// Stub fields read here: .type, .addr, .retaddr, .a (debug address), .b
// (register holding the address), .c (struct regstat pointer), .d (value added
// to the cycle count passed in register 2) and .e (register list).
// NOTE(review): many lines are absent from this excerpt (declarations of i, rt
// and handler, register saving, the conditional branches), so the exact control
// flow between the steps must be confirmed against the full file.
1392 static void do_readstub(int n)
1394 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// point the branch in the main code path at the stub being emitted now
1395 set_jump_target(stubs[n].addr, out);
1396 enum stub_type type = stubs[n].type;
1398 int rs = stubs[n].b;
1399 const struct regstat *i_regs = (void *)stubs[n].c;
1400 u_int reglist = stubs[n].e;
1401 const signed char *i_regmap = i_regs->regmap;
// coprocessor loads and LOADLR use the FTEMP mapping as destination, other
// loads use the instruction's rt1
1403 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1404 rt=get_reg(i_regmap,FTEMP);
1406 rt=get_reg(i_regmap,dops[i].rt1);
1409 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1410 void *restore_jump = NULL, *handler_jump = NULL;
// search for a host register that is not in reglist to use as a temporary
1412 for (r = 0; r < HOST_CCREG; r++) {
1413 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1418 if(rt>=0&&dops[i].rt1!=0)
1425 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// table lookup: temp2 = mem_rtab[rs >> 12], then the entry is added to itself
// with flags set (presumably to expose its top bit - confirm)
1427 emit_readdword(&mem_rtab,temp);
1428 emit_shrimm(rs,12,temp2);
1429 emit_readdword_dualindexedx8(temp,temp2,temp2);
1430 emit_adds64(temp2,temp2,temp2);
// direct access, only when the loaded value is actually wanted
1433 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1435 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1436 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1437 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1438 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1439 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1445 emit_jmp(0); // jump to reg restore
1448 emit_jmp(stubs[n].retaddr); // return address
1449 set_jump_target(handler_jump, out);
// choose the C-side read handler by access width
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
// arguments: register 0 = address, register 1 = table entry, register 2 =
// cycle count plus stubs[n].d
1461 pass_args64(rs,temp2);
1462 int cc=get_reg(i_regmap,CCREG);
1464 emit_loadreg(CCREG,2);
1465 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1466 emit_far_call(handler);
1467 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended into rt
1468 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1469 loadstore_extend(type,0,rt);
1472 set_jump_target(restore_jump, out);
1473 restore_regs(reglist);
1474 emit_jmp(stubs[n].retaddr);
// Emit a load from the compile-time-known guest address `addr`, inline in the
// instruction stream.
//   type    - load stub type selecting width and signedness
//   i       - index of the instruction in dops[]
//   addr    - guest address being read
//   regmap  - host register mapping for this instruction
//   target  - guest register looked up for both the address and result registers
//   adj     - value added to the cycle count passed to a handler
//   reglist - host registers to restore after a handler call
// If get_direct_memhandler() returns NULL the access is emitted as a plain load
// through host_addr; otherwise a handler call is emitted.
// NOTE(review): many lines are absent from this excerpt (declarations, register
// saving, early returns, several conditions) - confirm the exact flow against
// the full file.
1477 static void inline_readstub(enum stub_type type, int i, u_int addr,
1478 const signed char regmap[], int target, int adj, u_int reglist)
// both lookups use `target`; rs falls back to a temporary when unmapped
1480 int rs=get_reg(regmap,target);
1481 int rt=get_reg(regmap,target);
1482 if(rs<0) rs=get_reg_temp(regmap);
1485 uintptr_t host_addr = 0;
1487 int cc=get_reg(regmap,CCREG);
1488 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1490 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1491 if (handler == NULL) {
1492 if(rt<0||dops[i].rt1==0)
// rs holds the guest address; rewrite it to the host address when they differ
1494 if (addr != host_addr)
1495 emit_movimm_from64(addr, rs, host_addr, rs);
1497 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1498 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1499 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1500 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1501 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1506 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// generic handlers chosen by access width
1508 if(type==LOADB_STUB||type==LOADBU_STUB)
1509 handler=jump_handler_read8;
1510 if(type==LOADH_STUB||type==LOADHU_STUB)
1511 handler=jump_handler_read16;
1512 if(type==LOADW_STUB)
1513 handler=jump_handler_read32;
1516 // call a memhandler
1517 if(rt>=0&&dops[i].rt1!=0)
// register 0 = guest address, register 2 = cycle count plus adj
1521 emit_movimm(addr,0);
1525 emit_loadreg(CCREG,2);
1526 emit_addimm(cc<0?2:cc,adj,2);
// register 1 = the mem_rtab entry for this page shifted left by one; adrp+add
// is used when it lies within +/-4GB of the output pointer, else a full
// 64-bit immediate load
1528 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1529 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1530 if (-4294967296l <= offset && offset < 4294967296l) {
1531 emit_adrp((void *)l1, 1);
1532 emit_addimm64(1, l1 & 0xfff, 1);
1535 emit_movimm64(l1, 1);
1538 emit_far_call(do_memhandler_pre);
1540 emit_far_call(handler);
1542 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended into rt
1543 if(rt>=0&&dops[i].rt1!=0)
1544 loadstore_extend(type, 0, rt);
1545 restore_regs(reglist);
// Emit the out-of-line slow path for store stub number n.
// Visible outline: patch the branch recorded in stubs[n].addr so it lands here,
// look the address up through mem_wtab, emit a direct store of the right
// width, otherwise call jump_handler_write8/16/32 with the cycle count in
// register 2, then restore registers and jump back to stubs[n].retaddr.
// NOTE(review): many lines are absent from this excerpt (declarations of i, rs,
// rt, r and handler, register saving, the conditional branches), so the exact
// control flow must be confirmed against the full file.
1548 static void do_writestub(int n)
1550 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
// point the branch in the main code path at the stub being emitted now
1551 set_jump_target(stubs[n].addr, out);
1552 enum stub_type type=stubs[n].type;
1555 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1556 u_int reglist=stubs[n].e;
1557 signed char *i_regmap=i_regs->regmap;
// coprocessor stores take the value from the FTEMP mapping, others from rs2
1559 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1560 rt=get_reg(i_regmap,r=FTEMP);
1562 rt=get_reg(i_regmap,r=dops[i].rs2);
1566 int rtmp,temp=-1,temp2,regs_saved=0;
1567 void *restore_jump = NULL, *handler_jump = NULL;
// reglist plus the address and value registers
1568 int reglist2=reglist|(1<<rs)|(1<<rt);
// search for a host register that is not in reglist to use as a temporary
1569 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1570 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
// second search, limited to registers 0..3 that are neither rs nor rt
1578 for(rtmp=0;rtmp<=3;rtmp++)
1579 if(rtmp!=rs&&rtmp!=rt)
1582 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1585 host_tempreg_acquire();
// table lookup: temp2 = mem_wtab[rs >> 12], then the entry is added to itself
// with flags set (presumably to expose its top bit - confirm)
1588 emit_readdword(&mem_wtab,temp);
1589 emit_shrimm(rs,12,temp2);
1590 emit_readdword_dualindexedx8(temp,temp2,temp2);
1591 emit_adds64(temp2,temp2,temp2);
1595 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1596 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1597 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1602 emit_jmp(0); // jump to reg restore
1605 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1606 set_jump_target(handler_jump, out);
// choose the C-side write handler by access width
1612 case STOREB_STUB: handler=jump_handler_write8; break;
1613 case STOREH_STUB: handler=jump_handler_write16; break;
1614 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry is passed in register 3
1620 emit_mov64(temp2,3);
1621 host_tempreg_release();
1623 int cc=get_reg(i_regmap,CCREG);
1625 emit_loadreg(CCREG,2);
1626 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1627 // returns new cycle_count
1628 emit_far_call(handler);
// undo the adjustment on the count returned in register 0
1629 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1631 emit_storereg(CCREG,2);
1633 set_jump_target(restore_jump, out);
1634 restore_regs(reglist);
1635 emit_jmp(stubs[n].retaddr);
// Emit a store to the compile-time-known guest address `addr`, inline in the
// instruction stream.
//   type    - store stub type selecting the width
//   i       - index of the instruction (not referenced in the visible lines)
//   addr    - guest address being written
//   regmap  - host register mapping for this instruction
//   target  - guest register whose mapping supplies the value to store
//   adj     - cycle-count adjustment applied around the handler call
//   reglist - host registers to restore after a handler call
// If get_direct_memhandler() returns NULL the store is emitted directly through
// host_addr; otherwise the handler is called between do_memhandler_pre and
// do_memhandler_post.
// NOTE(review): several lines are absent from this excerpt (declarations,
// register saving, early return, some conditions) - confirm in the full file.
1638 static void inline_writestub(enum stub_type type, int i, u_int addr,
1639 const signed char regmap[], int target, int adj, u_int reglist)
1641 int rs = get_reg_temp(regmap);
1642 int rt = get_reg(regmap,target);
1645 uintptr_t host_addr = 0;
1646 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1647 if (handler == NULL) {
// rs holds the guest address; rewrite it to the host address when they differ
1648 if (addr != host_addr)
1649 emit_movimm_from64(addr, rs, host_addr, rs);
1651 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1652 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1653 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1659 // call a memhandler
1661 emit_writeword(rs, &address); // some handlers still need it
// value to store goes into register 0, truncated to the access width
1662 loadstore_extend(type, rt, 0);
1664 cc = cc_use = get_reg(regmap, CCREG);
1666 emit_loadreg(CCREG, (cc_use = 2));
// register 2 = cycle count plus adj
1667 emit_addimm(cc_use, adj, 2);
1669 emit_far_call(do_memhandler_pre);
1670 emit_far_call(handler);
1671 emit_far_call(do_memhandler_post);
// take the count back from register 0 and undo the adjustment
1672 emit_addimm(0, -adj, cc_use);
1674 emit_storereg(CCREG, cc_use);
1675 restore_regs(reglist);
// Code emitted before a call to a GTE (cop2) operation handler: store the
// registers in reglist, run the cop2 stall check, and load register 0 with the
// address of psxRegs.CP2D.r[0], computed as an offset from FP.
// NOTE(review): the pcnt_gte_start call is presumably inside a conditional
// that is missing from this excerpt - confirm in the full file.
1680 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// reglist is passed through unmasked here (unlike save_regs)
1682 save_load_regs_all(1, reglist);
1683 cop2_do_stall_check(op, i, i_regs, 0);
1686 emit_far_call(pcnt_gte_start);
1688 // pointer to cop2 regs
1689 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Code emitted after a GTE (cop2) operation handler call: reload the registers
// in reglist that c2op_prologue stored.
// NOTE(review): `op` is not referenced in the visible lines, and the
// pcnt_gte_end call is presumably inside a conditional that is missing from
// this excerpt - confirm in the full file.
1692 static void c2op_epilogue(u_int op,u_int reglist)
1696 emit_far_call(pcnt_gte_end);
1698 save_load_regs_all(0, reglist);
// Assemble GTE (cop2) operation i: when a handler exists for the opcode, emit
// the prologue, store the raw opcode to psxRegs.code, call the handler (the
// flag-less variant when flags are not needed) and emit the epilogue.
// NOTE(review): a few lines are missing from this excerpt, including the
// statement guarded by the NDHACK_GTE_NO_FLAGS check - confirm in the full file.
1701 static void c2op_assemble(int i, const struct regstat *i_regs)
// low 6 bits of the instruction word select the GTE operation
1703 u_int c2op=source[i]&0x3f;
1704 u_int hr,reglist_full=0,reglist;
1705 int need_flags,need_ir;
// collect every host register that currently has a mapping
1706 for(hr=0;hr<HOST_REGS;hr++) {
1707 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1709 reglist=reglist_full&CALLER_SAVE_REGS;
1711 if (gte_handlers[c2op]!=NULL) {
1712 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// IR results count as needed unless all three bits of mask 0xe00 are set
1713 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1714 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1715 source[i],gte_unneeded[i+1],need_flags,need_ir);
1716 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1718 //int shift = (source[i] >> 19) & 1;
1719 //int lm = (source[i] >> 10) & 1;
1723 c2op_prologue(c2op, i, i_regs, reglist);
1724 emit_movimm(source[i],1); // opcode
1725 emit_writeword(1,&psxRegs.code);
1726 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1729 c2op_epilogue(c2op,reglist);
// Emit the value transformation described by the two commented lines below:
// mask the value in sl and set bit 31 when any of the selected error bits is
// set. sl holds the input; the result is built in temp.
// The trailing assert(0) fires whenever this function runs, so it is
// effectively disabled until the sequence has been tested.
1733 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1735 //value = value & 0x7ffff000;
1736 //if (value & 0x7f87e000) value |= 0x80000000;
// 0x7fffe000 & 0xff87ffff == 0x7f87e000, the mask from the comment above
1737 emit_andimm(sl, 0x7fffe000, temp);
1738 emit_testimm(temp, 0xff87ffff);
// this relies on emit_andimm leaving the flags from the test intact
1739 emit_andimm(sl, 0x7ffff000, temp);
1740 host_tempreg_acquire();
1741 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
// keep the variant with bit 31 set only when the test was non-zero
1742 emit_cmovne_reg(HOST_TEMPREG, temp);
1743 host_tempreg_release();
1744 assert(0); // testing needed
// Emit code that loads one cop2 data register and reduces it to a clamped
// field in temp.
//   copr - index into reg_cop2d[]
//   temp - host register that receives the result
// Sequence: read reg_cop2d[copr] as a signed halfword, apply emit_bicsar_imm
// with shift 31 (presumably clearing the value when it is negative - confirm),
// saturate values above 0xf80 to all ones, then mask with 0xf80.
1747 static void do_mfc2_31_one(u_int copr,signed char temp)
// fix: the argument had been mis-encoded as a registered-trademark sign
// followed by "_cop2d"; it is the address of reg_cop2d[copr]
1749 emit_readshword(&reg_cop2d[copr],temp);
1750 emit_bicsar_imm(temp,31,temp);
1751 emit_cmpimm(temp,0xf80);
1752 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1753 emit_andimm(temp,0xf80,temp);
// Emit code that builds the value of cop2 data register 29 from registers 9,
// 10 and 11 and writes it to reg_cop2d[29].
//   tl   - host register that receives the combined value
//   temp - scratch host register; HOST_TEMPREG is substituted in the visible
//          lines and released again at the end
// Each source register is reduced by do_mfc2_31_one; register 9 is placed by a
// right shift of 7, register 10 is OR-ed in shifted right by 2, and register 11
// OR-ed in shifted left by 3.
// NOTE(review): the condition guarding the HOST_TEMPREG substitution is missing
// from this excerpt - confirm in the full file.
1756 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1759 host_tempreg_acquire();
1760 temp = HOST_TEMPREG;
1762 do_mfc2_31_one(9,temp);
1763 emit_shrimm(temp,7,tl);
1764 do_mfc2_31_one(10,temp);
1765 emit_orrshr_imm(temp,2,tl);
1766 do_mfc2_31_one(11,temp);
1767 emit_orrshl_imm(temp,3,tl);
// fix: the argument had been mis-encoded as a registered-trademark sign
// followed by "_cop2d"; it is the address of reg_cop2d[29]
1768 emit_writeword(tl,&reg_cop2d[29]);
1770 if (temp == HOST_TEMPREG)
1771 host_tempreg_release();
// Assemble MULT/MULTU/DIV/DIVU for instruction i on ARM64.
// When both source registers are non-zero guest registers:
//   - multiply: one 64-bit smull/umull into the HI register, after which HI is
//     shifted right by 32
//   - divide: sdiv/udiv into LO, msub for the remainder into HI, then a
//     conditional fix-up of the quotient for a zero divisor
// When a source is the zero register the results are emitted as constants or
// simple moves.
// NOTE(review): many lines are absent from this excerpt (case labels, else
// branches, the move that fills LO after a multiply) - confirm the exact flow
// against the full file.
1774 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1780 if(dops[i].rs1&&dops[i].rs2)
1782 switch(dops[i].opcode2)
1787 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1788 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1789 signed char hi=get_reg(i_regs->regmap,HIREG);
1790 signed char lo=get_reg(i_regs->regmap,LOREG);
1796 if(dops[i].opcode2==0x18) // MULT
1797 emit_smull(m1,m2,hi);
1799 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the product in hi
1802 emit_shrimm64(hi,32,hi);
1808 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1809 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1810 signed char quotient=get_reg(i_regs->regmap,LOREG);
1811 signed char remainder=get_reg(i_regs->regmap,HIREG);
1812 assert(numerator>=0);
1813 assert(denominator>=0);
1814 assert(quotient>=0);
1815 assert(remainder>=0);
1817 if (dops[i].opcode2 == 0x1A) // DIV
1818 emit_sdiv(numerator,denominator,quotient);
1820 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1821 emit_msub(quotient,denominator,numerator,remainder);
1823 // div 0 quotient (remainder is already correct)
1824 host_tempreg_acquire();
1825 if (dops[i].opcode2 == 0x1A) // DIV
1826 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1828 emit_movimm(~0,HOST_TEMPREG);
// substitute the fix-up value only when the denominator is zero
1829 emit_test(denominator,denominator);
1830 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1831 host_tempreg_release();
1840 signed char hr=get_reg(i_regs->regmap,HIREG);
1841 signed char lr=get_reg(i_regs->regmap,LOREG);
1842 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1845 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1846 assert(numerator >= 0);
// the remainder (HI) is the numerator itself
1848 emit_mov(numerator,hr);
1850 if (dops[i].opcode2 == 0x1A) // DIV
1851 emit_sub_asrimm(0,numerator,31,lr);
1857 if (hr >= 0) emit_zeroreg(hr);
1858 if (lr >= 0) emit_movimm(~0,lr);
1863 // Multiply by zero is zero.
1864 if (hr >= 0) emit_zeroreg(hr);
1865 if (lr >= 0) emit_zeroreg(lr);
1869 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the code for the guest address held in host register rs.
// NOTE(review): only the call to ndrc_get_addr_ht survives in this excerpt; the
// argument setup before it and the jump after it are missing - confirm in the
// full file.
1871 static void do_jump_vaddr(u_int rs)
1875 emit_far_call(ndrc_get_addr_ht);
// Hook kept for interface parity with other backends; according to the comment
// below nothing has to be emitted on ARM.
1879 static void do_preload_rhash(u_int r) {
1880 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1881 // register. On ARM the hash can be done with a single instruction (below)
// Load host register ht with the address of mini_ht, computed as FP plus the
// byte distance between mini_ht and dynarec_local.
1884 static void do_preload_rhtbl(u_int ht) {
1885 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
// Compute the mini hash table byte offset for the address in rs:
// rh = rs & 0xf8.
1888 static void do_rhash(u_int rs,u_int rh) {
1889 emit_andimm(rs, 0xf8, rh);
// Advance ht to the selected table entry (ht += rh, 64-bit add), then load the
// 32-bit word at offset 0 of that entry into rh. Both registers are
// overwritten.
1892 static void do_miniht_load(int ht, u_int rh) {
1893 emit_add64(ht, rh, ht);
// arguments: load (is_st = 0), 32-bit (is64 = 0), destination rh, base ht
1894 emit_ldst(0, 0, rh, ht, 0);
// Emit the tail of a mini hash table lookup.
// NOTE(review): the comparison, the fallback path and the final branch are
// missing from this excerpt; only the jump-target patch and the 64-bit load of
// the second field of the entry ([ht, #8]) into ht are visible - confirm in
// the full file.
1897 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1903 set_jump_target(jaddr, out);
1904 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// the 64-bit ldr immediate is scaled by 8, hence 8 >> 3
1905 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1909 // parsed by set_jump_target?
// Emit code that records return_address in the mini hash table: rt is loaded
// with the 32-bit return address (upper half via movz, lower half via movk),
// the current output position is registered with add_to_linker, and the entry
// at index (return_address & 0xFF) >> 3 gets temp stored as a 64-bit value in
// slot [1] and rt stored as a 32-bit value in slot [0].
// NOTE(review): the instruction that sets up temp is missing from this
// excerpt - confirm in the full file.
1910 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1911 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1912 emit_movk(return_address&0xffff,rt);
1913 add_to_linker(out,return_address,1);
1915 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1916 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make newly written code in [start, end) visible to instruction fetch: clean
// and invalidate the data cache lines, then invalidate the instruction cache
// lines, with the barriers required in between.
// Line sizes are read from CTR_EL0 on every call, and the smallest sizes seen
// so far are remembered in static variables.
1919 static unused void clear_cache_arm64(char *start, char *end)
1921 // Don't rely on GCC's __clear_cache implementation, as it caches
1922 // icache/dcache cache line sizes, that can vary between cores on
1923 // big.LITTLE architectures.
1924 uint64_t addr, ctr_el0;
// static: the running minimum persists across calls
1925 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1926 size_t isize, dsize;
1928 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// bits 3:0 and 19:16 hold log2 of the line size in 4-byte words
1929 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1930 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1932 // use the global minimum cache line size
1933 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1934 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1936 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1937 not required for instruction to data coherence. */
1938 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a line boundary so the first partial line is covered
1939 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1940 for (; addr < (uint64_t)end; addr += dsize)
1941 // use "civac" instead of "cvau", as this is the suggested workaround for
1942 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1943 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
1945 __asm__ volatile("dsb ish" : : : "memory");
1947 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1948 Unification is not required for instruction to data coherence. */
1949 if ((ctr_el0 & (1 << 29)) == 0x0) {
// round start down to a line boundary so the first partial line is covered
1950 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1951 for (; addr < (uint64_t)end; addr += isize)
1952 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1954 __asm__ volatile("dsb ish" : : : "memory");
// instruction barrier after the cache maintenance above
1957 __asm__ volatile("isb" : : : "memory");
1960 // CPU-architecture-specific initialization
// Fills ndrc->tramp.ops with trampolines, each consisting of a PC-relative
// literal load into x17 followed by "br x17". The literal offset is the byte
// distance from tramp.ops to tramp.f, so each trampoline presumably reads the
// tramp.f slot with the same index (this requires equal element sizes in both
// arrays - confirm).
// NOTE(review): the declaration of `i` is missing from this excerpt.
1961 static void arch_init(void)
1963 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
// writable alias of the trampoline area
1964 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
// the ldr-literal immediate is counted in 4-byte words
1966 assert(!(diff & 3));
1967 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1968 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1969 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1970 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1972 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1975 // vim:shiftwidth=2:expandtab