1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define unused __attribute__((unused))
28 void do_memhandler_pre();
29 void do_memhandler_post();
32 static void set_jump_target(void *addr, void *target)
34 u_int *ptr = NDRC_WRITE_OFFSET(addr);
35 intptr_t offset = (u_char *)target - (u_char *)addr;
37 if ((*ptr&0xFC000000) == 0x14000000) { // b
38 assert(offset>=-134217728LL&&offset<134217728LL);
39 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
41 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
42 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
43 // Conditional branch are limited to +/- 1MB
44 // block max size is 256k so branching beyond the +/- 1MB limit
45 // should only happen when jumping to an already compiled block (see add_jump_out)
46 // a workaround would be to do a trampoline jump via a stub at the end of the block
47 assert(-1048576 <= offset && offset < 1048576);
48 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
50 else if((*ptr&0x9f000000)==0x10000000) { // adr
51 // generated by do_miniht_insert
52 assert(offset>=-1048576LL&&offset<1048576LL);
53 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
56 abort(); // should not happen
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
//
// The third instruction word of the stub must be an adr; its 21-bit
// immediate (immhi:immlo) is decoded and applied relative to that adr.
// Decoding is done with unsigned arithmetic and an explicit sign extension
// so that no signed value is ever left-shifted.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  const uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000u) == 0x10000000u); // adr

  const uint32_t immhi = (insn >> 5) & 0x7ffffu; // bits [23:5]
  const uint32_t immlo = (insn >> 29) & 0x3u;    // bits [30:29]
  int32_t offset = (int32_t)((immhi << 2) | immlo);
  if (offset & 0x100000)   // bit 20 is the sign of the 21-bit immediate
    offset -= 0x200000;

  return ptr + offset / 4;
}
// find where external branch is linked to using addr of its stub:
71 // get address that the stub loads (dyna_linker arg1),
72 // treat it as a pointer to branch insn,
73 // return addr where that branch jumps to
74 static void *get_pointer(void *stub)
76 int *i_ptr = find_extjump_insn(stub);
77 if ((*i_ptr&0xfc000000) == 0x14000000) // b
78 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
79 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
80 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
81 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
87 // Allocate a specific ARM register.
88 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
108 // Alloc cycle count into dedicated register
109 static void alloc_cc(struct regstat *cur,int i)
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
119 static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
126 static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
134 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
135 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
138 static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
143 static void output_w32(u_int word)
145 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
149 static u_int rn_rd(u_int rn, u_int rd)
153 return (rn << 5) | rd;
156 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
161 return (rm << 16) | (rn << 5) | rd;
164 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
167 return rm_rn_rd(rm, rn, rd) | (ra << 10);
170 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
176 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
179 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
182 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
185 static u_int imm16_rd(u_int imm16, u_int rd)
187 assert(imm16 < 0x10000);
189 return (imm16 << 5) | rd;
192 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
194 assert(imm12 < 0x1000);
197 return (imm12 << 10) | (rn << 5) | rd;
200 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
202 assert(imm9 < 0x200);
205 return (imm9 << 12) | (rn << 5) | rd;
208 static u_int imm19_rt(u_int imm19, u_int rt)
210 assert(imm19 < 0x80000);
212 return (imm19 << 5) | rt;
215 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
222 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
225 static u_int genjmp(const u_char *addr)
227 intptr_t offset = addr - out;
228 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
229 if (offset < -134217728 || offset > 134217727) {
230 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
234 return ((u_int)offset >> 2) & 0x03ffffff;
237 static u_int genjmpcc(const u_char *addr)
239 intptr_t offset = addr - out;
240 if ((uintptr_t)addr < 3) return 0;
241 if (offset < -1048576 || offset > 1048572) {
242 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 return ((u_int)offset >> 2) & 0x7ffff;
249 static uint32_t is_mask(u_int value)
251 return value && ((value + 1) & value) == 0;
254 // This function returns true if the argument contains a
255 // non-empty sequence of ones (possibly rotated) with the remainder zero.
256 static uint32_t is_rotated_mask(u_int value)
258 if (value == 0 || value == ~0)
260 if (is_mask((value - 1) | value))
262 return is_mask((~value - 1) | ~value);
265 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
267 int lzeros, tzeros, ones;
269 if (is_mask((value - 1) | value)) {
270 lzeros = __builtin_clz(value);
271 tzeros = __builtin_ctz(value);
272 ones = 32 - lzeros - tzeros;
273 *immr = (32 - tzeros) & 31;
278 if (is_mask((value - 1) | value)) {
279 lzeros = __builtin_clz(value);
280 tzeros = __builtin_ctz(value);
281 ones = 32 - lzeros - tzeros;
289 static void emit_mov(u_int rs, u_int rt)
291 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
292 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
295 static void emit_mov64(u_int rs, u_int rt)
297 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
298 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
301 static void emit_add(u_int rs1, u_int rs2, u_int rt)
303 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
304 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
307 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
309 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
310 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
313 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
315 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
316 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
318 #define emit_adds_ptr emit_adds64
320 static void emit_neg(u_int rs, u_int rt)
322 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
323 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
326 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
328 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
329 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
332 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
334 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
335 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
338 static void emit_movz(u_int imm, u_int rt)
340 assem_debug("movz %s,#%#x\n", regname[rt], imm);
341 output_w32(0x52800000 | imm16_rd(imm, rt));
344 static void emit_movz_lsl16(u_int imm, u_int rt)
346 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
347 output_w32(0x52a00000 | imm16_rd(imm, rt));
350 static void emit_movn(u_int imm, u_int rt)
352 assem_debug("movn %s,#%#x\n", regname[rt], imm);
353 output_w32(0x12800000 | imm16_rd(imm, rt));
356 static void emit_movn_lsl16(u_int imm,u_int rt)
358 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
359 output_w32(0x12a00000 | imm16_rd(imm, rt));
362 static void emit_movk(u_int imm,u_int rt)
364 assem_debug("movk %s,#%#x\n", regname[rt], imm);
365 output_w32(0x72800000 | imm16_rd(imm, rt));
368 static void emit_movk_lsl16(u_int imm,u_int rt)
371 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x72a00000 | imm16_rd(imm, rt));
375 static void emit_zeroreg(u_int rt)
380 static void emit_movimm(u_int imm, u_int rt)
384 else if ((~imm) < 65536)
386 else if ((imm&0xffff) == 0)
387 emit_movz_lsl16(imm >> 16, rt);
388 else if (((~imm)&0xffff) == 0)
389 emit_movn_lsl16(~imm >> 16, rt);
390 else if (is_rotated_mask(imm)) {
392 gen_logical_imm(imm, &immr, &imms);
393 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
394 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
397 emit_movz(imm & 0xffff, rt);
398 emit_movk_lsl16(imm >> 16, rt);
402 static void emit_movimm64(uint64_t imm, u_int rt)
404 u_int shift, op, imm16, insns = 0;
405 for (shift = 0; shift < 4; shift++) {
406 imm16 = (imm >> shift * 16) & 0xffff;
409 op = insns ? 0xf2800000 : 0xd2800000;
410 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
412 assem_debug(",lsl #%u", shift * 16);
414 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
418 assem_debug("movz %s,#0\n", regname64[rt]);
419 output_w32(0xd2800000 | imm16_rd(0, rt));
423 static void emit_readword(void *addr, u_int rt)
425 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
426 if (!(offset & 3) && offset <= 16380) {
427 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
428 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
434 static void emit_readdword(void *addr, u_int rt)
436 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
437 if (!(offset & 7) && offset <= 32760) {
438 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
439 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
444 #define emit_readptr emit_readdword
446 static void emit_readshword(void *addr, u_int rt)
448 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
449 if (!(offset & 1) && offset <= 8190) {
450 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
451 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
457 static void emit_loadreg(u_int r, u_int hr)
465 //case HIREG: addr = &hi; break;
466 //case LOREG: addr = &lo; break;
467 case CCREG: addr = &cycle_count; break;
468 case CSREG: addr = &Status; break;
469 case INVCP: addr = &invc_ptr; is64 = 1; break;
470 case ROREG: addr = &ram_offset; is64 = 1; break;
473 addr = &psxRegs.GPR.r[r];
477 emit_readdword(addr, hr);
479 emit_readword(addr, hr);
483 static void emit_writeword(u_int rt, void *addr)
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 3) && offset <= 16380) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
488 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
494 static void emit_writedword(u_int rt, void *addr)
496 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
497 if (!(offset & 7) && offset <= 32760) {
498 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
499 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
505 static void emit_storereg(u_int r, u_int hr)
508 void *addr = &psxRegs.GPR.r[r];
510 //case HIREG: addr = &hi; break;
511 //case LOREG: addr = &lo; break;
512 case CCREG: addr = &cycle_count; break;
513 default: assert(r < 34); break;
515 emit_writeword(hr, addr);
518 static void emit_test(u_int rs, u_int rt)
520 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
521 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
524 static void emit_testimm(u_int rs, u_int imm)
527 assem_debug("tst %s,#%#x\n", regname[rs], imm);
528 assert(is_rotated_mask(imm)); // good enough for PCSX
529 gen_logical_imm(imm, &immr, &imms);
530 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
533 static void emit_not(u_int rs,u_int rt)
535 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
536 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
539 static void emit_and(u_int rs1,u_int rs2,u_int rt)
541 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
542 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
545 static void emit_or(u_int rs1,u_int rs2,u_int rt)
547 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
548 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
551 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
553 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
554 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
557 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
559 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
560 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
563 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
565 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
566 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
569 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
571 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
572 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
575 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
577 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
578 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
581 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
583 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
584 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
587 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
589 unused const char *st = s ? "s" : "";
590 s = s ? 0x20000000 : 0;
591 is64 = is64 ? 0x80000000 : 0;
593 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
596 else if (-imm < 4096) {
597 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
598 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
600 else if (imm < 16777216) {
601 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
602 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
603 if ((imm & 0xfff) || s) {
604 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
605 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
608 else if (-imm < 16777216) {
609 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
610 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
611 if ((imm & 0xfff) || s) {
612 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
613 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
620 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
622 emit_addimm_s(0, 0, rs, imm, rt);
625 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
627 emit_addimm_s(0, 1, rs, imm, rt);
630 static void emit_addimm_and_set_flags(int imm, u_int rt)
632 emit_addimm_s(1, 0, rt, imm, rt);
635 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
637 const char *names[] = { "and", "orr", "eor", "ands" };
638 const char *name = names[op];
641 if (is_rotated_mask(imm)) {
642 gen_logical_imm(imm, &immr, &imms);
643 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
644 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
647 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
648 host_tempreg_acquire();
649 emit_movimm(imm, HOST_TEMPREG);
650 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
651 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
652 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
653 host_tempreg_release();
658 static void emit_andimm(u_int rs, u_int imm, u_int rt)
663 emit_logicop_imm(0, rs, imm, rt);
666 static void emit_orimm(u_int rs, u_int imm, u_int rt)
673 emit_logicop_imm(1, rs, imm, rt);
676 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
683 emit_logicop_imm(2, rs, imm, rt);
686 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
688 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
692 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
694 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
698 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
700 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
704 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
710 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
712 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
716 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
718 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
719 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
722 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
724 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
725 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
728 static void emit_signextend16(u_int rs, u_int rt)
730 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
731 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
734 static void emit_shl(u_int rs,u_int rshift,u_int rt)
736 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
740 static void emit_shr(u_int rs,u_int rshift,u_int rt)
742 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
746 static void emit_sar(u_int rs,u_int rshift,u_int rt)
748 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
749 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
752 static void emit_cmpimm(u_int rs, u_int imm)
755 assem_debug("cmp %s,%#x\n", regname[rs], imm);
756 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
758 else if (-imm < 4096) {
759 assem_debug("cmn %s,%#x\n", regname[rs], imm);
760 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
762 else if (imm < 16777216 && !(imm & 0xfff)) {
763 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
764 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
767 host_tempreg_acquire();
768 emit_movimm(imm, HOST_TEMPREG);
769 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
770 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
771 host_tempreg_release();
775 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
777 assert(imm == 0 || imm == 1);
778 assert(cond0 < 0x10);
779 assert(cond1 < 0x10);
781 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
782 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
784 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
785 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
789 static void emit_cmovne_imm(u_int imm,u_int rt)
791 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
794 static void emit_cmovl_imm(u_int imm,u_int rt)
796 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
799 static void emit_cmovb_imm(int imm,u_int rt)
801 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
804 static void emit_cmoveq_reg(u_int rs,u_int rt)
806 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
810 static void emit_cmovne_reg(u_int rs,u_int rt)
812 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
816 static void emit_cmovl_reg(u_int rs,u_int rt)
818 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
822 static void emit_cmovb_reg(u_int rs,u_int rt)
824 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
825 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
828 static void emit_cmovs_reg(u_int rs,u_int rt)
830 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
831 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
834 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
836 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
837 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
840 static void emit_slti32(u_int rs,int imm,u_int rt)
842 if(rs!=rt) emit_zeroreg(rt);
844 if(rs==rt) emit_movimm(0,rt);
845 emit_cmovl_imm(1,rt);
848 static void emit_sltiu32(u_int rs,int imm,u_int rt)
850 if(rs!=rt) emit_zeroreg(rt);
852 if(rs==rt) emit_movimm(0,rt);
853 emit_cmovb_imm(1,rt);
856 static void emit_cmp(u_int rs,u_int rt)
858 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
859 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
862 static void emit_cmpcs(u_int rs,u_int rt)
864 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
865 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
868 static void emit_set_gz32(u_int rs, u_int rt)
870 //assem_debug("set_gz32\n");
873 emit_cmovl_imm(0,rt);
876 static void emit_set_nz32(u_int rs, u_int rt)
878 //assem_debug("set_nz32\n");
879 if(rs!=rt) emit_mov(rs,rt);
881 emit_cmovne_imm(1,rt);
884 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
886 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
887 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
889 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
890 emit_cmovl_imm(1,rt);
893 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
895 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
896 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
898 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
899 emit_cmovb_imm(1,rt);
902 static int can_jump_or_call(const void *a)
904 intptr_t diff = (u_char *)a - out;
905 return (-134217728 <= diff && diff <= 134217727);
908 static void emit_call(const void *a)
910 intptr_t diff = (u_char *)a - out;
911 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
913 if (-134217728 <= diff && diff <= 134217727)
914 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
919 static void emit_jmp(const void *a)
921 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
922 u_int offset = genjmp(a);
923 output_w32(0x14000000 | offset);
926 static void emit_jne(const void *a)
928 assem_debug("bne %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_NE);
933 static void emit_jeq(const void *a)
935 assem_debug("beq %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_EQ);
940 static void emit_js(const void *a)
942 assem_debug("bmi %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_MI);
947 static void emit_jns(const void *a)
949 assem_debug("bpl %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_PL);
954 static void emit_jl(const void *a)
956 assem_debug("blt %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_LT);
961 static void emit_jge(const void *a)
963 assem_debug("bge %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_GE);
968 static void emit_jno(const void *a)
970 assem_debug("bvc %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_VC);
975 static void emit_jc(const void *a)
977 assem_debug("bcs %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CS);
982 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
984 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
985 u_int offset = genjmpcc(a);
986 is64 = is64 ? 0x80000000 : 0;
987 isnz = isnz ? 0x01000000 : 0;
988 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
991 static unused void emit_cbz(const void *a, u_int r)
996 static void emit_jmpreg(u_int r)
998 assem_debug("br %s\n", regname64[r]);
999 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1002 static void emit_retreg(u_int r)
1004 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1005 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1008 static void emit_ret(void)
1013 static void emit_adr(void *addr, u_int rt)
1015 intptr_t offset = (u_char *)addr - out;
1016 assert(-1048576 <= offset && offset < 1048576);
1018 assem_debug("adr x%d,#%#lx\n", rt, offset);
1019 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1022 static void emit_adrp(void *addr, u_int rt)
1024 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1025 assert(-4294967296l <= offset && offset < 4294967296l);
1028 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1029 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1032 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1034 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1035 assert(-256 <= offset && offset < 256);
1036 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1039 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1041 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1045 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1047 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1051 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1053 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1057 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1059 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1062 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1064 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1066 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1067 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1070 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1072 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1073 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1076 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1078 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1079 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1082 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1084 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1085 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1088 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1090 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1091 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1094 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1096 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1101 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1103 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1104 assert(-256 <= offset && offset < 256);
1105 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1108 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1110 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1111 assert(-256 <= offset && offset < 256);
1112 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1115 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1117 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1118 assert(-256 <= offset && offset < 256);
1119 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1122 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1124 if (!(offset & 3) && (u_int)offset <= 16380) {
1125 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1126 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1128 else if (-256 <= offset && offset < 256) {
1129 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1130 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1136 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1138 if (!(offset & 1) && (u_int)offset <= 8190) {
1139 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1140 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1142 else if (-256 <= offset && offset < 256) {
1143 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1150 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1152 if ((u_int)offset < 4096) {
1153 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1154 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1156 else if (-256 <= offset && offset < 256) {
1157 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1158 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1164 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1166 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1170 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1172 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1176 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1178 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1179 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1182 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1184 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1188 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1190 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1194 static void emit_clz(u_int rs, u_int rt)
1196 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1197 output_w32(0x5ac01000 | rn_rd(rs, rt));
1200 // special case for checking invalid_code
1201 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1203 host_tempreg_acquire();
1204 emit_shrimm(r, 12, HOST_TEMPREG);
1205 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1206 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1207 emit_cmpimm(HOST_TEMPREG, imm);
1208 host_tempreg_release();
1211 // special for loadlr_assemble, rs2 is destroyed
1212 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1214 emit_shl(rs2, shift, rs2);
1215 emit_bic(rs1, rs2, rt);
1218 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1220 emit_shr(rs2, shift, rs2);
1221 emit_bic(rs1, rs2, rt);
1224 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1226 u_int op = 0xb9000000;
1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
1229 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1232 ofs = (ofs >> (2+is64));
1233 if (!is_st) op |= 0x00400000;
1234 if (is64) op |= 0x40000000;
1235 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store: ldp/stp rt1, rt2, [rn, #ofs].
//   is_st - nonzero emits a store, zero emits a load
//   is64  - nonzero selects 64-bit (x) registers, zero 32-bit (w) ones
//   ofs   - signed byte offset; must be a multiple of the register size and,
//           once scaled, lie in -64..63 (both conditions are asserted)
1238 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
// base opcode; the load and 64-bit bits are OR-ed in further down
1240 u_int op = 0x29000000;
1241 unused const char *ldst = is_st ? "st" : "ld";
1242 unused char rp = is64 ? 'x' : 'w';
1243 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalise to 0/1 so it can be added to the shift amount
1244 is64 = is64 ? 1 : 0;
// offset must be aligned to 4 (w) or 8 (x) bytes
1245 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// convert the byte offset to units of the register size
1246 ofs = (ofs >> (2+is64));
// range of the 7-bit signed immediate
1247 assert(-64 <= ofs && ofs <= 63);
1249 if (!is_st) op |= 0x00400000;
1250 if (is64) op |= 0x80000000;
1251 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or reload (is_store == 0) every host register whose
// bit is set in reglist, using 64-bit accesses relative to SP starting at
// SSP_CALLEE_REGS.
1254 static void save_load_regs_all(int is_store, u_int reglist)
// walk reglist one bit at a time; r tracks the bit position and the loop
// stops as soon as no set bits remain
1258 for (r = 0; reglist; r++, reglist >>= 1) {
// two collected registers are transferred with a single ldp/stp
1262 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
// a lone register is transferred with a plain ldr/str
1268 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// NOTE(review): the accesses are based at SSP_CALLEE_REGS while this bound is
// SSP_CALLER_REGS — presumably the size limit of the save area; confirm.
1271 assert(ofs <= SSP_CALLER_REGS);
1274 // Save registers before function call
1275 static void save_regs(u_int reglist)
1277 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1278 save_load_regs_all(1, reglist);
1281 // Restore registers after function call
1282 static void restore_regs(u_int reglist)
1284 reglist &= CALLER_SAVE_REGS;
1285 save_load_regs_all(0, reglist);
1288 /* Stubs/epilogue */
// Literal pool hook, taking an int n.
// NOTE(review): presumably n is a size/count hint from the generic code —
// confirm against the callers.
1290 static void literal_pool(int n)
// Companion hook to literal_pool, taking an int n.
// NOTE(review): presumably meant for emitting a pool that execution must
// branch over — confirm against the callers.
1295 static void literal_pool_jumpover(int n)
// Emit the stub used for a jump whose destination is handed to dyna_linker.
//   addr   - location of the branch instruction the stub belongs to
//   target - 32-bit value passed on in host register 0
// The emitted sequence has a fixed shape because other code decodes it:
1299 // parsed by get_pointer, find_extjump_insn
1300 static void emit_extjump(u_char *addr, u_int target)
// top byte of the little-endian instruction word identifies the branch type
1302 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
// build the 32-bit target in register 0: low half first, then the high half
1304 emit_movz(target & 0xffff, 0);
1305 emit_movk_lsl16(target >> 16, 0);
1307 // addr is in the current recompiled block (max 256k)
1308 // offset shouldn't exceed +/-1MB
// hand over to the dynamic linker
1310 emit_far_jump(dyna_linker);
// Sanity check on an external-jump stub located at src.
1313 static void check_extjump2(void *src)
// the first instruction word must be a 32-bit movz into register 0 (any
// 16-bit immediate), matching what emit_extjump emits first
1316 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1320 // put rt_val into rt, potentially making use of rs with value rs_val
1321 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
// difference between the two constants; the unsigned subtraction wraps
// modulo 2^32 before being reinterpreted as signed
1323 int diff = rt_val - rs_val;
// a single add covers |diff| < 4096, or |diff| < 16M when the low 12 bits
// of diff are all zero
1324 if ((-4096 < diff && diff < 4096)
1325 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1326 emit_addimm(rs, diff, rt);
// rt_val is the bitwise complement of rs_val
1327 else if (rt_val == ~rs_val)
// the differing bits are accepted by is_rotated_mask, so one xor with an
// immediate turns rs_val into rt_val
1329 else if (is_rotated_mask(rs_val ^ rt_val))
1330 emit_xorimm(rs, rs_val ^ rt_val, rt);
// load rt_val on its own, without involving rs
1332 emit_movimm(rt_val, rt);
1335 // return 1 if the above function can do its job cheaply
// i.e. when v2 can be derived from v1 through one of the shortcut cases
// tested below rather than a full immediate load
1336 static int is_similar_value(u_int v1, u_int v2)
// same add-immediate ranges that emit_movimm_from checks
1339 return (-4096 < diff && diff < 4096)
1340 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
// same xor-immediate test that emit_movimm_from checks
1342 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from: put the pointer-sized rt_val into rt,
// optionally deriving it from rs, which currently holds the 32-bit rs_val.
1345 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
// values that fit in 32 bits can reuse the 32-bit shortcut logic
1347 if (rt_val < 0x100000000ull) {
1348 emit_movimm_from(rs_val, rs, rt_val, rt);
1351 // just move the whole thing. At least on Linux all addresses
1352 // seem to be 48bit, so 3 insns - not great not terrible
1353 emit_movimm64(rt_val, rt);
// Move the values held in host registers a0 and a1 into argument registers
// 0 and 1 using 64-bit moves, ordering the moves so that neither source is
// overwritten before it has been read.
// NOTE(review): a0 and a1 are u_int, so the `>=0` tests below are always
// true; they look like leftovers from a signed-parameter version — confirm.
1357 static void pass_args64(u_int a0, u_int a1)
// swap through register 2 as scratch: 2 <- a0, 1 <- a1, 0 <- 2
1361 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
// a1 currently lives in register 0 while a0 lives somewhere else
1363 else if(a0!=0&&a1==0) {
1365 if (a0>=0) emit_mov64(a0,0);
// skip any move whose source already sits in the right register
1368 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1369 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs into rt, adjusting the value to the access width implied by the
// stub type.
// NOTE(review): emit_sbfm/emit_ubfm with 7 or 15 presumably sign-/zero-extend
// the low 8 or 16 bits — confirm against their definitions.
1373 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
// signed byte load
1376 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
// byte store uses the unsigned bitfield form
1378 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
// signed halfword load
1379 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
// halfword store uses the unsigned bitfield form
1381 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
// full word: a plain move, skipped when source and destination coincide
1383 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1388 #include "pcsxmem.h"
1389 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for memory read number n in stubs[].
// The stub first looks the address up through mem_rtab and, when possible,
// performs the load directly; otherwise it calls the width-specific
// jump_handler_read* routine, moves the result into the destination register
// and jumps back to stubs[n].retaddr.
1391 static void do_readstub(int n)
1393 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// point the branch recorded for this stub at the code emitted from here on
1394 set_jump_target(stubs[n].addr, out);
1395 enum stub_type type = stubs[n].type;
// host register holding the guest address
1397 int rs = stubs[n].b;
1398 const struct regstat *i_regs = (void *)stubs[n].c;
// registers that are live and have to survive a handler call
1399 u_int reglist = stubs[n].e;
1400 const signed char *i_regmap = i_regs->regmap;
// coprocessor and unaligned loads land in FTEMP, ordinary loads in rt1
1402 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1403 rt=get_reg(i_regmap,FTEMP);
1405 rt=get_reg(i_regmap,dops[i].rt1);
1408 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
// branch placeholders that get patched once their destinations are known
1409 void *restore_jump = NULL, *handler_jump = NULL;
// look for a host register below HOST_CCREG that is neither excluded nor live
1411 for (r = 0; r < HOST_CCREG; r++) {
1412 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
// a real destination register exists (loads to guest register 0 are discarded)
1417 if(rt>=0&&dops[i].rt1!=0)
// register 1 may be used when it is saved or not live, and is not already
// taken by temp or rs
1424 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// table lookup: temp = mem_rtab, temp2 = page index, temp2 = table[page]
1426 emit_readdword(&mem_rtab,temp);
1427 emit_shrimm(rs,12,temp2);
1428 emit_readdword_dualindexedx8(temp,temp2,temp2);
// double the entry with a flag-setting add, shifting its top bit out
// NOTE(review): presumably that top bit marks a handler entry and is what
// the handler branch tests — confirm.
1429 emit_adds64(temp2,temp2,temp2);
// direct access path: load from temp2 + rs with the proper width/signedness
1432 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1434 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1435 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1436 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1437 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1438 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1444 emit_jmp(0); // jump to reg restore
1447 emit_jmp(stubs[n].retaddr); // return address
// handler path starts here
1448 set_jump_target(handler_jump, out);
// choose the read handler matching the access width
1453 if(type==LOADB_STUB||type==LOADBU_STUB)
1454 handler=jump_handler_read8;
1455 if(type==LOADH_STUB||type==LOADHU_STUB)
1456 handler=jump_handler_read16;
1457 if(type==LOADW_STUB)
1458 handler=jump_handler_read32;
// argument registers 0 and 1 receive the address and the table entry
1460 pass_args64(rs,temp2);
1461 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count + stubs[n].d; the count is taken from its host
// register when mapped (cc >= 0), otherwise from the value loaded into 2
1463 emit_loadreg(CCREG,2);
1464 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1465 emit_far_call(handler);
1466 // (no cycle reload after read)
// the handler result is in register 0; extend it into rt
1467 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1468 loadstore_extend(type,0,rt);
// shared tail: reload the saved registers and return to the main code
1471 set_jump_target(restore_jump, out);
1472 restore_regs(reglist);
1473 emit_jmp(stubs[n].retaddr);
// Emit an inline read for an address that is known at compile time (addr).
// When get_direct_memhandler reports no handler, the value is loaded straight
// from host memory; otherwise a handler call is emitted.
//   type    - load width/signedness (LOAD*_STUB)
//   i       - index of the instruction being compiled
//   regmap  - current guest-to-host register mapping
//   target  - guest register looked up for both rs and rt below
//   adj     - cycle adjustment added to the cycle count before the call
//   reglist - live host registers, restored after the call
1476 static void inline_readstub(enum stub_type type, int i, u_int addr,
1477 const signed char regmap[], int target, int adj, u_int reglist)
1479 int rs=get_reg(regmap,target);
1480 int rt=get_reg(regmap,target);
// no host register mapped for target: use a temporary for the address
1481 if(rs<0) rs=get_reg_temp(regmap);
1484 uintptr_t host_addr = 0;
1486 int cc=get_reg(regmap,CCREG);
1487 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
// NULL means the address maps to plain memory; host_addr then receives the
// host location to read from
1489 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1490 if (handler == NULL) {
// nothing to do when the result would be discarded
1491 if(rt<0||dops[i].rt1==0)
// rs holds the guest address; convert it to the host address if they differ
1493 if (addr != host_addr)
1494 emit_movimm_from64(addr, rs, host_addr, rs);
1496 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1497 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1498 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1499 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1500 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1505 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// generic handlers, selected by access width
1507 if(type==LOADB_STUB||type==LOADBU_STUB)
1508 handler=jump_handler_read8;
1509 if(type==LOADH_STUB||type==LOADHU_STUB)
1510 handler=jump_handler_read16;
1511 if(type==LOADW_STUB)
1512 handler=jump_handler_read32;
1515 // call a memhandler
1516 if(rt>=0&&dops[i].rt1!=0)
// argument 0: the guest address
1520 emit_movimm(addr,0);
// register 2 = cycle count + adj
1524 emit_loadreg(CCREG,2);
1525 emit_addimm(cc<0?2:cc,adj,2);
// table entry for this page, shifted left by one (drops its top bit)
1527 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
// page-granular distance from the code being emitted to l1
1528 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
// within +/-4GB: build l1 in register 1 with adrp plus the low 12 bits
1529 if (-4294967296l <= offset && offset < 4294967296l) {
1530 emit_adrp((void *)l1, 1);
1531 emit_addimm64(1, l1 & 0xfff, 1);
// otherwise load the full 64-bit constant
1534 emit_movimm64(l1, 1);
1537 emit_far_call(do_memhandler_pre);
1539 emit_far_call(handler);
1541 // (no cycle reload after read)
// the handler result is in register 0; extend it into rt
1542 if(rt>=0&&dops[i].rt1!=0)
1543 loadstore_extend(type, 0, rt);
1544 restore_regs(reglist);
// Emit the out-of-line slow path for memory write number n in stubs[].
// The stub looks the address up through mem_wtab and either stores directly
// or calls the width-specific jump_handler_write* routine, then adjusts the
// cycle count with the value the handler returns and jumps back to
// stubs[n].retaddr.
1547 static void do_writestub(int n)
1549 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
// point the branch recorded for this stub at the code emitted from here on
1550 set_jump_target(stubs[n].addr, out);
1551 enum stub_type type=stubs[n].type;
1554 struct regstat *i_regs=(struct regstat *)stubs[n].c;
// registers that are live and have to survive a handler call
1555 u_int reglist=stubs[n].e;
1556 signed char *i_regmap=i_regs->regmap;
// coprocessor stores take their data from FTEMP, ordinary stores from rs2;
// r remembers which guest register that was
1558 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1559 rt=get_reg(i_regmap,r=FTEMP);
1561 rt=get_reg(i_regmap,r=dops[i].rs2);
1565 int rtmp,temp=-1,temp2,regs_saved=0;
// branch placeholders that get patched once their destinations are known
1566 void *restore_jump = NULL, *handler_jump = NULL;
// live set extended with the address and data registers
// (assumes rs and rt are valid, non-negative host register numbers — TODO confirm)
1567 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a host register below HOST_CCREG that is neither excluded nor live
1568 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1569 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
// otherwise consider registers 0..3 that hold neither address nor data
1577 for(rtmp=0;rtmp<=3;rtmp++)
1578 if(rtmp!=rs&&rtmp!=rt)
// register 3 may be used when it is saved or not live, and is not already
// taken by temp, rs or rt
1581 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1584 host_tempreg_acquire();
// table lookup: temp = mem_wtab, temp2 = page index, temp2 = table[page]
1587 emit_readdword(&mem_wtab,temp);
1588 emit_shrimm(rs,12,temp2);
1589 emit_readdword_dualindexedx8(temp,temp2,temp2);
// double the entry with a flag-setting add, shifting its top bit out
1590 emit_adds64(temp2,temp2,temp2);
// direct access path: store rt to temp2 + rs with the proper width
1594 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1595 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1596 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1601 emit_jmp(0); // jump to reg restore
1604 emit_jmp(stubs[n].retaddr); // return address (invcode check)
// handler path starts here
1605 set_jump_target(handler_jump, out);
// choose the write handler matching the access width
1611 case STOREB_STUB: handler=jump_handler_write8; break;
1612 case STOREH_STUB: handler=jump_handler_write16; break;
1613 case STOREW_STUB: handler=jump_handler_write32; break;
// the table entry travels in register 3
1619 emit_mov64(temp2,3);
1620 host_tempreg_release();
1622 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count + stubs[n].d
1624 emit_loadreg(CCREG,2);
1625 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1626 // returns new cycle_count
1627 emit_far_call(handler);
// undo the adjustment on the returned count (register 0) and place it back
// in the cycle register, or in register 2 when CCREG is not mapped
1628 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1630 emit_storereg(CCREG,2);
// shared tail: reload the saved registers and return to the main code
1632 set_jump_target(restore_jump, out);
1633 restore_regs(reglist);
1634 emit_jmp(stubs[n].retaddr);
// Emit an inline write for an address that is known at compile time (addr).
// When get_direct_memhandler reports no handler, the value is stored straight
// to host memory; otherwise a handler call is emitted and the cycle count is
// updated from the handler's return value.
//   type    - store width (STORE*_STUB)
//   i       - index of the instruction being compiled
//   regmap  - current guest-to-host register mapping
//   target  - guest register holding the data to store
//   adj     - cycle adjustment applied around the handler call
//   reglist - live host registers, restored after the call
1637 static void inline_writestub(enum stub_type type, int i, u_int addr,
1638 const signed char regmap[], int target, int adj, u_int reglist)
// rs: temporary holding the address, rt: data register
1640 int rs = get_reg_temp(regmap);
1641 int rt = get_reg(regmap,target);
1644 uintptr_t host_addr = 0;
// NULL means the address maps to plain memory; host_addr then receives the
// host location to write to
1645 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1646 if (handler == NULL) {
// rs holds the guest address; convert it to the host address if they differ
1647 if (addr != host_addr)
1648 emit_movimm_from64(addr, rs, host_addr, rs);
1650 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1651 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1652 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1658 // call a memhandler
1660 emit_writeword(rs, &address); // some handlers still need it
// argument 0: the data, reduced to the access width
1661 loadstore_extend(type, rt, 0);
// cc_use is the register the cycle count is read from and written back to
1663 cc = cc_use = get_reg(regmap, CCREG);
// CCREG not mapped: bring it into register 2 and use that instead
1665 emit_loadreg(CCREG, (cc_use = 2));
// register 2 = cycle count + adj
1666 emit_addimm(cc_use, adj, 2);
1668 emit_far_call(do_memhandler_pre);
1669 emit_far_call(handler);
1670 emit_far_call(do_memhandler_post);
// undo the adjustment on the value left in register 0
1671 emit_addimm(0, -adj, cc_use);
1673 emit_storereg(CCREG, cc_use);
1674 restore_regs(reglist);
// Emit the code that precedes a call to a GTE (cop2) operation handler:
// save the registers in reglist, run the cop2 stall check and load register 0
// with a pointer to the cop2 data registers.
1679 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
// reglist is stored as given; no CALLER_SAVE_REGS masking happens here
1681 save_load_regs_all(1, reglist);
1682 cop2_do_stall_check(op, i, i_regs, 0);
// NOTE(review): presumably a profiling hook that is compiled in conditionally — confirm
1685 emit_far_call(pcnt_gte_start);
1687 // pointer to cop2 regs
// register 0 = FP + offset of psxRegs.CP2D.r[0] from dynarec_local
1688 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a call to a GTE (cop2) operation handler,
// reloading the registers that c2op_prologue saved.
1691 static void c2op_epilogue(u_int op,u_int reglist)
// NOTE(review): presumably the counterpart of the pcnt_gte_start profiling hook — confirm
1695 emit_far_call(pcnt_gte_end);
// reload exactly the set that was stored
1697 save_load_regs_all(0, reglist);
// Assemble guest instruction i, a GTE (cop2) operation, as a call to the
// matching entry of gte_handlers[] (or gte_handlers_nf[] when the flags
// result is not needed).
1700 static void c2op_assemble(int i, const struct regstat *i_regs)
// low six bits of the opcode select the GTE operation
1702 u_int c2op=source[i]&0x3f;
1703 u_int hr,reglist_full=0,reglist;
1704 int need_flags,need_ir;
// collect every host register that currently maps a guest register
1705 for(hr=0;hr<HOST_REGS;hr++) {
1706 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
// only caller-saved registers need preserving across the call
1708 reglist=reglist_full&CALLER_SAVE_REGS;
1710 if (gte_handlers[c2op]!=NULL) {
// bit 63 of the unneeded mask set means the flags result is not used later
1711 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
// need_ir is set unless bits 9..11 of the mask are all marked unneeded
1712 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1713 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1714 source[i],gte_unneeded[i+1],need_flags,need_ir);
1715 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1717 //int shift = (source[i] >> 19) & 1;
1718 //int lm = (source[i] >> 10) & 1;
1722 c2op_prologue(c2op, i, i_regs, reglist);
// the handlers read the raw opcode from psxRegs.code
1723 emit_movimm(source[i],1); // opcode
1724 emit_writeword(1,&psxRegs.code);
1725 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1728 c2op_epilogue(c2op,reglist);
1732 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1734 //value = value & 0x7ffff000;
1735 //if (value & 0x7f87e000) value |= 0x80000000;
1736 emit_andimm(sl, 0x7fffe000, temp);
1737 emit_testimm(temp, 0xff87ffff);
1738 emit_andimm(sl, 0x7ffff000, temp);
1739 host_tempreg_acquire();
1740 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1741 emit_cmovne_reg(HOST_TEMPREG, temp);
1742 host_tempreg_release();
1743 assert(0); // testing needed
1746 static void do_mfc2_31_one(u_int copr,signed char temp)
1748 emit_readshword(®_cop2d[copr],temp);
1749 emit_bicsar_imm(temp,31,temp);
1750 emit_cmpimm(temp,0xf80);
1751 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1752 emit_andimm(temp,0xf80,temp);
// Emit code that rebuilds cop2 data register 29 from registers 9, 10 and 11:
// each source is reduced by do_mfc2_31_one to a field in bits 7..11, the three
// fields are combined in tl, and tl is written back to reg_cop2d[29].
//   tl   - host register receiving the combined value
//   temp - scratch host register; HOST_TEMPREG stands in for it below
// The store target is &reg_cop2d[29]; the address-of operator and the start of
// the identifier had been mangled into a single (R) glyph.
1755 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1758 host_tempreg_acquire();
1759 temp = HOST_TEMPREG;
// register 9 supplies the lowest field: bits 7..11 move down to bits 0..4
1761 do_mfc2_31_one(9,temp);
1762 emit_shrimm(temp,7,tl);
// register 10 supplies the middle field (presumably tl |= temp >> 2 — confirm)
1763 do_mfc2_31_one(10,temp);
1764 emit_orrshr_imm(temp,2,tl);
// register 11 supplies the top field (presumably tl |= temp << 3 — confirm)
1765 do_mfc2_31_one(11,temp);
1766 emit_orrshl_imm(temp,3,tl);
1767 emit_writeword(tl,&reg_cop2d[29]);
// give the scratch register back if it was the one borrowed above
1769 if (temp == HOST_TEMPREG)
1770 host_tempreg_release();
// Assemble the guest multiply/divide instruction at index i
// (MULT/MULTU/DIV/DIVU, selected by dops[i].opcode2), writing the results to
// the host registers mapped to HIREG and LOREG.
1773 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
// general case: neither source operand is guest register 0
1779 if(dops[i].rs1&&dops[i].rs2)
1781 switch(dops[i].opcode2)
1786 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1787 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1788 signed char hi=get_reg(i_regs->regmap,HIREG);
1789 signed char lo=get_reg(i_regs->regmap,LOREG);
// the full 64-bit product is first produced in hi
1795 if(dops[i].opcode2==0x18) // MULT
1796 emit_smull(m1,m2,hi);
1798 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the product in hi
1801 emit_shrimm64(hi,32,hi);
1807 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1808 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1809 signed char quotient=get_reg(i_regs->regmap,LOREG);
1810 signed char remainder=get_reg(i_regs->regmap,HIREG);
// all four operands must be mapped to host registers
1811 assert(numerator>=0);
1812 assert(denominator>=0);
1813 assert(quotient>=0);
1814 assert(remainder>=0);
1816 if (dops[i].opcode2 == 0x1A) // DIV
1817 emit_sdiv(numerator,denominator,quotient);
1819 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1820 emit_msub(quotient,denominator,numerator,remainder);
1822 // div 0 quotient (remainder is already correct)
1823 host_tempreg_acquire();
// replacement quotient for a zero divisor: for DIV it depends on the sign of
// the numerator (presumably 0 - (numerator >> 31) — confirm), for DIVU it is
// all ones
1824 if (dops[i].opcode2 == 0x1A) // DIV
1825 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1827 emit_movimm(~0,HOST_TEMPREG);
// substitute it only when the denominator is zero
1828 emit_test(denominator,denominator);
1829 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1830 host_tempreg_release();
// remaining cases: at least one source operand is guest register 0
1839 signed char hr=get_reg(i_regs->regmap,HIREG);
1840 signed char lr=get_reg(i_regs->regmap,LOREG);
1841 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1844 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1845 assert(numerator >= 0);
// dividing by zero leaves the numerator as the remainder
1847 emit_mov(numerator,hr);
// and the same replacement quotient as in the general case
1849 if (dops[i].opcode2 == 0x1A) // DIV
1850 emit_sub_asrimm(0,numerator,31,lr);
// constant results: HI = 0 and LO = all ones
1856 if (hr >= 0) emit_zeroreg(hr);
1857 if (lr >= 0) emit_movimm(~0,lr);
1862 // Multiply by zero is zero.
1863 if (hr >= 0) emit_zeroreg(hr);
1864 if (lr >= 0) emit_zeroreg(lr);
// Make the arm64 implementation available under the generic name multdiv_assemble.
1868 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to a guest address that is only known at run time.
// NOTE(review): presumably rs holds the guest address and ndrc_get_addr_ht
// returns the host code address to branch to — confirm.
1870 static void do_jump_vaddr(u_int rs)
1874 emit_far_call(ndrc_get_addr_ht);
1878 static void do_preload_rhash(u_int r) {
1879 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1880 // register. On ARM the hash can be done with a single instruction (below)
1883 static void do_preload_rhtbl(u_int ht) {
1884 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1887 static void do_rhash(u_int rs,u_int rh) {
1888 emit_andimm(rs, 0xf8, rh);
1891 static void do_miniht_load(int ht, u_int rh) {
1892 emit_add64(ht, rh, ht);
1893 emit_ldst(0, 0, rh, ht, 0);
// Emit the jump through a mini hash table entry that do_miniht_load selected.
// NOTE(review): presumably rs is compared with the key in rh beforehand and
// a mismatch branches away — confirm.
1896 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
// bind the pending forward branch jaddr to the current output position
1902 set_jump_target(jaddr, out);
// 64-bit load of the second 8-byte slot of the entry: ht = [ht + 8]
1903 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1904 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that records return_address in the mini hash table: the 32-bit
// guest address goes into slot [0] of the entry and the 64-bit value held in
// temp into slot [1]. The entry index is (return_address & 0xFF) >> 3.
1908 // parsed by set_jump_target?
1909 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
// build return_address in rt: upper half first, then the lower half
1910 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1911 emit_movk(return_address&0xffff,rt);
// register the current output position with the linker for return_address
1912 add_to_linker(out,return_address,1);
// slot [1] receives the 64-bit value in temp, slot [0] the guest address
1914 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1915 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make code written to [start, end) visible to instruction fetch: clean the
// data cache lines, invalidate the instruction cache lines, and finish with
// the required barriers. Each step is skipped when CTR_EL0 reports that the
// hardware does not need it.
1918 static unused void clear_cache_arm64(char *start, char *end)
1920 // Don't rely on GCC's __clear_cache implementation, as it caches
1921 // icache/dcache cache line sizes, that can vary between cores on
1922 // big.LITTLE architectures.
1923 uint64_t addr, ctr_el0;
// smallest line sizes observed so far; static so the minimum persists across calls
1924 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1925 size_t isize, dsize;
// read the cache type register of the core this call is running on
1927 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// bits 3:0 and 19:16 give log2 of the line size in 4-byte words, hence 4 << field
1928 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1929 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1931 // use the global minimum cache line size
1932 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1933 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1935 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1936 not required for instruction to data coherence. */
1937 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a line boundary and walk the range one line at a time
1938 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1939 for (; addr < (uint64_t)end; addr += dsize)
1940 // use "civac" instead of "cvau", as this is the suggested workaround for
1941 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1942 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
// wait for the data cache maintenance to complete
1944 __asm__ volatile("dsb ish" : : : "memory");
1946 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1947 Unification is not required for instruction to data coherence. */
1948 if ((ctr_el0 & (1 << 29)) == 0x0) {
// same walk, using the instruction cache line size
1949 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1950 for (; addr < (uint64_t)end; addr += isize)
1951 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
// wait for the instruction cache invalidation to complete
1953 __asm__ volatile("dsb ish" : : : "memory");
// discard anything this core has already fetched
1956 __asm__ volatile("isb" : : : "memory");
1959 // CPU-architecture-specific initialization
// Fills every slot of ndrc->tramp.ops with a two-instruction trampoline that
// loads a 64-bit target from a pc-relative literal and branches to it.
1960 static void arch_init(void)
// byte distance from the ops array to the f array; it is used as the
// pc-relative literal offset of every slot
// NOTE(review): presumably ops[i] and f[i] have the same stride so that slot i
// reads f[i] — confirm.
1962 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
// writable alias of the ops array
1963 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
// the literal offset is encoded in words, so it must be 4-byte aligned
1965 assert(!(diff & 3));
1966 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1967 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1968 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1969 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1971 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1974 // vim:shiftwidth=2:expandtab