1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
26 #define unused __attribute__((unused))
// Memory handler pre/post hooks; only declared (not defined) in this part
// of the file.  Declared with real (void) prototypes so that a call passing
// arguments is diagnosed instead of silently accepted.
void do_memhandler_pre(void);
void do_memhandler_post(void);
32 static void set_jump_target(void *addr, void *target)
35 intptr_t offset = (u_char *)target - (u_char *)addr;
37 ptr += ndrc_write_ofs / sizeof(ptr[0]);
39 if ((*ptr&0xFC000000) == 0x14000000) { // b
40 assert(offset>=-134217728LL&&offset<134217728LL);
41 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
43 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
44 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
45 // Conditional branch are limited to +/- 1MB
46 // block max size is 256k so branching beyond the +/- 1MB limit
47 // should only happen when jumping to an already compiled block (see add_jump_out)
48 // a workaround would be to do a trampoline jump via a stub at the end of the block
49 assert(-1048576 <= offset && offset < 1048576);
50 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
52 else if((*ptr&0x9f000000)==0x10000000) { // adr
53 // generated by do_miniht_insert
54 assert(offset>=-1048576LL&&offset<1048576LL);
55 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
58 abort(); // should not happen
// Given a pointer to an external jump stub (the kind emit_extjump2 produces),
// return the location of the jumping instruction.
// The stub's third word must be an adr; its pc-relative byte offset is
// decoded and applied to the address of that adr.
static void *find_extjump_insn(void *stub)
{
  int *adr_insn = (int *)stub + 2;
  int hi_part, lo_part, byte_ofs;

  assert((*adr_insn&0x9f000000) == 0x10000000); // adr
  hi_part = ((signed int)(*adr_insn<<8)>>13)<<2; // sign-extended bits 23:5, times 4
  lo_part = (*adr_insn>>29)&0x3;                 // low two bits of the offset
  byte_ofs = hi_part|lo_part;
  return adr_insn + byte_ofs / 4;
}
72 // find where external branch is linked to using addr of its stub:
73 // get address that the stub loads (dyna_linker arg1),
74 // treat it as a pointer to branch insn,
75 // return addr where that branch jumps to
// The displacement is extracted as a signed word count (26 bits for b,
// 19 bits for b.cond/cbz/cbnz); adding it to the int pointer scales it to bytes.
76 static void *get_pointer(void *stub)
78 int *i_ptr = find_extjump_insn(stub);
79 if ((*i_ptr&0xfc000000) == 0x14000000) // b
80 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
81 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
82 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
83 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
89 // Allocate a specific ARM register.
// Scans the host registers (skipping EXCLUDE_REG) for one that already maps
// guest register reg and remembers its dirty bit; host register hr then
// receives that dirty state and has its isconst bit cleared.
90 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
95 // see if it's already allocated (and dealloc it)
96 for(n=0;n<HOST_REGS;n++)
98 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
99 dirty=(cur->dirty>>n)&1;
// replace hr's dirty bit with the one carried over from the old mapping
105 cur->dirty&=~(1<<hr);
106 cur->dirty|=dirty<<hr;
107 cur->isconst&=~(1<<hr);
110 // Alloc cycle count into dedicated register
111 static void alloc_cc(struct regstat *cur,int i)
113 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
// Names printed for host registers 0..31 when used as 32-bit operands.
static unused const char *regname[32] = {
  "w0",  "w1",  "w2",  "w3",
  "w4",  "w5",  "w6",  "w7",
  "w8",  "w9",  "w10", "w11",
  "w12", "w13", "w14", "w15",
  "ip0", "ip1", "w18", "w19",
  "w20", "w21", "w22", "w23",
  "w24", "w25", "w26", "w27",
  "w28", "wfp", "wlr", "wsp"
};
// Names printed for host registers 0..31 when used as 64-bit operands.
static unused const char *regname64[32] = {
  "x0",  "x1",  "x2",  "x3",
  "x4",  "x5",  "x6",  "x7",
  "x8",  "x9",  "x10", "x11",
  "x12", "x13", "x14", "x15",
  "ip0", "ip1", "x18", "x19",
  "x20", "x21", "x22", "x23",
  "x24", "x25", "x26", "x27",
  "x28", "fp",  "lr",  "sp"
};
// Condition codes, listed in the same order as the condname[] strings.
// They are OR-ed/shifted directly into instruction words by the emitters,
// so presumably they number from 0 - TODO confirm against the enum opener.
136 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
137 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
// Printable suffix for each of the 16 condition codes, indexed by COND_*.
static unused const char *condname[16] = {
  "eq", "ne", "cs", "cc",
  "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt",
  "gt", "le", "aw", "nv"
};
// Store one 32-bit instruction word at the current output position:
// the write goes to out displaced by ndrc_write_ofs.
145 static void output_w32(u_int word)
147 *((u_int *)(out + ndrc_write_ofs)) = word;
// Build the register fields of an instruction word: rn at bit 5, rd at bit 0.
151 static u_int rn_rd(u_int rn, u_int rd)
155 return (rn << 5) | rd;
// Build the register fields of an instruction word:
// rm at bit 16, rn at bit 5, rd at bit 0.
158 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
163 return (rm << 16) | (rn << 5) | rd;
// Same as rm_rn_rd() with an additional ra field placed at bit 10.
166 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
169 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Build the operand fields used by the register-pair load/store emitter:
// imm7 at bit 15, rt2 at bit 10, rn at bit 5, rt at bit 0.
// imm7 is not masked here, so the caller must pass a value that fits.
172 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
178 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Same as rm_rn_rd() with a shift amount (imm6) placed at bit 10.
181 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
184 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Build the operand fields for a 16-bit immediate (bit 5) and rd (bit 0).
// Asserts that imm16 fits in 16 bits.
187 static u_int imm16_rd(u_int imm16, u_int rd)
189 assert(imm16 < 0x10000);
191 return (imm16 << 5) | rd;
// Build the operand fields for a 12-bit immediate (bit 10), rn (bit 5)
// and rd (bit 0).  Asserts that imm12 fits in 12 bits.
194 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
196 assert(imm12 < 0x1000);
199 return (imm12 << 10) | (rn << 5) | rd;
// Build the operand fields for a 9-bit immediate (bit 12), rn (bit 5) and
// the transfer register (bit 0).  imm9 must already be reduced to 9 bits
// (callers mask signed offsets with 0x1ff); this is asserted.
202 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
204 assert(imm9 < 0x200);
207 return (imm9 << 12) | (rn << 5) | rd;
// Build the operand fields for a 19-bit immediate (bit 5) and rt (bit 0).
// Asserts that imm19 fits in 19 bits.
210 static u_int imm19_rt(u_int imm19, u_int rt)
212 assert(imm19 < 0x80000);
214 return (imm19 << 5) | rt;
// Build the operand fields of the bitfield / logical-immediate emitters:
// n at bit 22, immr at bit 16, imms at bit 10, rn at bit 5, rd at bit 0.
217 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
224 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Compute the 26-bit word displacement from the current output position
// (out) to addr, for use in an unconditional branch.
// addr values 0..2 are placeholders and give 0; a displacement outside
// -134217728..134217727 bytes is reported through SysPrintf.
227 static u_int genjmp(const u_char *addr)
229 intptr_t offset = addr - out;
230 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
231 if (offset < -134217728 || offset > 134217727) {
232 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
236 return ((u_int)offset >> 2) & 0x03ffffff;
// Compute the 19-bit word displacement from the current output position
// (out) to addr, for conditional and compare-and-branch instructions.
// addr values 0..2 give 0 (same placeholder convention as genjmp); a
// displacement outside -1048576..1048572 bytes is reported through SysPrintf.
239 static u_int genjmpcc(const u_char *addr)
241 intptr_t offset = addr - out;
242 if ((uintptr_t)addr < 3) return 0;
243 if (offset < -1048576 || offset > 1048572) {
244 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
248 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. 2^k - 1, including all-ones), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low mask carries through every set bit
  return (value & (value + 1)) == 0;
}
256 // This function returns true if the argument contains a
257 // non-empty sequence of ones (possibly rotated) with the remainder zero.
258 static uint32_t is_rotated_mask(u_int value)
260 if (value == 0 || value == ~0)
262 if (is_mask((value - 1) | value))
264 return is_mask((~value - 1) | ~value);
// Derive the immr/imms fields that encode value as a 32-bit logical
// immediate.  Callers in this file check is_rotated_mask(value) first.
267 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
269 int lzeros, tzeros, ones;
// case 1: the ones form a single run that does not wrap around bit 31
271 if (is_mask((value - 1) | value)) {
272 lzeros = __builtin_clz(value);
273 tzeros = __builtin_ctz(value);
274 ones = 32 - lzeros - tzeros;
// rotate amount that moves a low run of ones up to bit position tzeros
275 *immr = (32 - tzeros) & 31;
// NOTE(review): this test is textually identical to the first one;
// presumably value has been changed in between (e.g. inverted to handle a
// wrapping run) - confirm against the full function body.
280 if (is_mask((value - 1) | value)) {
281 lzeros = __builtin_clz(value);
282 tzeros = __builtin_ctz(value);
283 ones = 32 - lzeros - tzeros;
291 static void emit_mov(u_int rs, u_int rt)
293 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
294 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
297 static void emit_mov64(u_int rs, u_int rt)
299 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
300 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
303 static void emit_add(u_int rs1, u_int rs2, u_int rt)
305 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
306 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
309 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
311 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
312 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
315 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
317 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
318 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
320 #define emit_adds_ptr emit_adds64
322 static void emit_neg(u_int rs, u_int rt)
324 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
325 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
328 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
330 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
331 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
334 static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
336 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
337 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
340 static void emit_movz(u_int imm, u_int rt)
342 assem_debug("movz %s,#%#x\n", regname[rt], imm);
343 output_w32(0x52800000 | imm16_rd(imm, rt));
346 static void emit_movz_lsl16(u_int imm, u_int rt)
348 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
349 output_w32(0x52a00000 | imm16_rd(imm, rt));
352 static void emit_movn(u_int imm, u_int rt)
354 assem_debug("movn %s,#%#x\n", regname[rt], imm);
355 output_w32(0x12800000 | imm16_rd(imm, rt));
358 static void emit_movn_lsl16(u_int imm,u_int rt)
360 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x12a00000 | imm16_rd(imm, rt));
364 static void emit_movk(u_int imm,u_int rt)
366 assem_debug("movk %s,#%#x\n", regname[rt], imm);
367 output_w32(0x72800000 | imm16_rd(imm, rt));
// movk rt, #imm, lsl #16: imm is the 16-bit value before shifting.
370 static void emit_movk_lsl16(u_int imm,u_int rt)
373 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x72a00000 | imm16_rd(imm, rt));
// NOTE(review): presumably emits code that sets host register rt to zero
// (callers use it interchangeably with emit_movimm(0, rt)) - confirm
// against the function body.
377 static void emit_zeroreg(u_int rt)
// Load a 32-bit constant into rt, choosing among several encodings
// depending on the shape of imm (see the per-branch notes below).
382 static void emit_movimm(u_int imm, u_int rt)
// ~imm fits in 16 bits
386 else if ((~imm) < 65536)
// only the upper halfword is populated
388 else if ((imm&0xffff) == 0)
389 emit_movz_lsl16(imm >> 16, rt);
// lower halfword is all ones
390 else if (((~imm)&0xffff) == 0)
391 emit_movn_lsl16(~imm >> 16, rt);
// a rotated run of ones can be materialized with one orr from wzr
392 else if (is_rotated_mask(imm)) {
394 gen_logical_imm(imm, &immr, &imms);
395 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
396 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
// general case: two instructions, low halfword then high halfword
399 emit_movz(imm & 0xffff, rt);
400 emit_movk_lsl16(imm >> 16, rt);
// Load a 32-bit value from addr into rt, addressing it relative to FP.
// addr is converted to a byte offset from dynarec_local; the visible path
// requires a 4-byte aligned offset of at most 16380.
404 static void emit_readword(void *addr, u_int rt)
406 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
407 if (!(offset & 3) && offset <= 16380) {
408 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
// the immediate field holds the offset in words
409 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Load a 64-bit value from addr into rt, addressing it relative to FP.
// addr is converted to a byte offset from dynarec_local; the visible path
// requires an 8-byte aligned offset of at most 32760.
415 static void emit_readdword(void *addr, u_int rt)
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 7) && offset <= 32760) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
// the immediate field holds the offset in doublewords
420 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// pointer-sized loads use the 64-bit variant
425 #define emit_readptr emit_readdword
// Load a sign-extended 16-bit value from addr into rt, addressing it
// relative to FP.  addr is converted to a byte offset from dynarec_local;
// the visible path requires a 2-byte aligned offset of at most 8190.
427 static void emit_readshword(void *addr, u_int rt)
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 1) && offset <= 8190) {
431 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
// the immediate field holds the offset in halfwords
432 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Load guest/internal register r from its memory slot into host register hr.
// Special register numbers map to dedicated variables; the others index
// psxRegs.GPR.r[].  INVCP and ROREG are flagged as 64-bit (is64).
438 static void emit_loadreg(u_int r, u_int hr)
446 //case HIREG: addr = &hi; break;
447 //case LOREG: addr = &lo; break;
448 case CCREG: addr = &cycle_count; break;
449 case CSREG: addr = &Status; break;
450 case INVCP: addr = &invc_ptr; is64 = 1; break;
451 case ROREG: addr = &ram_offset; is64 = 1; break;
454 addr = &psxRegs.GPR.r[r];
// 64-bit load
458 emit_readdword(addr, hr);
// 32-bit load
460 emit_readword(addr, hr);
// Store the 32-bit register rt to addr, addressing it relative to FP.
// addr is converted to a byte offset from dynarec_local; the visible path
// requires a 4-byte aligned offset of at most 16380.
464 static void emit_writeword(u_int rt, void *addr)
466 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
467 if (!(offset & 3) && offset <= 16380) {
468 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
469 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Store the 64-bit register rt to addr, addressing it relative to FP.
// addr is converted to a byte offset from dynarec_local; the visible path
// requires an 8-byte aligned offset of at most 32760.
475 static void emit_writedword(u_int rt, void *addr)
477 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
478 if (!(offset & 7) && offset <= 32760) {
479 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
480 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Store host register hr (32-bit) back to the memory slot of register r.
// The slot defaults to psxRegs.GPR.r[r]; CCREG is redirected to cycle_count.
486 static void emit_storereg(u_int r, u_int hr)
489 void *addr = &psxRegs.GPR.r[r];
491 //case HIREG: addr = &hi; break;
492 //case LOREG: addr = &lo; break;
493 case CCREG: addr = &cycle_count; break;
// anything else must index the first 34 entries of the register array
494 default: assert(r < 34); break;
496 emit_writeword(hr, addr);
499 static void emit_test(u_int rs, u_int rt)
501 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
502 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
505 static void emit_testimm(u_int rs, u_int imm)
508 assem_debug("tst %s,#%#x\n", regname[rs], imm);
509 assert(is_rotated_mask(imm)); // good enough for PCSX
510 gen_logical_imm(imm, &immr, &imms);
511 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
514 static void emit_not(u_int rs,u_int rt)
516 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
517 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
520 static void emit_and(u_int rs1,u_int rs2,u_int rt)
522 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
523 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
526 static void emit_or(u_int rs1,u_int rs2,u_int rt)
528 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
529 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
532 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
534 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
535 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
538 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
540 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
541 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
544 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
546 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
547 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
550 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
552 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
553 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
556 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
558 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
559 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
562 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
564 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
565 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// rt = rs + imm, optionally flag-setting (s) and/or 64-bit (is64).
// imm is treated as a wrapped two's complement value: when -imm is small
// a sub is emitted instead.  Magnitudes below 4096 take one instruction;
// magnitudes below 16777216 take an add/sub of the upper 12 bits followed,
// when needed, by one of the lower 12 bits.
568 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
570 unused const char *st = s ? "s" : "";
// turn the booleans into the opcode bits they control
571 s = s ? 0x20000000 : 0;
572 is64 = is64 ? 0x80000000 : 0;
574 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
575 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
577 else if (-imm < 4096) {
578 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
579 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
581 else if (imm < 16777216) {
// NOTE(review): the two debug lines of this branch name the source registers
// the other way round from what is encoded (first insn reads rs, second rt).
582 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
583 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
// the second insn is also needed when flags are requested, since only it sets them
584 if ((imm & 0xfff) || s) {
585 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
586 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
589 else if (-imm < 16777216) {
590 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
591 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
// imm and -imm have zero low 12 bits under the same conditions
592 if ((imm & 0xfff) || s) {
593 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
594 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
601 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
603 emit_addimm_s(0, 0, rs, imm, rt);
606 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
608 emit_addimm_s(0, 1, rs, imm, rt);
611 static void emit_addimm_and_set_flags(int imm, u_int rt)
613 emit_addimm_s(1, 0, rt, imm, rt);
// Shared emitter for rt = rs <op> imm (32-bit); op selects one of
// and / orr / eor / ands (0..3, same order as names[]).
// A rotated-mask immediate is encoded directly; anything else is first
// loaded into HOST_TEMPREG and a register-register form is used.
616 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
618 const char *names[] = { "and", "orr", "eor", "ands" };
619 const char *name = names[op];
622 if (is_rotated_mask(imm)) {
623 gen_logical_imm(imm, &immr, &imms);
624 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
625 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
// the temp register is acquired unless it is only the destination
// (rt == HOST_TEMPREG and rs != HOST_TEMPREG)
628 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
629 host_tempreg_acquire();
630 emit_movimm(imm, HOST_TEMPREG);
631 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
632 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
// release under the same condition as the acquire above
633 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
634 host_tempreg_release();
// rt = rs & imm (32-bit); delegates to emit_logicop_imm with op 0 ("and").
639 static void emit_andimm(u_int rs, u_int imm, u_int rt)
644 emit_logicop_imm(0, rs, imm, rt);
// rt = rs | imm (32-bit); delegates to emit_logicop_imm with op 1 ("orr").
647 static void emit_orimm(u_int rs, u_int imm, u_int rt)
654 emit_logicop_imm(1, rs, imm, rt);
// rt = rs ^ imm (32-bit); delegates to emit_logicop_imm with op 2 ("eor").
657 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
664 emit_logicop_imm(2, rs, imm, rt);
667 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
669 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
670 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
673 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
675 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
676 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
679 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
681 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
682 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
685 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
687 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
688 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
691 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
693 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
694 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
697 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
699 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
700 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
703 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
705 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
706 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
709 static void emit_signextend16(u_int rs, u_int rt)
711 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
712 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
715 static void emit_shl(u_int rs,u_int rshift,u_int rt)
717 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
718 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
721 static void emit_shr(u_int rs,u_int rshift,u_int rt)
723 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
724 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
727 static void emit_sar(u_int rs,u_int rshift,u_int rt)
729 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
730 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
733 static void emit_cmpimm(u_int rs, u_int imm)
736 assem_debug("cmp %s,%#x\n", regname[rs], imm);
737 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
739 else if (-imm < 4096) {
740 assem_debug("cmn %s,%#x\n", regname[rs], imm);
741 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
743 else if (imm < 16777216 && !(imm & 0xfff)) {
744 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
745 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
748 host_tempreg_acquire();
749 emit_movimm(imm, HOST_TEMPREG);
750 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
751 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
752 host_tempreg_release();
756 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
758 assert(imm == 0 || imm == 1);
759 assert(cond0 < 0x10);
760 assert(cond1 < 0x10);
762 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
763 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
765 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
766 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
770 static void emit_cmovne_imm(u_int imm,u_int rt)
772 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
775 static void emit_cmovl_imm(u_int imm,u_int rt)
777 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
780 static void emit_cmovb_imm(int imm,u_int rt)
782 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
785 static void emit_cmoveq_reg(u_int rs,u_int rt)
787 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
788 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
791 static void emit_cmovne_reg(u_int rs,u_int rt)
793 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
794 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
797 static void emit_cmovl_reg(u_int rs,u_int rt)
799 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
800 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
803 static void emit_cmovb_reg(u_int rs,u_int rt)
805 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
806 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
809 static void emit_cmovs_reg(u_int rs,u_int rt)
811 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
812 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
815 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
817 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
818 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
// Set-on-less-than-immediate, signed: rt ends up 1 when the "lt" condition
// holds, else 0.
// rt is zeroed up front when it is not also the source; when rs == rt the
// zeroing is deferred so that rs is not destroyed too early.
821 static void emit_slti32(u_int rs,int imm,u_int rt)
823 if(rs!=rt) emit_zeroreg(rt);
825 if(rs==rt) emit_movimm(0,rt);
826 emit_cmovl_imm(1,rt);
// Set-on-less-than-immediate, unsigned: rt ends up 1 when the carry-clear
// ("below") condition holds, else 0.
// rt is zeroed up front when it is not also the source; when rs == rt the
// zeroing is deferred so that rs is not destroyed too early.
829 static void emit_sltiu32(u_int rs,int imm,u_int rt)
831 if(rs!=rt) emit_zeroreg(rt);
833 if(rs==rt) emit_movimm(0,rt);
834 emit_cmovb_imm(1,rt);
837 static void emit_cmp(u_int rs,u_int rt)
839 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
840 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
843 static void emit_cmpcs(u_int rs,u_int rt)
845 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
846 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
// Produce a 0/1 result in rt from a test on rs; the final step forces rt to 0
// when the "lt" condition holds.
// NOTE(review): by its name this computes rt = (rs > 0) - confirm against the
// full body.
849 static void emit_set_gz32(u_int rs, u_int rt)
851 //assem_debug("set_gz32\n");
854 emit_cmovl_imm(0,rt);
// Produce a 0/1 "non-zero" result in rt: rs is copied into rt (if they differ)
// and rt is then replaced by 1 when the "ne" condition holds.
857 static void emit_set_nz32(u_int rs, u_int rt)
859 //assem_debug("set_nz32\n");
860 if(rs!=rt) emit_mov(rs,rt);
862 emit_cmovne_imm(1,rt);
// rt = 1 when the signed "lt" condition holds for (rs1, rs2), else 0.
// rt is zeroed up front unless it aliases a source; in that case the zeroing
// is deferred so that the source is not destroyed too early.
865 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
867 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
868 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
870 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
871 emit_cmovl_imm(1,rt);
// rt = 1 when the carry-clear ("below", unsigned less) condition holds for
// (rs1, rs2), else 0.
// rt is zeroed up front unless it aliases a source; in that case the zeroing
// is deferred so that the source is not destroyed too early.
874 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
876 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovb_imm(1,rt);
883 static int can_jump_or_call(const void *a)
885 intptr_t diff = (u_char *)a - out;
886 return (-134217728 <= diff && diff <= 134217727);
// Emit a bl to a.  The displacement is measured from the current output
// position (out); the visible path handles targets within
// -134217728..134217727 bytes.
889 static void emit_call(const void *a)
891 intptr_t diff = (u_char *)a - out;
892 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
894 if (-134217728 <= diff && diff <= 134217727)
// 26-bit word displacement
895 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
900 static void emit_jmp(const void *a)
902 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
903 u_int offset = genjmp(a);
904 output_w32(0x14000000 | offset);
907 static void emit_jne(const void *a)
909 assem_debug("bne %p\n", a);
910 u_int offset = genjmpcc(a);
911 output_w32(0x54000000 | (offset << 5) | COND_NE);
914 static void emit_jeq(const void *a)
916 assem_debug("beq %p\n", a);
917 u_int offset = genjmpcc(a);
918 output_w32(0x54000000 | (offset << 5) | COND_EQ);
921 static void emit_js(const void *a)
923 assem_debug("bmi %p\n", a);
924 u_int offset = genjmpcc(a);
925 output_w32(0x54000000 | (offset << 5) | COND_MI);
928 static void emit_jns(const void *a)
930 assem_debug("bpl %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_PL);
935 static void emit_jl(const void *a)
937 assem_debug("blt %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_LT);
942 static void emit_jge(const void *a)
944 assem_debug("bge %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_GE);
949 static void emit_jno(const void *a)
951 assem_debug("bvc %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_VC);
956 static void emit_jc(const void *a)
958 assem_debug("bcs %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_CS);
963 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
965 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
966 u_int offset = genjmpcc(a);
967 is64 = is64 ? 0x80000000 : 0;
968 isnz = isnz ? 0x01000000 : 0;
969 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// NOTE(review): presumably a cbz convenience wrapper around emit_cb() -
// confirm against the function body.  Marked unused, so it may have no callers.
972 static unused void emit_cbz(const void *a, u_int r)
977 static void emit_jmpreg(u_int r)
979 assem_debug("br %s\n", regname64[r]);
980 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
983 static void emit_retreg(u_int r)
985 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
986 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// NOTE(review): presumably emits a plain return through the link register
// via emit_retreg() - confirm against the function body.
989 static void emit_ret(void)
// adr rt, addr: load the address addr pc-relatively.  addr must lie within
// -1048576..1048575 bytes of the current output position (asserted).
994 static void emit_adr(void *addr, u_int rt)
996 intptr_t offset = (u_char *)addr - out;
997 assert(-1048576 <= offset && offset < 1048576);
999 assem_debug("adr x%d,#%#lx\n", rt, offset);
// the byte offset is split: low 2 bits at 30:29, the remaining bits at 23:5
1000 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// adrp rt, addr: load the 4K page address of addr pc-relatively.
// The offset is computed between 4K-aligned addresses and must lie within
// +/- 4GB (asserted).
1003 static void emit_adrp(void *addr, u_int rt)
1005 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1006 assert(-4294967296l <= offset && offset < 4294967296l);
// NOTE(review): the debug text and the field split below look like they
// expect offset to be in pages by this point - confirm against the full body.
1009 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1010 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1013 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1015 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1016 assert(-256 <= offset && offset < 256);
1017 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1020 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1022 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1026 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1032 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1034 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1038 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1040 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1043 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1045 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1051 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1057 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1063 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1069 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1075 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1082 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1089 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1096 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Store the 32-bit register rt at [rs + offset].
// A non-negative, 4-byte aligned offset up to 16380 uses the scaled str form;
// otherwise an offset in -256..255 uses the unscaled stur form.
1103 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast also rejects negative offsets here
1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Store the low halfword of rt at [rs + offset].
// A non-negative, 2-byte aligned offset up to 8190 uses the scaled strh form;
// otherwise an offset in -256..255 uses the unscaled sturh form.
1117 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast also rejects negative offsets here
1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Store the low byte of rt at [rs + offset].
// A non-negative offset below 4096 uses the strb form; otherwise an offset
// in -256..255 uses the unscaled sturb form.
1131 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
// the unsigned cast also rejects negative offsets here
1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1145 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1151 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1157 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1163 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1169 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1175 static void emit_clz(u_int rs, u_int rt)
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
1181 // special case for checking invalid_code
1182 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
1192 // special for loadlr_assemble, rs2 is destroyed
1193 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
1199 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
1205 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1207 u_int op = 0xb9000000;
1208 unused const char *ldst = is_st ? "st" : "ld";
1209 unused char rp = is64 ? 'x' : 'w';
1210 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1211 is64 = is64 ? 1 : 0;
1212 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1213 ofs = (ofs >> (2+is64));
1214 if (!is_st) op |= 0x00400000;
1215 if (is64) op |= 0x40000000;
1216 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Emit a register-pair load or store with an immediate offset.
//   is_st    - non-zero for a store, zero for a load
//   is64     - non-zero for x (64-bit) registers, zero for w (32-bit) ones
//   rt1, rt2 - the two registers being transferred
//   rn       - base register
//   ofs      - signed byte offset; must be a multiple of the access size, and
//              the scaled value must fall within -64..63
// NOTE(review): one line between the range assert and the opcode assembly is
// missing from this listing; confirm how a negative scaled offset is reduced
// to the 7-bit field before imm7_rt2_rn_rt is called.
1219 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1221 u_int op = 0x29000000;
1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
1224 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
// normalize to 0/1 so it can be used in the shift amounts below
1225 is64 = is64 ? 1 : 0;
// the offset must be aligned to the access size (4 or 8 bytes)
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
// convert the byte offset to units of the access size
1227 ofs = (ofs >> (2+is64));
1228 assert(-64 <= ofs && ofs <= 63);
1230 if (!is_st) op |= 0x00400000;
1231 if (is64) op |= 0x80000000;
1232 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store non-zero) or load the host registers selected by the reglist
// bitmask to/from the stack area starting at SP + SSP_CALLEE_REGS.
// Registers are transferred as 64-bit values, in pairs via emit_ldstp and
// singly via emit_ldst.
// NOTE(review): most of the loop body (how pair[] and ofs are maintained) is
// missing from this listing, so the exact slot layout cannot be confirmed here.
1235 static void save_load_regs_all(int is_store, u_int reglist)
// walk the mask from bit 0 upward until no set bits remain
1239 for (r = 0; reglist; r++, reglist >>= 1) {
1243 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1249 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the save area in use must not extend past SSP_CALLER_REGS
1252 assert(ofs <= SSP_CALLER_REGS);
1255 // Save registers before function call
1256 static void save_regs(u_int reglist)
1258 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1259 save_load_regs_all(1, reglist);
1262 // Restore registers after function call
1263 static void restore_regs(u_int reglist)
1265 reglist &= CALLER_SAVE_REGS;
1266 save_load_regs_all(0, reglist);
1269 /* Stubs/epilogue */
// Literal pool hook of the stubs/epilogue interface.
// NOTE(review): the body is not visible in this listing; presumably little or
// nothing is emitted on arm64 - confirm against the full file.
1271 static void literal_pool(int n)
// Companion hook to literal_pool.
// NOTE(review): the body is not visible in this listing; it appears to span at
// most a couple of short lines - confirm whether it is empty.
1276 static void literal_pool_jumpover(int n)
// Emit the sequence used for a jump that leaves the current block.
//   addr   - location of the branch instruction being linked; its top byte
//            must identify a "b" or "b.cond"
//   target - 32-bit target value, loaded in two halves (movz + movk lsl 16)
//            into register 0
// The sequence ends with a far jump to dyna_linker.
// NOTE(review): a short line between the two trailing comments and the far
// jump is missing from this listing; it presumably passes addr to the linker.
1280 // parsed by get_pointer, find_extjump_insn
1281 static void emit_extjump(u_char *addr, u_int target)
1283 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1285 emit_movz(target & 0xffff, 0);
1286 emit_movk_lsl16(target >> 16, 0);
1288 // addr is in the current recompiled block (max 256k)
1289 // offset shouldn't exceed +/-1MB
1291 emit_far_jump(dyna_linker);
// Sanity check for an extjump sequence located at src: the first instruction
// word must match the "movz r0, #val" pattern (any 16-bit immediate).
// NOTE(review): the declaration of ptr and any further checks are missing from
// this listing.
1294 static void check_extjump2(void *src)
1297 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
// Strategies visible below, tried in order:
//   1. rt = rs + diff, when diff is within +/-4095, or within +/-16M with the
//      low 12 bits clear
//   2. rt_val is the bitwise complement of rs_val (the emitting line is missing
//      from this listing)
//   3. rt = rs ^ (rs_val ^ rt_val), when that xor value passes is_rotated_mask
//   4. otherwise load rt_val directly with emit_movimm
// NOTE(review): the final "else" line is also missing from this listing.
1301 // put rt_val into rt, potentially making use of rs with value rs_val
1302 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1304 int diff = rt_val - rs_val;
1305 if ((-4096 < diff && diff < 4096)
1306 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1307 emit_addimm(rs, diff, rt);
1308 else if (rt_val == ~rs_val)
1310 else if (is_rotated_mask(rs_val ^ rt_val))
1311 emit_xorimm(rs, rs_val ^ rt_val, rt);
1313 emit_movimm(rt_val, rt);
// The visible conditions mirror the add-immediate and xor-immediate cases of
// emit_movimm_from.
// NOTE(review): the computation of diff and one more "||" term are missing
// from this listing.
1316 // return 1 if the above function can do its job cheaply
1317 static int is_similar_value(u_int v1, u_int v2)
1320 return (-4096 < diff && diff < 4096)
1321 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1323 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from: put the pointer-sized value rt_val into
// rt, where rs currently holds the 32-bit value rs_val.
// Values below 2^32 are delegated to emit_movimm_from; otherwise the value is
// built from 16-bit pieces with movz followed by movk at shifts 16, 32 and 48.
// NOTE(review): the lines separating the two paths, and whatever guards the
// final "lsl #48" movk, are missing from this listing.
1326 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1328 if (rt_val < 0x100000000ull) {
1329 emit_movimm_from(rs_val, rs, rt_val, rt);
1332 // just move the whole thing. At least on Linux all addresses
1333 // seem to be 48bit, so 3 insns - not great not terrible
1334 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1335 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1336 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1337 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1338 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1339 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1341 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1342 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
// Move the values held in host registers a0 and a1 into registers 0 and 1
// using 64-bit moves. When both would overwrite each other, register 2 is used
// as a temporary for the swap.
// NOTE(review): a0 and a1 are u_int, so the "a0>=0" / "a1>=0" tests below are
// always true; confirm whether a negative "no register" value is ever passed.
// NOTE(review): several branch and brace lines are missing from this listing.
1347 static void pass_args64(u_int a0, u_int a1)
1351 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1353 else if(a0!=0&&a1==0) {
1355 if (a0>=0) emit_mov64(a0,0);
1358 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1359 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs to rt, adjusting the value to the width implied by the stub type.
// The visible cases use emit_sbfm for LOADB/LOADH and emit_ubfm for
// STOREB/STOREH, with 7 for bytes and 15 for halfwords; STOREW is a plain move
// that is skipped when rs == rt.
// NOTE(review): the switch header and some case labels are missing from this
// listing.
1363 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1366 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1368 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1369 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1371 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1373 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1378 #include "pcsxmem.h"
1379 //#include "pcsxmem_inline.c"
// Emit the out-of-line stub for the memory read recorded in stubs[n].
// Visible flow:
//   - the branch at stubs[n].addr is retargeted to this stub
//   - the mem_rtab entry for the address page (rs >> 12) is loaded and doubled
//     with a flag-setting add
//   - a load of the requested width through that value is emitted, followed
//     by a jump back to stubs[n].retaddr (presumably the path taken when the
//     entry is a direct pointer - confirm)
//   - otherwise jump_handler_read8/16/32 is called with the address in rs and
//     the cycle count adjusted by stubs[n].d, and the result is moved from
//     register 0 into rt via loadstore_extend
// NOTE(review): this listing omits many short lines (braces, else branches,
// declarations of i, rt and handler, register save calls), so the exact branch
// structure cannot be confirmed from here.
1381 static void do_readstub(int n)
1383 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1384 set_jump_target(stubs[n].addr, out);
1385 enum stub_type type = stubs[n].type;
1387 int rs = stubs[n].b;
1388 const struct regstat *i_regs = (void *)stubs[n].c;
1389 u_int reglist = stubs[n].e;
1390 const signed char *i_regmap = i_regs->regmap;
// destination register: FTEMP for C1LS/C2LS/LOADLR, otherwise the insn's rt1
1392 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1393 rt=get_reg(i_regmap,FTEMP);
1395 rt=get_reg(i_regmap,dops[i].rt1);
1398 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1399 void *restore_jump = NULL, *handler_jump = NULL;
// scan for a host register that is not in reglist (and not EXCLUDE_REG)
1401 for (r = 0; r < HOST_CCREG; r++) {
1402 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1407 if(rt>=0&&dops[i].rt1!=0)
1414 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then doubled with flags set
1416 emit_readdword(&mem_rtab,temp);
1417 emit_shrimm(rs,12,temp2);
1418 emit_readdword_dualindexedx8(temp,temp2,temp2);
1419 emit_adds64(temp2,temp2,temp2);
1422 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1424 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1425 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1426 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1427 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1428 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1434 emit_jmp(0); // jump to reg restore
1437 emit_jmp(stubs[n].retaddr); // return address
1438 set_jump_target(handler_jump, out);
// pick the handler entry point by access width
1443 if(type==LOADB_STUB||type==LOADBU_STUB)
1444 handler=jump_handler_read8;
1445 if(type==LOADH_STUB||type==LOADHU_STUB)
1446 handler=jump_handler_read16;
1447 if(type==LOADW_STUB)
1448 handler=jump_handler_read32;
1450 pass_args64(rs,temp2);
// register 2 receives the cycle count plus this stub's adjustment
1451 int cc=get_reg(i_regmap,CCREG);
1453 emit_loadreg(CCREG,2);
1454 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1455 emit_far_call(handler);
1456 // (no cycle reload after read)
1457 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1458 loadstore_extend(type,0,rt);
1461 set_jump_target(restore_jump, out);
1462 restore_regs(reglist);
1463 emit_jmp(stubs[n].retaddr);
// Emit an inline read for instruction i from the known address addr.
//   type    - load stub type (selects width and signedness)
//   regmap  - register map used to locate target and CCREG
//   target  - guest register looked up for both rs and rt
//   adj     - cycle adjustment added to the count passed to the handler
//   reglist - host registers passed to restore_regs after a handler call
// get_direct_memhandler is queried first. When it returns NULL the load is
// emitted directly from host_addr (rs is re-pointed with emit_movimm_from64
// if host_addr differs from addr). Otherwise a memhandler is called with addr
// in register 0 and the adjusted cycle count in register 2, and the result is
// taken from register 0 via loadstore_extend.
// NOTE(review): many short lines (braces, returns, declarations of handler and
// is_dynamic, the register save) are missing from this listing.
1466 static void inline_readstub(enum stub_type type, int i, u_int addr,
1467 const signed char regmap[], int target, int adj, u_int reglist)
1469 int rs=get_reg(regmap,target);
1470 int rt=get_reg(regmap,target);
1471 if(rs<0) rs=get_reg_temp(regmap);
1474 uintptr_t host_addr = 0;
1476 int cc=get_reg(regmap,CCREG);
1477 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1479 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1480 if (handler == NULL) {
1481 if(rt<0||dops[i].rt1==0)
1483 if (addr != host_addr)
1484 emit_movimm_from64(addr, rs, host_addr, rs);
1486 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1487 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1488 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1489 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1490 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1495 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// generic handler entry points, selected by access width
1497 if(type==LOADB_STUB||type==LOADBU_STUB)
1498 handler=jump_handler_read8;
1499 if(type==LOADH_STUB||type==LOADHU_STUB)
1500 handler=jump_handler_read16;
1501 if(type==LOADW_STUB)
1502 handler=jump_handler_read32;
1505 // call a memhandler
1506 if(rt>=0&&dops[i].rt1!=0)
1510 emit_movimm(addr,0);
1514 emit_loadreg(CCREG,2);
1515 emit_addimm(cc<0?2:cc,adj,2);
// register 1 = the page's mem_rtab entry shifted left by one
1517 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1518 emit_adrp((void *)l1, 1);
1519 emit_addimm64(1, l1 & 0xfff, 1);
1522 emit_far_call(do_memhandler_pre);
1524 emit_far_call(handler);
1526 // (no cycle reload after read)
1527 if(rt>=0&&dops[i].rt1!=0)
1528 loadstore_extend(type, 0, rt);
1529 restore_regs(reglist);
// Emit the out-of-line stub for the memory write recorded in stubs[n].
// Visible flow:
//   - the branch at stubs[n].addr is retargeted to this stub
//   - the mem_wtab entry for the address page (rs >> 12) is loaded and doubled
//     with a flag-setting add
//   - a store of the requested width through that value is emitted, followed
//     by a jump back to stubs[n].retaddr
//   - otherwise jump_handler_write8/16/32 is called with the adjusted cycle
//     count in register 2; the handler returns the new cycle count in
//     register 0, from which the adjustment is subtracted again
// NOTE(review): many short lines (braces, else branches, declarations of i,
// rs, rt, r and handler, argument setup) are missing from this listing.
1532 static void do_writestub(int n)
1534 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1535 set_jump_target(stubs[n].addr, out);
1536 enum stub_type type=stubs[n].type;
1539 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1540 u_int reglist=stubs[n].e;
1541 signed char *i_regmap=i_regs->regmap;
// value register: FTEMP for C1LS/C2LS, otherwise the insn's rs2
1543 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1544 rt=get_reg(i_regmap,r=FTEMP);
1546 rt=get_reg(i_regmap,r=dops[i].rs2);
1550 int rtmp,temp=-1,temp2,regs_saved=0;
1551 void *restore_jump = NULL, *handler_jump = NULL;
// reglist plus the address and value registers
1552 int reglist2=reglist|(1<<rs)|(1<<rt);
// scan for a host register that is not in reglist (and not EXCLUDE_REG)
1553 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1554 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1562 for(rtmp=0;rtmp<=3;rtmp++)
1563 if(rtmp!=rs&&rtmp!=rt)
1566 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1569 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then doubled with flags set
1572 emit_readdword(&mem_wtab,temp);
1573 emit_shrimm(rs,12,temp2);
1574 emit_readdword_dualindexedx8(temp,temp2,temp2);
1575 emit_adds64(temp2,temp2,temp2);
1579 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1580 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1581 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1586 emit_jmp(0); // jump to reg restore
1589 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1590 set_jump_target(handler_jump, out);
1596 case STOREB_STUB: handler=jump_handler_write8; break;
1597 case STOREH_STUB: handler=jump_handler_write16; break;
1598 case STOREW_STUB: handler=jump_handler_write32; break;
1604 emit_mov64(temp2,3);
1605 host_tempreg_release();
1607 int cc=get_reg(i_regmap,CCREG);
1609 emit_loadreg(CCREG,2);
1610 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1611 // returns new cycle_count
1612 emit_far_call(handler);
// undo the adjustment on the returned count
1613 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1615 emit_storereg(CCREG,2);
1617 set_jump_target(restore_jump, out);
1618 restore_regs(reglist);
1619 emit_jmp(stubs[n].retaddr);
// Emit an inline write for instruction i to the known address addr.
//   type    - store stub type (selects width)
//   regmap  - register map used to locate target, CCREG and a temp register
//   target  - guest register whose value is stored
//   adj     - cycle adjustment applied around the handler call
//   reglist - host registers passed to restore_regs after a handler call
// get_direct_memhandler is queried first. When it returns NULL the store is
// emitted directly to host_addr (rs is re-pointed with emit_movimm_from64 if
// host_addr differs from addr). Otherwise the value is placed in register 0,
// the handler is called between do_memhandler_pre and do_memhandler_post, and
// the cycle count is taken back from register 0 with the adjustment removed.
// NOTE(review): several short lines (braces, returns, declarations of cc and
// cc_use, the register save) are missing from this listing.
1622 static void inline_writestub(enum stub_type type, int i, u_int addr,
1623 const signed char regmap[], int target, int adj, u_int reglist)
1625 int rs = get_reg_temp(regmap);
1626 int rt = get_reg(regmap,target);
1629 uintptr_t host_addr = 0;
1630 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1631 if (handler == NULL) {
1632 if (addr != host_addr)
1633 emit_movimm_from64(addr, rs, host_addr, rs);
1635 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1636 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1637 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1643 // call a memhandler
1645 emit_writeword(rs, &address); // some handlers still need it
1646 loadstore_extend(type, rt, 0);
1648 cc = cc_use = get_reg(regmap, CCREG);
1650 emit_loadreg(CCREG, (cc_use = 2));
1651 emit_addimm(cc_use, adj, 2);
1653 emit_far_call(do_memhandler_pre);
1654 emit_far_call(handler);
1655 emit_far_call(do_memhandler_post);
// register 0 holds the handler's returned cycle count; remove the adjustment
1656 emit_addimm(0, -adj, cc_use);
1658 emit_storereg(CCREG, cc_use);
1659 restore_regs(reglist);
// Emit the code that precedes a cop2 (GTE) handler call: store the registers
// in reglist, run the cop2 stall check for op, and load register 0 with the
// address of the cop2 data registers (computed as an offset from FP).
// NOTE(review): the lines around the pcnt_gte_start call are missing from this
// listing; it is presumably compiled in only for profiling builds - confirm.
1664 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1666 save_load_regs_all(1, reglist);
1667 cop2_do_stall_check(op, i, i_regs, 0);
1670 emit_far_call(pcnt_gte_start);
1672 // pointer to cop2 regs
1673 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Emit the code that follows a cop2 (GTE) handler call: reload the registers
// in reglist that c2op_prologue stored.
// NOTE(review): the lines around the pcnt_gte_end call are missing from this
// listing; it is presumably compiled in only for profiling builds - confirm.
1676 static void c2op_epilogue(u_int op,u_int reglist)
1680 emit_far_call(pcnt_gte_end);
1682 save_load_regs_all(0, reglist);
// Assemble the cop2 (GTE) operation at instruction i.
// The operation number is the low 6 bits of the opcode word. When a handler
// exists for it, the emitted code stores the mapped caller-save registers,
// writes the opcode word to psxRegs.code and calls either the regular handler
// or the "nf" variant, chosen by need_flags.
// NOTE(review): the effect of NDHACK_GTE_NO_FLAGS and the remaining branches
// are missing from this listing.
1685 static void c2op_assemble(int i, const struct regstat *i_regs)
1687 u_int c2op=source[i]&0x3f;
1688 u_int hr,reglist_full=0,reglist;
1689 int need_flags,need_ir;
// collect every host register that currently maps a guest register
1690 for(hr=0;hr<HOST_REGS;hr++) {
1691 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1693 reglist=reglist_full&CALLER_SAVE_REGS;
1695 if (gte_handlers[c2op]!=NULL) {
1696 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1697 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1698 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1699 source[i],gte_unneeded[i+1],need_flags,need_ir);
1700 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1702 //int shift = (source[i] >> 19) & 1;
1703 //int lm = (source[i] >> 10) & 1;
1707 c2op_prologue(c2op, i, i_regs, reglist);
1708 emit_movimm(source[i],1); // opcode
1709 emit_writeword(1,&psxRegs.code);
1710 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1713 c2op_epilogue(c2op,reglist);
1717 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1719 //value = value & 0x7ffff000;
1720 //if (value & 0x7f87e000) value |= 0x80000000;
1721 emit_andimm(sl, 0x7fffe000, temp);
1722 emit_testimm(temp, 0xff87ffff);
1723 emit_andimm(sl, 0x7ffff000, temp);
1724 host_tempreg_acquire();
1725 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1726 emit_cmovne_reg(HOST_TEMPREG, temp);
1727 host_tempreg_release();
1728 assert(0); // testing needed
1731 static void do_mfc2_31_one(u_int copr,signed char temp)
1733 emit_readshword(®_cop2d[copr],temp);
1734 emit_bicsar_imm(temp,31,temp);
1735 emit_cmpimm(temp,0xf80);
1736 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1737 emit_andimm(temp,0xf80,temp);
// Emit the read of cop2 data register 29: the limited values of registers 9,
// 10 and 11 (see do_mfc2_31_one) are combined into tl using shifts of 7, 2
// and 3, and the result is also written back to register 29's storage.
// HOST_TEMPREG is borrowed as the scratch register and released at the end
// when it was the one used.
// NOTE(review): the condition guarding the HOST_TEMPREG acquisition is missing
// from this listing (presumably "no temp register supplied" - confirm).
// NOTE(review): the address argument of emit_writeword below contains a
// mis-encoded character; it should read "&reg_cop2d[29]" ("&reg" was turned
// into a single symbol). Fix the code when restoring this block.
1740 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1743 host_tempreg_acquire();
1744 temp = HOST_TEMPREG;
1746 do_mfc2_31_one(9,temp);
1747 emit_shrimm(temp,7,tl);
1748 do_mfc2_31_one(10,temp);
1749 emit_orrshr_imm(temp,2,tl);
1750 do_mfc2_31_one(11,temp);
1751 emit_orrshl_imm(temp,3,tl);
1752 emit_writeword(tl,®_cop2d[29]);
1754 if (temp == HOST_TEMPREG)
1755 host_tempreg_release();
// Assemble a MULT/MULTU/DIV/DIVU instruction (selected by dops[i].opcode2;
// 0x18 is MULT and 0x1A is DIV per the comments below, 0x1B is treated as the
// other divide).
// Visible cases:
//   - both sources non-zero registers, multiply: smull/umull into hi, then
//     hi is shifted right by 32 (the lo half is presumably copied out on a
//     line missing from this listing)
//   - both sources non-zero registers, divide: sdiv/udiv into LO, remainder
//     into HI via msub, then the quotient is replaced when the denominator is
//     zero
//   - a source is register 0: division by zero and multiply-by-zero results
//     are produced directly
// NOTE(review): switch/case labels, braces, else branches and some asserts are
// missing from this listing.
1758 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1764 if(dops[i].rs1&&dops[i].rs2)
1766 switch(dops[i].opcode2)
1771 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1772 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1773 signed char hi=get_reg(i_regs->regmap,HIREG);
1774 signed char lo=get_reg(i_regs->regmap,LOREG);
1780 if(dops[i].opcode2==0x18) // MULT
1781 emit_smull(m1,m2,hi);
1783 emit_umull(m1,m2,hi);
// keep only the upper 32 bits of the 64-bit product in hi
1786 emit_shrimm64(hi,32,hi);
1792 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1793 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1794 signed char quotient=get_reg(i_regs->regmap,LOREG);
1795 signed char remainder=get_reg(i_regs->regmap,HIREG);
1796 assert(numerator>=0);
1797 assert(denominator>=0);
1798 assert(quotient>=0);
1799 assert(remainder>=0);
1801 if (dops[i].opcode2 == 0x1A) // DIV
1802 emit_sdiv(numerator,denominator,quotient);
1804 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1805 emit_msub(quotient,denominator,numerator,remainder);
1807 // div 0 quotient (remainder is already correct)
1808 host_tempreg_acquire();
1809 if (dops[i].opcode2 == 0x1A) // DIV
1810 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1812 emit_movimm(~0,HOST_TEMPREG);
// quotient <- HOST_TEMPREG only when the denominator is zero
1813 emit_test(denominator,denominator);
1814 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1815 host_tempreg_release();
1824 signed char hr=get_reg(i_regs->regmap,HIREG);
1825 signed char lr=get_reg(i_regs->regmap,LOREG);
1826 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1829 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1830 assert(numerator >= 0);
// HI receives the numerator unchanged
1832 emit_mov(numerator,hr);
1834 if (dops[i].opcode2 == 0x1A) // DIV
1835 emit_sub_asrimm(0,numerator,31,lr);
1841 if (hr >= 0) emit_zeroreg(hr);
1842 if (lr >= 0) emit_movimm(~0,lr);
1847 // Multiply by zero is zero.
1848 if (hr >= 0) emit_zeroreg(hr);
1849 if (lr >= 0) emit_zeroreg(lr);
1853 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the guest address held in host register rs; the lookup goes
// through a far call to ndrc_get_addr_ht.
// NOTE(review): the argument setup before the call and the jump after it are
// missing from this listing.
1855 static void do_jump_vaddr(u_int rs)
1859 emit_far_call(ndrc_get_addr_ht);
1863 static void do_preload_rhash(u_int r) {
1864 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1865 // register. On ARM the hash can be done with a single instruction (below)
1868 static void do_preload_rhtbl(u_int ht) {
1869 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1872 static void do_rhash(u_int rs,u_int rh) {
1873 emit_andimm(rs, 0xf8, rh);
1876 static void do_miniht_load(int ht, u_int rh) {
1877 emit_add64(ht, rh, ht);
1878 emit_ldst(0, 0, rh, ht, 0);
// Emit the jump that follows a mini hash table lookup. The visible part sets
// a branch target at the current output position and then loads the 64-bit
// value at [ht + 8] back into ht.
// NOTE(review): the compare, the branch that jaddr refers to, the fallback
// path and the final jump are missing from this listing.
1881 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1887 set_jump_target(jaddr, out);
1888 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// the immediate is scaled by 8, hence 8 >> 3
1889 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that records return_address in the mini hash table.
// rt is loaded with the 32-bit return_address (upper half by movz lsl 16,
// lower half by movk), a linker entry is registered for it, and the slot
// selected by (return_address & 0xFF) >> 3 receives temp as a 64-bit value in
// element [1] and rt as a 32-bit value in element [0].
// NOTE(review): the line that loads temp is missing from this listing.
1893 // parsed by set_jump_target?
1894 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1895 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1896 emit_movk(return_address&0xffff,rt);
1897 add_to_linker(out,return_address,1);
1899 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1900 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make the address range [start, end) safe to execute after code was written
// to it, by issuing data cache and instruction cache maintenance per line.
// Line sizes are derived from CTR_EL0 on every call, and the smallest sizes
// seen so far are remembered in function-local statics.
// NOTE(review): closing braces are missing from this listing, so which of the
// "dsb ish" barriers sit inside the conditional blocks cannot be confirmed
// from here.
1903 static void clear_cache_arm64(char *start, char *end)
1905 // Don't rely on GCC's __clear_cache implementation, as it caches
1906 // icache/dcache cache line sizes, that can vary between cores on
1907 // big.LITTLE architectures.
1908 uint64_t addr, ctr_el0;
1909 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1910 size_t isize, dsize;
// CTR_EL0 bits 3:0 and 19:16 give log2 of the line sizes in 4-byte words
1912 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
1913 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1914 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1916 // use the global minimum cache line size
1917 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1918 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1920 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1921 not required for instruction to data coherence. */
1922 if ((ctr_el0 & (1 << 28)) == 0x0) {
// round start down to a data cache line boundary
1923 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1924 for (; addr < (uint64_t)end; addr += dsize)
1925 // use "civac" instead of "cvau", as this is the suggested workaround for
1926 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1927 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
1929 __asm__ volatile("dsb ish" : : : "memory");
1931 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1932 Unification is not required for instruction to data coherence. */
1933 if ((ctr_el0 & (1 << 29)) == 0x0) {
// round start down to an instruction cache line boundary
1934 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1935 for (; addr < (uint64_t)end; addr += isize)
1936 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1938 __asm__ volatile("dsb ish" : : : "memory");
1941 __asm__ volatile("isb" : : : "memory");
// Fills every slot of ndrc->tramp.ops with a two-instruction trampoline:
// "ldr x17, <literal>" followed by "br x17". The literal offset is the byte
// distance from tramp.ops to tramp.f, which must be a multiple of 4.
// The writes go through the pointer returned by start_tcache_write and are
// finished with end_tcache_write over the same range.
// NOTE(review): the declaration of the loop index i is missing from this
// listing.
1944 // CPU-architecture-specific initialization
1945 static void arch_init(void)
1947 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1948 struct tramp_insns *ops = ndrc->tramp.ops, *opsw;
// the ldr-literal offset is encoded in words, so diff must be word aligned
1950 assert(!(diff & 3));
1951 opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1952 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1953 opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1954 opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1956 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1959 // vim:shiftwidth=2:expandtab