1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
24 #include "arm_features.h"
27 static void set_jump_target(void *addr, void *target)
29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
30 intptr_t offset = (u_char *)target - (u_char *)addr;
32 if ((*ptr&0xFC000000) == 0x14000000) { // b
33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
40 // should only happen when jumping to an already compiled block (see add_jump_out)
41 // a workaround would be to do a trampoline jump via a stub at the end of the block
42 assert(-1048576 <= offset && offset < 1048576);
43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
45 else if((*ptr&0x9f000000)==0x10000000) { // adr
46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
51 abort(); // should not happen
// from a pointer to external jump stub (which was produced by emit_extjump2)
// find where the jumping insn is
//
// The third instruction word of the stub must be an adr; its 21-bit signed
// byte displacement (immhi in bits 23..5, immlo in bits 30..29) is decoded
// and applied to the address of that adr, in whole instruction words.
// Returns the resulting address.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode on an unsigned copy: left-shifting a signed (possibly negative)
  // value is undefined behaviour in C
  uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  int offset = (int)imm21;
  if (imm21 & 0x100000) // sign bit of the 21-bit field
    offset -= 0x200000;
  return ptr + offset / 4;
}
65 // find where external branch is linked to using addr of its stub:
66 // get address that the stub loads (dyna_linker arg1),
67 // treat it as a pointer to branch insn,
68 // return addr where that branch jumps to
// The visible cases decode the branch displacement as a sign-extended word
// count (26 bits for b, 19 bits for b.cond/cbz/cbnz) and add it to i_ptr.
// NOTE(review): the handling of any other instruction is not visible here.
69 static void *get_pointer(void *stub)
71 int *i_ptr = find_extjump_insn(stub);
72 if ((*i_ptr&0xfc000000) == 0x14000000) // b
73 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
74 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
75 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
76 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
82 // Allocate a specific ARM register.
// Scans all host registers except EXCLUDE_REG for one already mapped to reg,
// remembers its dirty bit, and carries that bit over to host register hr;
// hr's isconst bit is cleared.
// NOTE(review): several statements of this function are not visible here.
83 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
103 // Alloc cycle count into dedicated register
104 static void alloc_cc(struct regstat *cur,int i)
106 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
114 static unused const char *regname[32] = {
115 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
116 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
117 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
118 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
121 static unused const char *regname64[32] = {
122 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
123 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
124 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
125 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// Condition-code identifiers, listed in the same order as the 16 entries of
// condname[]; the emitters place them into instruction words (e.g. cond << 12).
// NOTE(review): presumably the enumerators of an enum whose opening line is
// not visible here - confirm.
129 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
130 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
133 static unused const char *condname[16] = {
134 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
135 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Stores one 32-bit instruction word through NDRC_WRITE_OFFSET(out).
// NOTE(review): the rest of the body (presumably advancing out) is not visible.
138 static void output_w32(u_int word)
140 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
// Packs rn at bit 5 and rd at bit 0 of an instruction word.
144 static u_int rn_rd(u_int rn, u_int rd)
148 return (rn << 5) | rd;
// Packs rm at bit 16, rn at bit 5 and rd at bit 0 of an instruction word.
151 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
156 return (rm << 16) | (rn << 5) | rd;
// Same as rm_rn_rd() with an additional register field ra at bit 10.
159 static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
162 return rm_rn_rd(rm, rn, rd) | (ra << 10);
// Packs imm7 at bit 15, rt2 at bit 10, rn at bit 5 and rt at bit 0.
165 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
171 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Same as rm_rn_rd() with an additional immediate field imm6 at bit 10
// (used by the callers as a shift amount).
174 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
177 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Packs a 16-bit immediate at bit 5 above rd; asserts imm16 fits in 16 bits.
180 static u_int imm16_rd(u_int imm16, u_int rd)
182 assert(imm16 < 0x10000);
184 return (imm16 << 5) | rd;
// Packs a 12-bit immediate at bit 10, rn at bit 5 and rd at bit 0;
// asserts imm12 fits in 12 bits.
187 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
189 assert(imm12 < 0x1000);
192 return (imm12 << 10) | (rn << 5) | rd;
// Packs a 9-bit immediate at bit 12, rn at bit 5 and the transfer register
// at bit 0; asserts imm9 fits in 9 bits (callers mask signed offsets with 0x1ff).
195 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
197 assert(imm9 < 0x200);
200 return (imm9 << 12) | (rn << 5) | rd;
// Packs a 19-bit immediate at bit 5 above rt; asserts imm19 fits in 19 bits.
203 static u_int imm19_rt(u_int imm19, u_int rt)
205 assert(imm19 < 0x80000);
207 return (imm19 << 5) | rt;
// Packs n at bit 22, immr at bit 16, imms at bit 10, rn at bit 5 and rd at bit 0
// (used by the logical-immediate and bitfield emitters in this file).
210 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
217 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Computes the 26-bit word-offset field for a branch from the current output
// position (out) to addr. addr values below 3 are placeholders and give 0.
// An out-of-range displacement is reported via SysPrintf.
// NOTE(review): what follows the SysPrintf is not visible here.
220 static u_int genjmp(const u_char *addr)
222 intptr_t offset = addr - out;
223 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
224 if (offset < -134217728 || offset > 134217727) {
225 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 return ((u_int)offset >> 2) & 0x03ffffff;
// Computes the 19-bit word-offset field for a conditional branch from the
// current output position (out) to addr. addr values below 3 give 0.
// An out-of-range displacement (outside -1048576..1048572) is reported via SysPrintf.
// NOTE(review): what follows the SysPrintf is not visible here.
232 static u_int genjmpcc(const u_char *addr)
234 intptr_t offset = addr - out;
235 if ((uintptr_t)addr < 3) return 0;
236 if (offset < -1048576 || offset > 1048572) {
237 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 return ((u_int)offset >> 2) & 0x7ffff;
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ...), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low run of ones clears every bit of that run
  return (value & (value + 1)) == 0;
}
249 // This function returns true if the argument contains a
250 // non-empty sequence of ones (possibly rotated) with the remainder zero.
251 static uint32_t is_rotated_mask(u_int value)
253 if (value == 0 || value == ~0)
255 if (is_mask((value - 1) | value))
257 return is_mask((~value - 1) | ~value);
// Derives the immr/imms fields for a logical-immediate instruction from value.
// The visible code counts leading/trailing zeros of a contiguous run of ones
// and, in the first case, sets immr to (32 - tzeros) & 31.
// NOTE(review): the imms computation and the second case's results are not
// visible here.
260 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
262 int lzeros, tzeros, ones;
264 if (is_mask((value - 1) | value)) {
265 lzeros = __builtin_clz(value);
266 tzeros = __builtin_ctz(value);
267 ones = 32 - lzeros - tzeros;
268 *immr = (32 - tzeros) & 31;
273 if (is_mask((value - 1) | value)) {
274 lzeros = __builtin_clz(value);
275 tzeros = __builtin_ctz(value);
276 ones = 32 - lzeros - tzeros;
284 static void emit_mov(u_int rs, u_int rt)
286 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
287 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
290 static void emit_mov64(u_int rs, u_int rt)
292 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
293 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
296 static void emit_add(u_int rs1, u_int rs2, u_int rt)
298 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
299 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
302 static void emit_adds(u_int rs1, u_int rs2, u_int rt)
304 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
305 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
308 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
310 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
311 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
314 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
316 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
317 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
319 #define emit_adds_ptr emit_adds64
321 static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
323 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
324 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
327 static void emit_neg(u_int rs, u_int rt)
329 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
333 static void emit_negs(u_int rs, u_int rt)
335 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
339 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345 static void emit_subs(u_int rs1, u_int rs2, u_int rt)
347 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
348 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
351 static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
353 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
354 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
357 static void emit_movz(u_int imm, u_int rt)
359 assem_debug("movz %s,#%#x\n", regname[rt], imm);
360 output_w32(0x52800000 | imm16_rd(imm, rt));
363 static void emit_movz_lsl16(u_int imm, u_int rt)
365 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
366 output_w32(0x52a00000 | imm16_rd(imm, rt));
369 static void emit_movn(u_int imm, u_int rt)
371 assem_debug("movn %s,#%#x\n", regname[rt], imm);
372 output_w32(0x12800000 | imm16_rd(imm, rt));
375 static void emit_movn_lsl16(u_int imm,u_int rt)
377 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
378 output_w32(0x12a00000 | imm16_rd(imm, rt));
381 static void emit_movk(u_int imm,u_int rt)
383 assem_debug("movk %s,#%#x\n", regname[rt], imm);
384 output_w32(0x72800000 | imm16_rd(imm, rt));
// movk rt, #imm, lsl #16 (32-bit): opcode 0x72a00000 with imm16_rd() fields.
// NOTE(review): one statement before the debug line is not visible here.
387 static void emit_movk_lsl16(u_int imm,u_int rt)
390 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
391 output_w32(0x72a00000 | imm16_rd(imm, rt));
// NOTE(review): the body is not visible in this excerpt; judging by the name
// and its callers it presumably emits code that sets rt to zero - confirm.
394 static void emit_zeroreg(u_int rt)
// Loads a 32-bit constant into rt using the cheapest visible form:
// a single movz/movn variant when only one 16-bit half is significant,
// an "orr rt,wzr,#imm" when imm is a rotated mask (see is_rotated_mask),
// and otherwise a movz of the low half followed by movk of the high half.
// NOTE(review): the first branch(es) of the chain are not visible here.
399 static void emit_movimm(u_int imm, u_int rt)
403 else if ((~imm) < 65536)
405 else if ((imm&0xffff) == 0)
406 emit_movz_lsl16(imm >> 16, rt);
407 else if (((~imm)&0xffff) == 0)
408 emit_movn_lsl16(~imm >> 16, rt);
409 else if (is_rotated_mask(imm)) {
411 gen_logical_imm(imm, &immr, &imms);
412 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
413 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
416 emit_movz(imm & 0xffff, rt);
417 emit_movk_lsl16(imm >> 16, rt);
// Loads a 64-bit constant into rt, walking its four 16-bit chunks (shift 0..3).
// The first emitted instruction uses the movz opcode (0xd2800000), later ones
// movk (0xf2800000); the chunk index goes into bits 22..21.
// A final "movz rt,#0" is visible for the case where nothing was emitted.
// NOTE(review): the chunk-skipping test and the insns bookkeeping are not
// visible here.
421 static void emit_movimm64(uint64_t imm, u_int rt)
423 u_int shift, op, imm16, insns = 0;
424 for (shift = 0; shift < 4; shift++) {
425 imm16 = (imm >> shift * 16) & 0xffff;
428 op = insns ? 0xf2800000 : 0xd2800000;
429 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
431 assem_debug(",lsl #%u", shift * 16);
433 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
437 assem_debug("movz %s,#0\n", regname64[rt]);
438 output_w32(0xd2800000 | imm16_rd(0, rt));
// Emits a 32-bit load of *addr into rt, addressed as FP + offset where offset
// is addr's distance from dynarec_local. Only 4-byte aligned offsets up to
// 16380 are encoded (scaled by 4 into the 12-bit immediate).
// NOTE(review): the handling of other offsets is not visible here.
442 static void emit_readword(void *addr, u_int rt)
444 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
445 if (!(offset & 3) && offset <= 16380) {
446 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
447 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit load of *addr into rt, addressed as FP + offset where offset
// is addr's distance from dynarec_local. Only 8-byte aligned offsets up to
// 32760 are encoded (scaled by 8 into the 12-bit immediate).
// NOTE(review): the handling of other offsets is not visible here.
453 static void emit_readdword(void *addr, u_int rt)
455 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
456 if (!(offset & 7) && offset <= 32760) {
457 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
458 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
463 #define emit_readptr emit_readdword
// Emits a sign-extending 16-bit load (ldrsh) of *addr into rt, addressed as
// FP + offset from dynarec_local. Only 2-byte aligned offsets up to 8190 are
// encoded (scaled by 2 into the 12-bit immediate).
// NOTE(review): the handling of other offsets is not visible here.
465 static void emit_readshword(void *addr, u_int rt)
467 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
468 if (!(offset & 1) && offset <= 8190) {
469 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
470 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
// Emits a load of guest/internal register r into host register hr.
// Special registers map to dedicated variables (cycle_count, psxRegs.CP0.n.SR,
// invc_ptr, ram_offset); INVCP and ROREG are flagged is64 and are read with
// emit_readdword, everything else with emit_readword.
// NOTE(review): parts of the switch and the surrounding checks are not
// visible here.
476 static void emit_loadreg(u_int r, u_int hr)
484 //case HIREG: addr = &hi; break;
485 //case LOREG: addr = &lo; break;
486 case CCREG: addr = &cycle_count; break;
487 case CSREG: addr = &psxRegs.CP0.n.SR; break;
488 case INVCP: addr = &invc_ptr; is64 = 1; break;
489 case ROREG: addr = &ram_offset; is64 = 1; break;
492 addr = &psxRegs.GPR.r[r];
496 emit_readdword(addr, hr);
498 emit_readword(addr, hr);
// Emits a 32-bit store of rt to *addr, addressed as FP + offset where offset
// is addr's distance from dynarec_local. Only 4-byte aligned offsets up to
// 16380 are encoded.
// NOTE(review): the handling of other offsets is not visible here.
502 static void emit_writeword(u_int rt, void *addr)
504 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
505 if (!(offset & 3) && offset <= 16380) {
506 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
507 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Emits a 64-bit store of rt to *addr, addressed as FP + offset where offset
// is addr's distance from dynarec_local. Only 8-byte aligned offsets up to
// 32760 are encoded.
// NOTE(review): the handling of other offsets is not visible here.
513 static void emit_writedword(u_int rt, void *addr)
515 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
516 if (!(offset & 7) && offset <= 32760) {
517 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
518 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
// Emits a 32-bit store of host register hr to the home location of register r:
// psxRegs.GPR.r[r] by default (r must be below 34), or cycle_count for CCREG.
// NOTE(review): the switch header and one earlier statement are not visible.
524 static void emit_storereg(u_int r, u_int hr)
527 void *addr = &psxRegs.GPR.r[r];
529 //case HIREG: addr = &hi; break;
530 //case LOREG: addr = &lo; break;
531 case CCREG: addr = &cycle_count; break;
532 default: assert(r < 34); break;
534 emit_writeword(hr, addr);
537 static void emit_test(u_int rs, u_int rt)
539 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
540 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
543 static void emit_testimm(u_int rs, u_int imm)
546 assem_debug("tst %s,#%#x\n", regname[rs], imm);
547 assert(is_rotated_mask(imm)); // good enough for PCSX
548 gen_logical_imm(imm, &immr, &imms);
549 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
552 static void emit_not(u_int rs,u_int rt)
554 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
555 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
558 static void emit_and(u_int rs1,u_int rs2,u_int rt)
560 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
561 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
564 static void emit_or(u_int rs1,u_int rs2,u_int rt)
566 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
567 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
570 static void emit_bic(u_int rs1,u_int rs2,u_int rt)
572 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
573 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
576 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
578 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
579 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
582 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
584 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
585 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
588 static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
590 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
591 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
594 static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
596 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
597 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
600 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
602 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
603 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
606 static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
608 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
609 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
// Emits rt = rs + imm, picking an encoding by the magnitude of imm:
//  - a single add/sub with a 12-bit immediate (sub when -imm < 4096),
//  - an add/sub of imm >> 12 (opcodes 0x11400000 / 0x51400000), followed by a
//    second add/sub of the low 12 bits on rt; this form is only taken when
//    the low 12 bits are zero or flags are not requested,
//  - otherwise the constant is materialised with emit_movimm and added as a
//    register operand (HOST_TEMPREG is acquired/released around that use).
// s selects the flag-setting variant (bit 0x20000000) and is64 the
// 0x80000000 bit; both are converted from booleans at the top.
// NOTE(review): the debug lines always print the 32-bit register names, and
// the register fallback does not OR in is64 - confirm this is intended.
// NOTE(review): several conditions/statements are not visible here.
612 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
614 unused const char *st = s ? "s" : "";
615 s = s ? 0x20000000 : 0;
616 is64 = is64 ? 0x80000000 : 0;
618 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
619 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
621 else if (-imm < 4096) {
622 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
623 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
625 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
626 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
627 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
629 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
630 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
633 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
634 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
635 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
637 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
638 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
645 host_tempreg_acquire();
648 emit_movimm(imm, tmp);
649 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
650 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
651 if (tmp == HOST_TEMPREG)
652 host_tempreg_release();
// 32-bit add of an immediate without setting flags (emit_addimm_s with s=0, is64=0).
// NOTE(review): statements preceding this call are not visible here.
656 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
662 emit_addimm_s(0, 0, rs, imm, rt);
665 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
667 emit_addimm_s(0, 1, rs, imm, rt);
670 static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
672 emit_addimm64(rs, imm, rt);
675 static void emit_addimm_and_set_flags(int imm, u_int rt)
677 emit_addimm_s(1, 0, rt, imm, rt);
680 static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
682 emit_addimm_s(1, 0, rs, imm, rt);
// Emits rt = rs <op> imm for the logical operation selected by op
// (index into names[]: and, orr, eor, ands).
// When imm is a rotated mask it is encoded directly as a logical immediate;
// otherwise imm is loaded into HOST_TEMPREG and the register form is used.
// The temp register is acquired/released only if rs is HOST_TEMPREG or rt is
// not HOST_TEMPREG.
// NOTE(review): the declarations of immr/imms and any adjustment of op before
// it is OR-ed into the opcode are not visible here.
685 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
687 const char *names[] = { "and", "orr", "eor", "ands" };
688 const char *name = names[op];
691 if (is_rotated_mask(imm)) {
692 gen_logical_imm(imm, &immr, &imms);
693 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
694 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
697 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
698 host_tempreg_acquire();
699 emit_movimm(imm, HOST_TEMPREG);
700 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
701 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
702 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
703 host_tempreg_release();
// rt = rs & imm via emit_logicop_imm (op index 0, "and").
// NOTE(review): statements preceding this call are not visible here.
708 static void emit_andimm(u_int rs, u_int imm, u_int rt)
713 emit_logicop_imm(0, rs, imm, rt);
// rt = rs | imm via emit_logicop_imm (op index 1, "orr").
// NOTE(review): statements preceding this call are not visible here.
716 static void emit_orimm(u_int rs, u_int imm, u_int rt)
723 emit_logicop_imm(1, rs, imm, rt);
// rt = rs ^ imm via emit_logicop_imm (op index 2, "eor").
// NOTE(review): statements preceding this call are not visible here.
726 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
733 emit_logicop_imm(2, rs, imm, rt);
736 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
738 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
739 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
742 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
744 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
745 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
748 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
750 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
751 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
754 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
756 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
760 static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
762 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
763 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
766 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
768 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
769 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
772 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
774 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
775 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
778 static void emit_signextend16(u_int rs, u_int rt)
780 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
781 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
784 static void emit_shl(u_int rs,u_int rshift,u_int rt)
786 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
787 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
790 static void emit_shr(u_int rs,u_int rshift,u_int rt)
792 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
793 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
796 static void emit_sar(u_int rs,u_int rshift,u_int rt)
798 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
799 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
// Emits a 32-bit compare of rs against imm:
// cmp with a 12-bit immediate, cmn when -imm < 4096, cmp with imm >> 12
// (opcode 0x71400000) when the low 12 bits are clear, and otherwise a compare
// against HOST_TEMPREG loaded with imm.
// NOTE(review): the "cmn" debug line prints imm although the encoded
// immediate is -imm.
// NOTE(review): the first condition of the chain is not visible here.
802 static void emit_cmpimm(u_int rs, u_int imm)
805 assem_debug("cmp %s,%#x\n", regname[rs], imm);
806 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
808 else if (-imm < 4096) {
809 assem_debug("cmn %s,%#x\n", regname[rs], imm);
810 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
812 else if (imm < 16777216 && !(imm & 0xfff)) {
813 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
814 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
817 host_tempreg_acquire();
818 emit_movimm(imm, HOST_TEMPREG);
819 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
820 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
821 host_tempreg_release();
825 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
827 assert(imm == 0 || imm == 1);
828 assert(cond0 < 0x10);
829 assert(cond1 < 0x10);
831 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
832 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
834 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
835 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
839 static void emit_cmovne_imm(u_int imm,u_int rt)
841 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
844 static void emit_cmovl_imm(u_int imm,u_int rt)
846 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
849 static void emit_cmovb_imm(int imm,u_int rt)
851 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
854 static void emit_cmoveq_reg(u_int rs,u_int rt)
856 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
857 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
860 static void emit_cmovne_reg(u_int rs,u_int rt)
862 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
863 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
866 static void emit_cmovl_reg(u_int rs,u_int rt)
868 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
869 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
872 static void emit_cmovb_reg(u_int rs,u_int rt)
874 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
875 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
878 static void emit_cmovs_reg(u_int rs,u_int rt)
880 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
881 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
884 static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
886 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
887 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
890 static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
892 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
// Sets rt to 1 if a signed "lt" condition holds, else 0: rt is zeroed first
// (before the missing statement when rs != rt, after it when rs == rt), then
// conditionally set to 1 via emit_cmovl_imm.
// NOTE(review): the statement between the two zeroing variants (presumably the
// comparison of rs with imm) is not visible here.
896 static void emit_slti32(u_int rs,int imm,u_int rt)
898 if(rs!=rt) emit_zeroreg(rt);
900 if(rs==rt) emit_movimm(0,rt);
901 emit_cmovl_imm(1,rt);
// Unsigned counterpart of emit_slti32: rt is zeroed, then conditionally set
// to 1 via emit_cmovb_imm ("cc" condition).
// NOTE(review): the statement between the two zeroing variants (presumably the
// comparison of rs with imm) is not visible here.
904 static void emit_sltiu32(u_int rs,int imm,u_int rt)
906 if(rs!=rt) emit_zeroreg(rt);
908 if(rs==rt) emit_movimm(0,rt);
909 emit_cmovb_imm(1,rt);
912 static void emit_cmp(u_int rs,u_int rt)
914 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
915 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
918 static void emit_cmpcs(u_int rs,u_int rt)
920 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
921 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
// Ends by conditionally forcing rt to 0 when "lt" holds (emit_cmovl_imm(0, rt)).
// NOTE(review): the statements that set up the flags and the initial value of
// rt are not visible here.
924 static void emit_set_gz32(u_int rs, u_int rt)
926 //assem_debug("set_gz32\n");
929 emit_cmovl_imm(0,rt);
// Copies rs to rt when they differ, then conditionally sets rt to 1 when "ne"
// holds (emit_cmovne_imm(1, rt)).
// NOTE(review): the statement that sets the flags in between is not visible here.
932 static void emit_set_nz32(u_int rs, u_int rt)
934 //assem_debug("set_nz32\n");
935 if(rs!=rt) emit_mov(rs,rt);
937 emit_cmovne_imm(1,rt);
// Sets rt to 1 if a signed "lt" condition holds, else 0. rt is zeroed before
// the missing statement when it aliases neither source, and after it otherwise.
// NOTE(review): the statement in between (presumably the comparison of rs1
// with rs2) is not visible here.
940 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
942 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
943 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
945 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
946 emit_cmovl_imm(1,rt);
// Unsigned counterpart of emit_set_if_less32: rt is zeroed, then conditionally
// set to 1 via emit_cmovb_imm ("cc" condition).
// NOTE(review): the statement in between (presumably the comparison of rs1
// with rs2) is not visible here.
949 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
951 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
952 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
954 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
955 emit_cmovb_imm(1,rt);
958 static int can_jump_or_call(const void *a)
960 intptr_t diff = (u_char *)a - out;
961 return (-134217728 <= diff && diff <= 134217727);
// Emits "bl a" when the displacement from out fits in -134217728..134217727.
// NOTE(review): the out-of-range path and one statement before the range
// check are not visible here.
964 static void emit_call(const void *a)
966 intptr_t diff = (u_char *)a - out;
967 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
969 if (-134217728 <= diff && diff <= 134217727)
970 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
975 static void emit_jmp(const void *a)
977 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
978 u_int offset = genjmp(a);
979 output_w32(0x14000000 | offset);
982 static void emit_jne(const void *a)
984 assem_debug("bne %p\n", a);
985 u_int offset = genjmpcc(a);
986 output_w32(0x54000000 | (offset << 5) | COND_NE);
989 static void emit_jeq(const void *a)
991 assem_debug("beq %p\n", a);
992 u_int offset = genjmpcc(a);
993 output_w32(0x54000000 | (offset << 5) | COND_EQ);
996 static void emit_js(const void *a)
998 assem_debug("bmi %p\n", a);
999 u_int offset = genjmpcc(a);
1000 output_w32(0x54000000 | (offset << 5) | COND_MI);
1003 static void emit_jns(const void *a)
1005 assem_debug("bpl %p\n", a);
1006 u_int offset = genjmpcc(a);
1007 output_w32(0x54000000 | (offset << 5) | COND_PL);
1010 static void emit_jl(const void *a)
1012 assem_debug("blt %p\n", a);
1013 u_int offset = genjmpcc(a);
1014 output_w32(0x54000000 | (offset << 5) | COND_LT);
1017 static void emit_jge(const void *a)
1019 assem_debug("bge %p\n", a);
1020 u_int offset = genjmpcc(a);
1021 output_w32(0x54000000 | (offset << 5) | COND_GE);
1024 static void emit_jo(const void *a)
1026 assem_debug("bvs %p\n", a);
1027 u_int offset = genjmpcc(a);
1028 output_w32(0x54000000 | (offset << 5) | COND_VS);
1031 static void emit_jno(const void *a)
1033 assem_debug("bvc %p\n", a);
1034 u_int offset = genjmpcc(a);
1035 output_w32(0x54000000 | (offset << 5) | COND_VC);
1038 static void emit_jc(const void *a)
1040 assem_debug("bcs %p\n", a);
1041 u_int offset = genjmpcc(a);
1042 output_w32(0x54000000 | (offset << 5) | COND_CS);
1045 static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
1047 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
1048 u_int offset = genjmpcc(a);
1049 is64 = is64 ? 0x80000000 : 0;
1050 isnz = isnz ? 0x01000000 : 0;
1051 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
// Emits a 32-bit cbz on register r targeting a (emit_cb with isnz=0, is64=0).
// NOTE(review): how the returned pointer is produced is not visible here.
1054 static void *emit_cbz(u_int r, const void *a)
1057 emit_cb(0, 0, a, r);
1061 static void emit_jmpreg(u_int r)
1063 assem_debug("br %s\n", regname64[r]);
1064 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1067 static void emit_retreg(u_int r)
1069 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1070 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// NOTE(review): the body is not visible in this excerpt; presumably emits a
// plain "ret" (see emit_retreg) - confirm.
1073 static void emit_ret(void)
// adr rt, addr: loads the address addr, which must lie within +/-1MB of the
// current output position. The low two offset bits go to bits 30..29, the
// remaining 19 bits to bits 23..5.
// NOTE(review): one statement before the debug line is not visible here.
1078 static void emit_adr(void *addr, u_int rt)
1080 intptr_t offset = (u_char *)addr - out;
1081 assert(-1048576 <= offset && offset < 1048576);
1083 assem_debug("adr x%d,#%#lx\n", rt, offset);
1084 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// adrp rt, addr: offset is the distance between the 4K pages of addr and out,
// asserted to fit in +/-4GB.
// NOTE(review): statements between the assert and the debug line (presumably
// scaling offset down to pages, as the "(000)" in the debug text suggests)
// are not visible here.
1087 static void emit_adrp(void *addr, u_int rt)
1089 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1090 assert(-4294967296l <= offset && offset < 4294967296l);
1093 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1094 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1097 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1099 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1100 assert(-256 <= offset && offset < 256);
1101 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1104 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1106 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1107 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1110 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1112 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1113 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1116 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1118 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1119 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1122 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1124 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1125 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1127 #define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1129 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1131 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1132 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1135 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1137 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1138 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1141 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1143 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1144 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1147 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1149 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1150 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1153 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1155 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1156 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1159 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1161 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1162 assert(-256 <= offset && offset < 256);
1163 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1166 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1168 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1169 assert(-256 <= offset && offset < 256);
1170 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1173 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1175 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1176 assert(-256 <= offset && offset < 256);
1177 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1180 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1182 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1183 assert(-256 <= offset && offset < 256);
1184 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Emits a 32-bit store of rt to [rs + offset]: the scaled "str" form for
// 4-byte aligned offsets in 0..16380, else "stur" for offsets in -256..255.
// NOTE(review): the base register is printed with the 32-bit name table here,
// unlike emit_writehword_indexed/emit_writebyte_indexed (debug text only).
// NOTE(review): the handling of other offsets is not visible here.
1187 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1189 if (!(offset & 3) && (u_int)offset <= 16380) {
1190 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1191 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1193 else if (-256 <= offset && offset < 256) {
1194 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1195 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Store the low halfword of rt at [rs + offset].
// Even offsets in 0..8190 use "strh" with a scaled 12-bit immediate;
// otherwise offsets in -256..255 use "sturh" with a signed 9-bit immediate.
1201 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1203 if (!(offset & 1) && (u_int)offset <= 8190) {
1204 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1205 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1207 else if (-256 <= offset && offset < 256) {
1208 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1209 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
// Store the low byte of rt at [rs + offset].
// Offsets in 0..4095 use "strb" with a 12-bit immediate; otherwise offsets
// in -256..255 use "sturb" with a signed 9-bit immediate.
1215 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1217 if ((u_int)offset < 4096) {
1218 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1219 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1221 else if (-256 <= offset && offset < 256) {
1222 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1223 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1229 static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1231 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1232 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1235 static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1237 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1238 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1241 static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1243 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1244 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1247 static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1249 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1253 static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1255 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1259 static void emit_clz(u_int rs, u_int rt)
1261 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1262 output_w32(0x5ac01000 | rn_rd(rs, rt));
1265 // special case for checking invalid_code
1266 static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1268 emit_shrimm(r, 12, rt);
1269 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1270 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1273 // special for loadlr_assemble, rs2 is destroyed
1274 static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1276 emit_shl(rs2, shift, rs2);
1277 emit_bic(rs1, rs2, rt);
1280 static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1282 emit_shr(rs2, shift, rs2);
1283 emit_bic(rs1, rs2, rt);
1286 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1288 u_int op = 0xb9000000;
1289 unused const char *ldst = is_st ? "st" : "ld";
1290 unused char rp = is64 ? 'x' : 'w';
1291 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1292 is64 = is64 ? 1 : 0;
1293 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1294 ofs = (ofs >> (2+is64));
1295 if (!is_st) op |= 0x00400000;
1296 if (is64) op |= 0x40000000;
1297 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1300 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1302 u_int op = 0x29000000;
1303 unused const char *ldst = is_st ? "st" : "ld";
1304 unused char rp = is64 ? 'x' : 'w';
1305 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1306 is64 = is64 ? 1 : 0;
1307 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1308 ofs = (ofs >> (2+is64));
1309 assert(-64 <= ofs && ofs <= 63);
1311 if (!is_st) op |= 0x00400000;
1312 if (is64) op |= 0x80000000;
1313 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Store (is_store != 0) or load the registers whose bits are set in reglist,
// using stack slots addressed as SP + SSP_CALLEE_REGS + ofs.
1316 static void save_load_regs_all(int is_store, u_int reglist)
// walk reglist from bit 0 upwards until no set bits remain
1320 for (r = 0; reglist; r++, reglist >>= 1) {
// pair[0]/pair[1] are transferred together with one 64-bit ldp/stp
1324 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
// single 64-bit ldr/str (presumably for a leftover unpaired register)
1330 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
// the bytes used must not exceed SSP_CALLER_REGS
1333 assert(ofs <= SSP_CALLER_REGS);
1336 // Save registers before function call
1337 static void save_regs(u_int reglist)
1339 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1340 save_load_regs_all(1, reglist);
1343 // Restore registers after function call
1344 static void restore_regs(u_int reglist)
1346 reglist &= CALLER_SAVE_REGS;
1347 save_load_regs_all(0, reglist);
1350 /* Stubs/epilogue */
// Literal pool hook of the stubs/epilogue section.
// NOTE(review): presumably a no-op on arm64 - confirm.
1352 static void literal_pool(int n)
// Companion hook to literal_pool().
// NOTE(review): presumably a no-op on arm64 - confirm.
1357 static void literal_pool_jumpover(int n)
// Emit the tail of an external jump: load the 32-bit target into register 0
// with a movz/movk pair and far-jump to dyna_linker.
// addr must point at a "b" or "b.cond" instruction (asserted on its top byte).
1361 // parsed by get_pointer, find_extjump_insn
1362 static void emit_extjump(u_char *addr, u_int target)
1364 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1366 emit_movz(target & 0xffff, 0);
1367 emit_movk_lsl16(target >> 16, 0);
1369 // addr is in the current recompiled block (max 256k)
1370 // offset shouldn't exceed +/-1MB
1372 emit_far_jump(dyna_linker);
// Sanity check for code laid out by emit_extjump(): the first instruction
// word must be "movz r0, #val".
1375 static void check_extjump2(void *src)
1378 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1382 // put rt_val into rt, potentially making use of rs with value rs_val
// Strategies tried in order: add/sub immediate of the difference, bitwise
// complement, xor with a rotated-mask immediate, and finally a plain
// immediate load that ignores rs.
1383 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1385 int diff = rt_val - rs_val;
// difference below 4096 in magnitude, or below 16M with the low 12 bits clear
1386 if ((-4096 < diff && diff < 4096)
1387 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1388 emit_addimm(rs, diff, rt);
1389 else if (rt_val == ~rs_val)
1391 else if (is_rotated_mask(rs_val ^ rt_val))
1392 emit_xorimm(rs, rs_val ^ rt_val, rt);
1394 emit_movimm(rt_val, rt);
1397 // return 1 if the above function can do its job cheaply
// (uses the same difference ranges and rotated-mask xor test as
// emit_movimm_from)
1398 static int is_similar_value(u_int v1, u_int v2)
1401 return (-4096 < diff && diff < 4096)
1402 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1404 || is_rotated_mask(v1 ^ v2);
// 64-bit variant of emit_movimm_from(): put rt_val into rt, where rs holds
// the 32-bit value rs_val.
1407 static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
// a target that fits in 32 bits can use the 32-bit helper
1409 if (rt_val < 0x100000000ull) {
1410 emit_movimm_from(rs_val, rs, rt_val, rt);
1413 // just move the whole thing. At least on Linux all addresses
1414 // seem to be 48bit, so 3 insns - not great not terrible
1415 emit_movimm64(rt_val, rt);
// Move host register a0 into argument register 0 and a1 into argument
// register 1 using 64-bit moves.
// NOTE(review): a0/a1 are u_int, so the ">=0" tests below are always true -
// confirm whether a signed "no register" value was intended.
1419 static void pass_args64(u_int a0, u_int a1)
// register 2 is used as scratch so that neither source is overwritten early
1423 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1425 else if(a0!=0&&a1==0) {
1427 if (a0>=0) emit_mov64(a0,0);
1430 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1431 if(a1>=0&&a1!=1) emit_mov64(a1,1);
// Copy rs into rt, narrowing/extending according to the stub type:
// sbfm on bit 7/15 for signed byte/halfword loads, ubfm on bit 7/15 for
// byte/halfword stores, and a plain move (skipped when rs == rt) for words.
1435 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1438 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1440 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1441 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1443 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1445 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1450 #include "pcsxmem.h"
1451 //#include "pcsxmem_inline.c"
// Emit the out-of-line slow path for load stub n.
// The branch recorded in stubs[n].addr is patched to land at the current
// output position. The generated code looks the address up through mem_rtab
// and can perform the load directly; otherwise it calls
// jump_handler_read8/16/32 and moves the result into the target register via
// loadstore_extend(). Both paths end with a jump to stubs[n].retaddr.
1453 static void do_readstub(int n)
1455 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1456 set_jump_target(stubs[n].addr, out);
1457 enum stub_type type = stubs[n].type;
1459 int rs = stubs[n].b;
1460 const struct regstat *i_regs = (void *)stubs[n].c;
1461 u_int reglist = stubs[n].e;
1462 const signed char *i_regmap = i_regs->regmap;
// C2LS and LOADLR load into the FTEMP mapping instead of rt1
1464 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1465 rt=get_reg(i_regmap,FTEMP);
1467 rt=get_reg(i_regmap,dops[i].rt1);
1470 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1471 void *restore_jump = NULL, *handler_jump = NULL;
// scan for a host register that is not in reglist
1473 for (r = 0; r < HOST_CCREG; r++) {
1474 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1479 if(rt>=0&&dops[i].rt1!=0)
1486 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// temp2 = mem_rtab[rs >> 12], then added to itself with flags set
// (presumably shifting a handler flag out of the top bit - confirm)
1488 emit_readdword(&mem_rtab,temp);
1489 emit_shrimm(rs,12,temp2);
1490 emit_readdword_dualindexedx8(temp,temp2,temp2);
1491 emit_adds64(temp2,temp2,temp2);
1494 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
// direct load: temp2 is the base, rs the index
1496 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1497 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1498 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1499 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1500 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1506 emit_jmp(0); // jump to reg restore
1509 emit_jmp(stubs[n].retaddr); // return address
1510 set_jump_target(handler_jump, out);
// handler path: pick the read handler by access width
1515 if(type==LOADB_STUB||type==LOADBU_STUB)
1516 handler=jump_handler_read8;
1517 if(type==LOADH_STUB||type==LOADHU_STUB)
1518 handler=jump_handler_read16;
1519 if(type==LOADW_STUB)
1520 handler=jump_handler_read32;
1522 pass_args64(rs,temp2);
1523 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count (loaded if not mapped) + stubs[n].d
1525 emit_loadreg(CCREG,2);
1526 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1527 emit_far_call(handler);
1528 // (no cycle reload after read)
1529 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
// the handler's result is taken from register 0
1530 loadstore_extend(type,0,rt);
1533 set_jump_target(restore_jump, out);
1534 restore_regs(reglist);
1535 emit_jmp(stubs[n].retaddr);
// Emit an inline read for instruction i from the constant address addr.
// If get_direct_memhandler() returns no handler, the address register is
// rewritten to the host address and a plain load is emitted; otherwise a
// memory handler is called and its result (register 0) is moved into the
// target register with loadstore_extend().
1538 static void inline_readstub(enum stub_type type, int i, u_int addr,
1539 const signed char regmap[], int target, int adj, u_int reglist)
1541 int ra = cinfo[i].addr;
1542 int rt = get_reg(regmap, target);
1545 uintptr_t host_addr = 0;
1547 int cc=get_reg(regmap,CCREG);
1548 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
1550 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1551 if (handler == NULL) {
1552 if(rt<0||dops[i].rt1==0)
// ra holds addr; turn it into host_addr for the direct access
1554 if (addr != host_addr)
1555 emit_movimm_from64(addr, ra, host_addr, ra);
1557 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1558 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1559 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1560 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1561 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
1566 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// pick the generic read handler by access width
1568 if(type==LOADB_STUB||type==LOADBU_STUB)
1569 handler=jump_handler_read8;
1570 if(type==LOADH_STUB||type==LOADHU_STUB)
1571 handler=jump_handler_read16;
1572 if(type==LOADW_STUB)
1573 handler=jump_handler_read32;
1576 // call a memhandler
1577 if(rt>=0&&dops[i].rt1!=0)
1581 emit_movimm(addr,0);
// register 2 = cycle count (loaded if not mapped) + adj
1585 emit_loadreg(CCREG,2);
1586 emit_addimm(cc<0?2:cc,adj,2);
// register 1 = mem_rtab entry for this page shifted left by one; use
// adrp+add when its page is within +/-4GB of the output position,
// otherwise a full 64-bit immediate load
1588 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1589 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1590 if (-4294967296l <= offset && offset < 4294967296l) {
1591 emit_adrp((void *)l1, 1);
1592 emit_addimm64(1, l1 & 0xfff, 1);
1595 emit_movimm64(l1, 1);
1598 emit_far_call(do_memhandler_pre);
1600 emit_far_call(handler);
1602 // (no cycle reload after read)
1603 if(rt>=0&&dops[i].rt1!=0)
1604 loadstore_extend(type, 0, rt);
1605 restore_regs(reglist);
// Emit the out-of-line slow path for store stub n.
// The branch recorded in stubs[n].addr is patched to land at the current
// output position. The generated code looks the address up through mem_wtab
// and can perform the store directly; otherwise it calls
// jump_handler_write8/16/32, which returns the new cycle count. Both paths
// end with a jump to stubs[n].retaddr.
1608 static void do_writestub(int n)
1610 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1611 set_jump_target(stubs[n].addr, out);
1612 enum stub_type type=stubs[n].type;
1615 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1616 u_int reglist=stubs[n].e;
1617 signed char *i_regmap=i_regs->regmap;
// C2LS stores the FTEMP mapping, everything else stores rs2
1619 if(dops[i].itype==C2LS) {
1620 rt=get_reg(i_regmap,r=FTEMP);
1622 rt=get_reg(i_regmap,r=dops[i].rs2);
1626 int rtmp,temp=-1,temp2,regs_saved=0;
1627 void *restore_jump = NULL, *handler_jump = NULL;
// reglist2 additionally marks the address and data registers as in use
1628 int reglist2=reglist|(1<<rs)|(1<<rt);
1629 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1630 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1638 for(rtmp=0;rtmp<=3;rtmp++)
1639 if(rtmp!=rs&&rtmp!=rt)
1642 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1645 host_tempreg_acquire();
// temp2 = mem_wtab[rs >> 12], then added to itself with flags set
// (presumably shifting a handler flag out of the top bit - confirm)
1648 emit_readdword(&mem_wtab,temp);
1649 emit_shrimm(rs,12,temp2);
1650 emit_readdword_dualindexedx8(temp,temp2,temp2);
1651 emit_adds64(temp2,temp2,temp2);
// direct store: temp2 is the base, rs the index
1655 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1656 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1657 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1662 emit_jmp(0); // jump to reg restore
1665 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1666 set_jump_target(handler_jump, out);
// handler path: pick the write handler by access width
1672 case STOREB_STUB: handler=jump_handler_write8; break;
1673 case STOREH_STUB: handler=jump_handler_write16; break;
1674 case STOREW_STUB: handler=jump_handler_write32; break;
1680 emit_mov64(temp2,3);
1681 host_tempreg_release();
1683 int cc=get_reg(i_regmap,CCREG);
// register 2 = cycle count (loaded if not mapped) + stubs[n].d
1685 emit_loadreg(CCREG,2);
1686 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1687 // returns new cycle_count
1688 emit_far_call(handler);
// undo the adjustment on the count returned in register 0
1689 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1691 emit_storereg(CCREG,2);
1693 set_jump_target(restore_jump, out);
1694 restore_regs(reglist);
1695 emit_jmp(stubs[n].retaddr);
// Emit an inline write for instruction i to the constant address addr.
// If get_direct_memhandler() returns no handler, the address register is
// rewritten to the host address and a plain store is emitted; otherwise the
// memory handler is called between do_memhandler_pre/do_memhandler_post and
// the cycle count is taken back from register 0.
1698 static void inline_writestub(enum stub_type type, int i, u_int addr,
1699 const signed char regmap[], int target, int adj, u_int reglist)
1701 int ra = cinfo[i].addr;
1702 int rt = get_reg(regmap,target);
1705 uintptr_t host_addr = 0;
1706 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1707 if (handler == NULL) {
// ra holds addr; turn it into host_addr for the direct access
1708 if (addr != host_addr)
1709 emit_movimm_from64(addr, ra, host_addr, ra);
1711 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1712 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1713 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
1719 // call a memhandler
1721 emit_writeword(ra, &address); // some handlers still need it
// value to store goes to register 0, narrowed to the access width
1722 loadstore_extend(type, rt, 0);
1724 cc = cc_use = get_reg(regmap, CCREG);
1726 emit_loadreg(CCREG, (cc_use = 2));
// register 2 = cycle count + adj
1727 emit_addimm(cc_use, adj, 2);
1729 emit_far_call(do_memhandler_pre);
1730 emit_far_call(handler);
1731 emit_far_call(do_memhandler_post);
// undo the adjustment on the count returned in register 0
1732 emit_addimm(0, -adj, cc_use);
1734 emit_storereg(CCREG, cc_use);
1735 restore_regs(reglist);
// Code emitted before a call into a GTE (cop2) handler: stores the registers
// in reglist, runs the cop2 stall check, and sets register 0 to the address
// of psxRegs.CP2D.r[0], formed as an offset from FP.
// NOTE(review): reglist is passed to save_load_regs_all() unmasked; the
// visible caller already masks it with CALLER_SAVE_REGS.
1740 static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1742 save_load_regs_all(1, reglist);
1743 cop2_do_stall_check(op, i, i_regs, 0);
1746 emit_far_call(pcnt_gte_start);
1748 // pointer to cop2 regs
1749 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
// Counterpart of c2op_prologue(): reloads the registers in reglist after the
// GTE handler call.
1752 static void c2op_epilogue(u_int op,u_int reglist)
1756 emit_far_call(pcnt_gte_end);
1758 save_load_regs_all(0, reglist);
// Assemble a GTE (cop2) operation for instruction i: the low 6 bits of the
// opcode select the handler, which is called between c2op_prologue() and
// c2op_epilogue().
1761 static void c2op_assemble(int i, const struct regstat *i_regs)
1763 u_int c2op=source[i]&0x3f;
1764 u_int hr,reglist_full=0,reglist;
1765 int need_flags,need_ir;
// collect every host register that currently has a mapping
1766 for(hr=0;hr<HOST_REGS;hr++) {
1767 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1769 reglist=reglist_full&CALLER_SAVE_REGS;
1771 if (gte_handlers[c2op]!=NULL) {
1772 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1773 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1774 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1775 source[i],gte_unneeded[i+1],need_flags,need_ir);
1776 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1778 //int shift = (source[i] >> 19) & 1;
1779 //int lm = (source[i] >> 10) & 1;
1783 c2op_prologue(c2op, i, i_regs, reglist);
1784 emit_movimm(source[i],1); // opcode
1785 emit_writeword(1,&psxRegs.code);
// the no-flags handler variant is used when the flags are not needed
1786 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1789 c2op_epilogue(c2op,reglist);
1793 static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1795 //value = value & 0x7ffff000;
1796 //if (value & 0x7f87e000) value |= 0x80000000;
1797 emit_andimm(sl, 0x7fffe000, temp);
1798 emit_testimm(temp, 0xff87ffff);
1799 emit_andimm(sl, 0x7ffff000, temp);
1800 host_tempreg_acquire();
1801 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1802 emit_cmovne_reg(HOST_TEMPREG, temp);
1803 host_tempreg_release();
1804 assert(0); // testing needed
1807 static void do_mfc2_31_one(u_int copr,signed char temp)
1809 emit_readshword(®_cop2d[copr],temp);
1810 emit_bicsar_imm(temp,31,temp);
1811 emit_cmpimm(temp,0xf80);
1812 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1813 emit_andimm(temp,0xf80,temp);
// Emit code that rebuilds cop2 data register 29 from registers 9, 10 and 11:
// each is reduced with do_mfc2_31_one(), the three results are merged into
// tl with shifts of 7 (right), 2 (right) and 3 (left), and tl is written
// back to register 29.
// HOST_TEMPREG can stand in for temp; it is released at the end if used.
// NOTE(review): "®_cop2d" below looks like a mis-encoded "&reg_cop2d"
// (HTML entity corruption) - confirm and restore.
1816 static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1819 host_tempreg_acquire();
1820 temp = HOST_TEMPREG;
1822 do_mfc2_31_one(9,temp);
1823 emit_shrimm(temp,7,tl);
1824 do_mfc2_31_one(10,temp);
1825 emit_orrshr_imm(temp,2,tl);
1826 do_mfc2_31_one(11,temp);
1827 emit_orrshl_imm(temp,3,tl);
1828 emit_writeword(tl,®_cop2d[29]);
1830 if (temp == HOST_TEMPREG)
1831 host_tempreg_release();
// Assemble a multiply/divide instruction (opcode2 0x18 = MULT, 0x1A = DIV,
// 0x1B handled alongside DIV) with results in the HIREG/LOREG mappings.
// When both source registers are non-zero guest registers the real
// mul/div sequence is emitted; otherwise the result is derived from the
// known zero operand.
1834 static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1840 if(dops[i].rs1&&dops[i].rs2)
1842 switch(dops[i].opcode2)
1847 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1848 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1849 signed char hi=get_reg(i_regs->regmap,HIREG);
1850 signed char lo=get_reg(i_regs->regmap,LOREG);
// 64-bit product lands in hi; the shift by 32 below leaves its upper word
1856 if(dops[i].opcode2==0x18) // MULT
1857 emit_smull(m1,m2,hi);
1859 emit_umull(m1,m2,hi);
1862 emit_shrimm64(hi,32,hi);
1868 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1869 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1870 signed char quotient=get_reg(i_regs->regmap,LOREG);
1871 signed char remainder=get_reg(i_regs->regmap,HIREG);
1872 assert(numerator>=0);
1873 assert(denominator>=0);
1874 assert(quotient>=0);
1875 assert(remainder>=0);
1877 if (dops[i].opcode2 == 0x1A) // DIV
1878 emit_sdiv(numerator,denominator,quotient);
1880 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1881 emit_msub(quotient,denominator,numerator,remainder);
1883 // div 0 quotient (remainder is already correct)
1884 host_tempreg_acquire();
1885 if (dops[i].opcode2 == 0x1A) { // DIV
// presumably yields 1 for a negative numerator and -1 otherwise - confirm
1886 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1887 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1890 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient only when the denominator is zero
1891 emit_test(denominator,denominator);
1892 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1893 host_tempreg_release();
1902 signed char hr=get_reg(i_regs->regmap,HIREG);
1903 signed char lr=get_reg(i_regs->regmap,LOREG);
1904 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1907 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1908 assert(numerator >= 0);
// dividing by the zero register: HI receives the numerator
1910 emit_mov(numerator,hr);
1912 if (dops[i].opcode2 == 0x1A) { // DIV
1913 emit_add_lsrimm(WZR,numerator,31,lr);
1914 emit_orn_asrimm(lr,numerator,31,lr);
1921 if (hr >= 0) emit_zeroreg(hr);
1922 if (lr >= 0) emit_movimm(~0,lr);
// numerator is the zero register: HI = 0, LO depends on the denominator
1925 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1927 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1928 assert(denominator >= 0);
1929 if (hr >= 0) emit_zeroreg(hr);
1932 emit_test(denominator, denominator);
1933 emit_csinvne_reg(lr, lr, lr);
1938 // Multiply by zero is zero.
1939 if (hr >= 0) emit_zeroreg(hr);
1940 if (lr >= 0) emit_zeroreg(lr);
1944 #define multdiv_assemble multdiv_assemble_arm64
// Emit a jump to the code for the guest address held in host register rs;
// the lookup is done by calling ndrc_get_addr_ht.
// NOTE(review): argument setup and the final branch are presumably emitted
// around the call - confirm.
1946 static void do_jump_vaddr(u_int rs)
1950 emit_far_call(ndrc_get_addr_ht);
1954 static void do_preload_rhash(u_int r) {
1955 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1956 // register. On ARM the hash can be done with a single instruction (below)
1959 static void do_preload_rhtbl(u_int ht) {
1960 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1963 static void do_rhash(u_int rs,u_int rh) {
1964 emit_andimm(rs, 0xf8, rh);
1967 static void do_miniht_load(int ht, u_int rh) {
1968 emit_add64(ht, rh, ht);
1969 emit_ldst(0, 0, rh, ht, 0);
// Emit the mini hash table dispatch for the address in rs, using the entry
// tag in rh and the entry pointer in ht.
// The code after the patched branch target loads the 64-bit value at
// [ht, #8] into ht.
1972 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1978 set_jump_target(jaddr, out);
1979 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// 64-bit ldr with a scaled immediate: byte offset 8 is encoded as 8 >> 3
1980 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
// Emit code that records return_address in the mini hash table: rt is
// loaded with return_address (movz of the upper halfword, movk of the lower),
// the current output position is registered with the linker, and the slot
// selected by (return_address & 0xFF) >> 3 is written.
1984 // parsed by set_jump_target?
1985 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1986 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1987 emit_movk(return_address&0xffff,rt);
1988 add_to_linker(out,return_address,1);
// slot[1] receives the 64-bit value in temp, slot[0] the 32-bit address
1990 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1991 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
// Make the code written to [start, end) visible to instruction fetch:
// clean/invalidate the data cache lines, invalidate the instruction cache
// lines, with barriers in between, skipping either step when CTR_EL0 says it
// is not required.
1994 static unused void clear_cache_arm64(char *start, char *end)
1996 // Don't rely on GCC's __clear_cache implementation, as it caches
1997 // icache/dcache cache line sizes, that can vary between cores on
1998 // big.LITTLE architectures.
1999 uint64_t addr, ctr_el0;
// running minimum of the line sizes seen so far, kept across calls
2000 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2001 size_t isize, dsize;
2003 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// CTR_EL0 bits [3:0] and [19:16] hold log2 of the line size in words
2004 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2005 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2007 // use the global minimum cache line size
2008 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2009 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2011 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2012 not required for instruction to data coherence. */
2013 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the line containing `start`
2014 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2015 for (; addr < (uint64_t)end; addr += dsize)
2016 // use "civac" instead of "cvau", as this is the suggested workaround for
2017 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2018 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2020 __asm__ volatile("dsb ish" : : : "memory");
2022 /* If CTR_EL0.DIC is set, instruction cache invalidation to the Point of
2023 Unification is not required for data to instruction coherence. */
2024 if ((ctr_el0 & (1 << 29)) == 0x0) {
2025 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2026 for (; addr < (uint64_t)end; addr += isize)
2027 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2029 __asm__ volatile("dsb ish" : : : "memory");
2032 __asm__ volatile("isb" : : : "memory");
2035 // CPU-architecture-specific initialization
// Fills every slot of ndrc->tramp.ops with a two-instruction trampoline:
// a pc-relative literal load into x17 followed by "br x17". The literal
// offset is the distance from tramp.ops to tramp.f, the same for each slot.
2036 static void arch_init(void)
2038 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
// writes go through the writable alias of the translation cache
2039 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
// the literal offset is encoded in words, so it must be 4-byte aligned
2041 assert(!(diff & 3));
2042 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2043 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2044 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2045 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2047 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2050 // vim:shiftwidth=2:expandtab