1 /* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
23 #include "arm_features.h"
25 #if defined(BASE_ADDR_FIXED)
26 #elif defined(BASE_ADDR_DYNAMIC)
27 u_char *translation_cache;
29 u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
31 static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
33 #define CALLER_SAVE_REGS 0x0007ffff
35 #define unused __attribute__((unused))
37 void do_memhandler_pre();
38 void do_memhandler_post();
// Patch the branch/adr instruction at `addr` so it transfers to `target`.
// The existing insn is decoded to select the right immediate field:
// unconditional B (imm26), B.cond (imm19), or ADR (immlo/immhi).
// NOTE(review): this numbered listing is elided — the declaration of `ptr`,
// several closing braces and the trailing else are missing from this view.
41 static void set_jump_target(void *addr, void *target)
44 intptr_t offset = (u_char *)target - (u_char *)addr;
// Unconditional B: range +/-128MB, imm26 scaled by 4.
46 if((*ptr&0xFC000000)==0x14000000) {
47 assert(offset>=-134217728LL&&offset<134217728LL);
48 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
// B.cond: imm19 in bits [23:5].
50 else if((*ptr&0xff000000)==0x54000000) {
51 // Conditional branches are limited to +/- 1MB
52 // block max size is 256k so branching beyond the +/- 1MB limit
53 // should only happen when jumping to an already compiled block (see add_link)
54 // a workaround would be to do a trampoline jump via a stub at the end of the block
55 assert(offset>=-1048576LL&&offset<1048576LL);
56 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
// ADR: low 2 offset bits go to immlo [30:29], the rest to immhi [23:5].
58 else if((*ptr&0x9f000000)==0x10000000) { //adr
59 // generated by do_miniht_insert
60 assert(offset>=-1048576LL&&offset<1048576LL);
61 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
64 assert(0); // should not happen
67 // from a pointer to external jump stub (which was produced by emit_extjump2)
68 // find where the jumping insn is
// The stub layout is: movz, movk, adr; the adr at insn index 2 points
// (via its signed 21-bit immediate) at the branch insn inside the block.
69 static void *find_extjump_insn(void *stub)
71 int *ptr = (int *)stub + 2;
72 assert((*ptr&0x9f000000) == 0x10000000); // adr
// Reassemble the adr immediate: immhi from bits [23:5] (sign-extended),
// immlo from bits [30:29].
73 int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3);
74 return ptr + offset / 4;
77 // find where external branch is linked to using addr of its stub:
78 // get address that insn one after stub loads (dyna_linker arg1),
79 // treat it as a pointer to branch insn,
80 // return addr where that branch jumps to
81 static void *get_pointer(void *stub)
83 int *i_ptr = find_extjump_insn(stub);
84 assert((*i_ptr&0xfc000000) == 0x14000000); // b
// Sign-extend the 26-bit B immediate (<<6 then >>6) and scale by 4.
85 return (u_char *)i_ptr+(((signed int)(*i_ptr<<6)>>6)<<2);
88 // Find the "clean" entry point from a "dirty" entry point
89 // by skipping past the call to verify_code
// NOTE(review): bodies of get_clean_addr and verify_dirty are elided in
// this listing; only their signatures are visible.
90 static void *get_clean_addr(void *addr)
96 static int verify_dirty(u_int *ptr)
// A "dirty" block entry begins with ldr (literal), opcode byte 0x58;
// anything else means the entry is already clean.
102 static int isclean(void *addr)
105 return (*ptr >> 24) != 0x58; // the only place ldr (literal) is used
// Read the 64-bit value referenced by an `ldr Xt, literal` at insn `i`.
// NOTE(review): the line computing `ofs` (the sign-extended imm19) is
// elided from this view.
108 static uint64_t get_from_ldr_literal(const u_int *i)
111 assert((i[0] & 0xff000000) == 0x58000000);
114 return *(uint64_t *)(i + ofs);
// Extract the 16-bit immediate of a movz/movn-class insn.
117 static uint64_t get_from_movz(const u_int *i)
119 assert((i[0] & 0x7fe00000) == 0x52800000);
120 return (i[0] >> 5) & 0xffff;
123 // get source that block at addr was compiled from (host pointers)
// Decodes the fixed dirty-entry prologue: ldr x1,=source / ldr x2,=copy /
// movz w3,#slen*4 — and derives [start, end) of the guest source range.
124 static void get_bounds(void *addr, u_char **start, u_char **end)
126 const u_int *ptr = addr;
127 assert((ptr[0] & 0xff00001f) == 0x58000001); // ldr x1, source
128 assert((ptr[1] & 0xff00001f) == 0x58000002); // ldr x2, copy
129 assert((ptr[2] & 0xffe0001f) == 0x52800003); // movz w3, #slen*4
130 *start = (u_char *)get_from_ldr_literal(&ptr[0]);
131 *end = *start + get_from_movz(&ptr[2]);
134 // Allocate a specific ARM register.
// Binds guest register `reg` to host register `hr` for instruction `i`,
// migrating the dirty bit if `reg` was already held in another host reg.
// NOTE(review): listing elided — the declarations of n/dirty and the
// deallocation statements inside the loop are missing from this view.
135 static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
140 // see if it's already allocated (and dealloc it)
141 for(n=0;n<HOST_REGS;n++)
143 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
144 dirty=(cur->dirty>>n)&1;
// Carry the dirty state over to the new host register and drop any
// constant-propagation info for it.
150 cur->dirty&=~(1<<hr);
151 cur->dirty|=dirty<<hr;
152 cur->isconst&=~(1<<hr);
155 // Alloc cycle count into dedicated register
156 static void alloc_cc(struct regstat *cur,int i)
158 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
// 32-bit (w) register names, indexed by host register number.
166 static unused const char *regname[32] = {
167 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
168 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
169 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
170 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
// 64-bit (x) register names, same indexing.
173 static unused const char *regname64[32] = {
174 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
175 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
176 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
177 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
// A64 condition codes in encoding order (0x0..0xf).
// NOTE(review): the `enum {` opener is elided from this view.
181 COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
182 COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
// Condition mnemonics, indexed by the enum above.
185 static unused const char *condname[16] = {
186 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
187 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
// Append one 32-bit instruction word at the output cursor `out`.
// NOTE(review): the `out += 4;` advance is elided from this view.
190 static void output_w32(u_int word)
192 *((u_int *)out) = word;
// Append a raw 64-bit literal (used for literal pools).
196 static void output_w64(uint64_t dword)
198 *((uint64_t *)out) = dword;
// ---- instruction field packers; asserts on ranges are partly elided ----
// Rm in [20:16], Rd in [4:0].
203 static u_int rm_rd(u_int rm, u_int rd)
207 return (rm << 16) | rd;
// Rm [20:16], Rn [9:5], Rd [4:0] — the common 3-register layout.
211 static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
216 return (rm << 16) | (rn << 5) | rd;
// Load/store-pair layout: imm7 [21:15], Rt2 [14:10], Rn [9:5], Rt [4:0].
219 static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
225 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
// Shifted-register layout: imm6 shift amount in [15:10].
228 static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
231 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
// Move-wide layout: imm16 [20:5], Rd [4:0].
234 static u_int imm16_rd(u_int imm16, u_int rd)
236 assert(imm16 < 0x10000);
238 return (imm16 << 5) | rd;
// Unsigned-offset load/store layout: imm12 [21:10].
241 static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
243 assert(imm12 < 0x1000);
246 return (imm12 << 10) | (rn << 5) | rd;
// Unscaled (ldur/stur) layout: imm9 [20:12].
249 static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
251 assert(imm9 < 0x200);
254 return (imm9 << 12) | (rn << 5) | rd;
// PC-relative literal layout: imm19 [23:5].
257 static u_int imm19_rt(u_int imm19, u_int rt)
259 assert(imm19 < 0x80000);
261 return (imm19 << 5) | rt;
// Logical-immediate layout: N [22], immr [21:16], imms [15:10].
264 static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
271 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
// Encode the imm26 field for an unconditional B to `addr` from `out`.
// Addresses below 3 are placeholder branch ids that get patched later.
// NOTE(review): abort()/exit handling after the range check is elided.
274 static u_int genjmp(const u_char *addr)
276 intptr_t offset = addr - out;
277 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
278 if (offset < -134217728 || offset > 134217727) {
279 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
283 return ((u_int)offset >> 2) & 0x03ffffff;
// Encode the imm19 field for a conditional branch (+/-1MB range).
286 static u_int genjmpcc(const u_char *addr)
288 intptr_t offset = addr - out;
289 if ((uintptr_t)addr < 3) return 0;
290 if (offset < -1048576 || offset > 1048572) {
291 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
295 return ((u_int)offset >> 2) & 0x7ffff;
// True when `value` is a non-empty run of contiguous ones starting at
// bit 0 (e.g. 0x1, 0xff, 0xffffffff): incrementing such a value clears
// every bit that was set, so (value + 1) & value becomes zero.
static uint32_t is_mask(u_int value)
{
  return value != 0 && ((value + 1) & value) == 0;
}
303 // This function returns true if the argument contains a
304 // non-empty sequence of ones (possibly rotated) with the remainder zero.
// Such values are exactly those encodable as an A64 logical immediate.
// NOTE(review): an early `return 1;` (and likely an all-zero/all-one
// guard) is elided between the two statements shown.
305 static uint32_t is_rotated_mask(u_int value)
309 if (is_mask((value - 1) | value))
311 return is_mask((~value - 1) | ~value);
// Compute the (immr, imms) logical-immediate fields encoding `value`.
// Caller must have checked is_rotated_mask(value) first.
// NOTE(review): heavily elided — the `*imms` assignments, the early
// return, and the rotated-case handling (second half, operating on the
// complement) are missing from this view; the two visible halves handle
// the plain-mask and rotated-mask cases respectively.
314 static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
316 int lzeros, tzeros, ones;
318 if (is_mask((value - 1) | value)) {
319 lzeros = __builtin_clz(value);
320 tzeros = __builtin_ctz(value);
321 ones = 32 - lzeros - tzeros;
322 *immr = (32 - tzeros) & 31;
327 if (is_mask((value - 1) | value)) {
328 lzeros = __builtin_clz(value);
329 tzeros = __builtin_ctz(value);
330 ones = 32 - lzeros - tzeros;
// ---- basic register-move and arithmetic emitters ----
// Each writes one A64 insn via output_w32; closing braces are elided
// throughout this listing.
// mov wd,wm == orr wd,wzr,wm (0x2a000000 = ORR shifted-register, 32-bit).
338 static void emit_mov(u_int rs, u_int rt)
340 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
341 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
// 64-bit variant: orr xd,xzr,xm.
344 static void emit_mov64(u_int rs, u_int rt)
346 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
347 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
// "movs" emulated as adds rd,rn,#0; asserted out because the name is
// misleading on arm64 (there is no flag-setting mov).
350 static void emit_movs(u_int rs, u_int rt)
352 assert(0); // misleading
353 assem_debug("movs %s,%s\n", regname[rt], regname[rs]);
354 output_w32(0x31000000 | imm12_rn_rd(0, rs, rt));
357 static void emit_add(u_int rs1, u_int rs2, u_int rt)
359 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
360 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
363 static void emit_add64(u_int rs1, u_int rs2, u_int rt)
365 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
366 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
369 #pragma GCC diagnostic ignored "-Wunused-function"
// Flag-setting variants (ADDS).
370 static void emit_adds(u_int rs1, u_int rs2, u_int rt)
372 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
373 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
376 static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
378 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
379 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
// neg wd,wm == sub wd,wzr,wm.
382 static void emit_neg(u_int rs, u_int rt)
384 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
385 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
388 static void emit_sub(u_int rs1, u_int rs2, u_int rt)
390 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
391 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
// ---- move-wide immediates ----
394 static void emit_movz(u_int imm, u_int rt)
396 assem_debug("movz %s,#%#x\n", regname[rt], imm);
397 output_w32(0x52800000 | imm16_rd(imm, rt));
400 static void emit_movz_lsl16(u_int imm, u_int rt)
402 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
403 output_w32(0x52a00000 | imm16_rd(imm, rt));
406 static void emit_movn(u_int imm, u_int rt)
408 assem_debug("movn %s,#%#x\n", regname[rt], imm);
409 output_w32(0x12800000 | imm16_rd(imm, rt));
412 static void emit_movn_lsl16(u_int imm,u_int rt)
414 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
415 output_w32(0x12a00000 | imm16_rd(imm, rt));
418 static void emit_movk(u_int imm,u_int rt)
420 assem_debug("movk %s,#%#x\n", regname[rt], imm);
421 output_w32(0x72800000 | imm16_rd(imm, rt));
424 static void emit_movk_lsl16(u_int imm,u_int rt)
427 assem_debug("movk %s, #%#x, lsl #16\n", regname[rt], imm);
428 output_w32(0x72a00000 | imm16_rd(imm, rt));
// Body elided; presumably movz rt,#0 or mov rt,wzr.
431 static void emit_zeroreg(u_int rt)
// Materialize an arbitrary 32-bit immediate in the fewest insns:
// movz / movn / shifted variants / logical-immediate ORR, falling back
// to a movz+movk pair. Leading cases (imm<65536 etc.) are elided here.
436 static void emit_movimm(u_int imm, u_int rt)
440 else if ((~imm) < 65536)
442 else if ((imm&0xffff) == 0)
443 emit_movz_lsl16(imm >> 16, rt);
444 else if (((~imm)&0xffff) == 0)
445 emit_movn_lsl16(~imm >> 16, rt);
446 else if (is_rotated_mask(imm)) {
448 gen_logical_imm(imm, &immr, &imms);
449 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
450 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
453 emit_movz(imm & 0xffff, rt);
454 emit_movk_lsl16(imm >> 16, rt);
// Load a 32-bit word from `addr` into rt, addressing relative to the
// dynarec_local base kept in FP. Scaled LDR imm12 reaches word-aligned
// offsets up to 16380. NOTE(review): the out-of-range else is elided.
458 static void emit_readword(void *addr, u_int rt)
460 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
461 if (!(offset & 3) && offset <= 16380) {
462 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
463 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
// 64-bit load: imm12 scaled by 8, so aligned offsets up to 32760.
469 static void emit_readdword(void *addr, u_int rt)
471 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
472 if (!(offset & 7) && offset <= 32760) {
473 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
474 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
// Load guest register `r` (or one of the special regs) into host reg hr.
// NOTE(review): the `switch (r)` opener and `is64` declaration are
// elided from this view.
480 static void emit_loadreg(u_int r, u_int hr)
487 void *addr = &psxRegs.GPR.r[r];
489 //case HIREG: addr = &hi; break;
490 //case LOREG: addr = &lo; break;
491 case CCREG: addr = &cycle_count; break;
492 case CSREG: addr = &Status; break;
493 case INVCP: addr = &invc_ptr; is64 = 1; break;
494 default: assert(r < 34); break;
// Pointers (INVCP) need the full 64-bit load; everything else is 32-bit.
497 emit_readdword(addr, hr);
499 emit_readword(addr, hr);
// Store 32-bit rt to `addr`, FP-relative (see emit_readword for the
// addressing scheme). NOTE(review): out-of-range else is elided.
503 static void emit_writeword(u_int rt, void *addr)
505 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
506 if (!(offset & 3) && offset <= 16380) {
507 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
508 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
514 static void emit_writedword(u_int rt, void *addr)
516 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
517 if (!(offset & 7) && offset <= 32760) {
518 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
519 output_w32(0xf9000000 | imm12_rn_rd(offset >> 2, FP, rt));
// Spill host reg hr back to the guest register file (or cycle counter).
// NOTE(review): the `switch (r)` opener is elided from this view.
525 static void emit_storereg(u_int r, u_int hr)
528 void *addr = &psxRegs.GPR.r[r];
530 //case HIREG: addr = &hi; break;
531 //case LOREG: addr = &lo; break;
532 case CCREG: addr = &cycle_count; break;
533 default: assert(r < 34); break;
535 emit_writeword(hr, addr);
// tst ws,wt == ands wzr,ws,wt (sets NZ, discards result).
538 static void emit_test(u_int rs, u_int rt)
540 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
541 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
544 static void emit_testimm(u_int rs, u_int imm)
547 assem_debug("tst %s,#%#x\n", regname[rs], imm);
548 assert(is_rotated_mask(imm)); // good enough for PCSX
549 gen_logical_imm(imm, &immr, &imms);
550 output_w32(0xb9000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
// Conditional test — never implemented on arm64, kept only so callers
// compile until it is removed.
553 static void emit_testeqimm(u_int rs,int imm)
555 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
556 assert(0); // TODO eliminate emit_testeqimm
// mvn wd,wm == orn wd,wzr,wm (0x2a200000 = ORN shifted-register).
559 static void emit_not(u_int rs,u_int rt)
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
// Predicated mvn has no arm64 equivalent; slated for removal.
565 static void emit_mvnmi(u_int rs,u_int rt)
567 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
568 assert(0); // eliminate
571 static void emit_and(u_int rs1,u_int rs2,u_int rt)
573 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
574 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
577 static void emit_or(u_int rs1,u_int rs2,u_int rt)
579 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
580 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
// orr rt,rt,rs,lsl #imm — accumulate a shifted field into rt.
583 static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
// orr rt,rt,rs,lsr #imm (0x2a400000 selects the LSR shift type).
589 static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
595 static void emit_xor(u_int rs1,u_int rs2,u_int rt)
597 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
598 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
// Add a (signed, passed as uintptr_t) immediate to rs, optionally
// setting flags (s) and/or in 64-bit width (is64). Splits immediates
// >4095 into a shifted-by-12 part plus a low part; negative immediates
// become subtracts. NOTE(review): listing elided — the leading
// `if (imm < 4096)` guard and several closing braces are missing, and
// line 619 passes `imm` where the debug text prints `imm&0xfff`;
// presumably the real code masks to 12 bits (imm12_rn_rd asserts
// imm12 < 0x1000) — verify against upstream.
601 static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
603 unused const char *st = s ? "s" : "";
604 s = s ? 0x20000000 : 0;
605 is64 = is64 ? 0x80000000 : 0;
607 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
608 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
610 else if (-imm < 4096) {
611 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
612 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
614 else if (imm < 16777216) {
615 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
616 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
617 if ((imm & 0xfff) || s) {
618 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
619 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rt, rt));
622 else if (-imm < 16777216) {
623 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
624 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
625 if ((imm & 0xfff) || s) {
626 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
627 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
// Convenience wrappers around emit_addimm_s (flags / width selection).
634 static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
636 emit_addimm_s(0, 0, rs, imm, rt);
639 static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
641 emit_addimm_s(0, 1, rs, imm, rt);
644 static void emit_addimm_and_set_flags(int imm, u_int rt)
646 emit_addimm_s(1, 0, rt, imm, rt);
649 static void emit_addimm_no_flags(u_int imm,u_int rt)
651 emit_addimm(rt,imm,rt);
// Emit and/orr/eor/ands with an immediate: use the logical-immediate
// form when imm is a rotated mask, otherwise materialize imm in
// HOST_TEMPREG and use the register form. `op` indexes both the
// mnemonic table and the opc field (bits [30:29]).
// NOTE(review): the acquire/release condition looks inverted relative
// to its apparent intent (skip acquiring when rt itself is the temp
// reg) — confirm against upstream before relying on it.
654 static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
656 const char *names[] = { "and", "orr", "eor", "ands" };
657 const char *name = names[op];
660 if (is_rotated_mask(imm)) {
661 gen_logical_imm(imm, &immr, &imms);
662 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
663 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
666 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
667 host_tempreg_acquire();
668 emit_movimm(imm, HOST_TEMPREG);
669 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
670 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
671 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
672 host_tempreg_release();
// Thin wrappers; their fast-path special cases (e.g. imm==0) are elided
// from this listing.
677 static void emit_andimm(u_int rs, u_int imm, u_int rt)
682 emit_logicop_imm(0, rs, imm, rt);
685 static void emit_orimm(u_int rs, u_int imm, u_int rt)
692 emit_logicop_imm(1, rs, imm, rt);
695 static void emit_xorimm(u_int rs, u_int imm, u_int rt)
702 emit_logicop_imm(2, rs, imm, rt);
// ---- bitfield and shift emitters (SBFM/UBFM encodings) ----
// sbfm rt,rs,#0,#imm — sign-extend the low imm+1 bits.
705 static void emit_sbfm(u_int rs,u_int imm,u_int rt)
707 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
708 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
// ubfm rt,rs,#0,#imm — zero-extend the low imm+1 bits.
711 static void emit_ubfm(u_int rs,u_int imm,u_int rt)
713 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
714 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
// lsl == ubfm with immr=(32-imm)&31, imms=31-imm.
// NOTE(review): (31-imm)+1 overflows the valid immr range for imm==0 —
// presumably callers never pass 0; verify.
717 static void emit_shlimm(u_int rs,u_int imm,u_int rt)
719 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
720 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
723 static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt)
725 assert(0); // eliminate
// lsr == ubfm rt,rs,#imm,#31.
728 static void emit_shrimm(u_int rs,u_int imm,u_int rt)
730 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
// asr == sbfm rt,rs,#imm,#31.
734 static void emit_sarimm(u_int rs,u_int imm,u_int rt)
736 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
737 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
// ror rt,rs,#imm == extr rt,rs,rs,#imm.
740 static void emit_rorimm(u_int rs,u_int imm,u_int rt)
742 assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm);
743 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
// sxth == sbfm rt,rs,#0,#15.
746 static void emit_signextend16(u_int rs, u_int rt)
748 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
749 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
// Variable shifts: lslv/lsrv/asrv (data-processing 2-source).
752 static void emit_shl(u_int rs,u_int rshift,u_int rt)
754 assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[rshift]);
755 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
758 static void emit_shr(u_int rs,u_int rshift,u_int rt)
760 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
761 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
764 static void emit_sar(u_int rs,u_int rshift,u_int rt)
766 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
767 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
// Compare rs against an immediate: subs/adds-immediate forms when imm
// fits (optionally shifted by 12), else via HOST_TEMPREG. The leading
// `if (imm < 4096)` guard is elided from this listing.
770 static void emit_cmpimm(u_int rs, u_int imm)
773 assem_debug("cmp %s,%#x\n", regname[rs], imm);
774 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
776 else if (-imm < 4096) {
777 assem_debug("cmn %s,%#x\n", regname[rs], imm);
778 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
780 else if (imm < 16777216 && !(imm & 0xfff)) {
781 assem_debug("cmp %s,#%#x,lsl #12\n", regname[rs], imm >> 12);
782 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
785 host_tempreg_acquire();
786 emit_movimm(imm, HOST_TEMPREG);
787 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
788 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
789 host_tempreg_release();
// Conditionally set rt to 0 or 1: csinc (for imm==1) or csel with wzr
// (for imm==0). cond0 selects the "take imm" case, cond1 its inverse.
// NOTE(review): the `if (imm)` / `else` around the two pairs is elided.
793 static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
795 assert(imm == 0 || imm == 1);
796 assert(cond0 < 0x10);
797 assert(cond1 < 0x10);
// imm==1: csinc rt,rt,wzr,cond1 — rt becomes wzr+1==1 unless cond1 holds.
799 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
800 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
// imm==0: csel rt,wzr,rt,cond0 — rt becomes 0 when cond0 holds.
802 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
803 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
// Condition-specific wrappers.
807 static void emit_cmovne_imm(u_int imm,u_int rt)
809 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
812 static void emit_cmovl_imm(u_int imm,u_int rt)
814 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
817 static void emit_cmovb_imm(int imm,u_int rt)
819 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
822 static void emit_cmovs_imm(int imm,u_int rt)
824 emit_cmov_imm(COND_MI, COND_PL, imm, rt);
// Conditional register moves via csel.
827 static void emit_cmovne_reg(u_int rs,u_int rt)
829 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
830 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
833 static void emit_cmovl_reg(u_int rs,u_int rt)
835 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
836 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
839 static void emit_cmovs_reg(u_int rs,u_int rt)
841 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
842 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
// MIPS slti/sltiu: compare (elided emit_cmpimm call) then set 0/1.
// rt is cleared before the compare when it doesn't alias rs, otherwise
// after, so the comparison input isn't clobbered early.
845 static void emit_slti32(u_int rs,int imm,u_int rt)
847 if(rs!=rt) emit_zeroreg(rt);
849 if(rs==rt) emit_movimm(0,rt);
850 emit_cmovl_imm(1,rt);
853 static void emit_sltiu32(u_int rs,int imm,u_int rt)
855 if(rs!=rt) emit_zeroreg(rt);
857 if(rs==rt) emit_movimm(0,rt);
858 emit_cmovb_imm(1,rt);
861 static void emit_cmp(u_int rs,u_int rt)
863 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
864 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
// rt = (rs > 0); comparison emission elided from this listing.
867 static void emit_set_gz32(u_int rs, u_int rt)
869 //assem_debug("set_gz32\n");
872 emit_cmovl_imm(0,rt);
// rt = (rs != 0).
875 static void emit_set_nz32(u_int rs, u_int rt)
877 //assem_debug("set_nz32\n");
878 if(rs!=rt) emit_mov(rs,rt);
880 emit_cmovne_imm(1,rt);
// rt = (rs1 < rs2) signed; the emit_cmp call is elided.
883 static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
885 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovl_imm(1,rt);
// rt = (rs1 < rs2) unsigned.
892 static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
894 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
895 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
897 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
898 emit_cmovb_imm(1,rt);
// BL to `a` when within +/-128MB; the far-call fallback is elided from
// this listing.
901 static void emit_call(const void *a)
903 intptr_t diff = (u_char *)a - out;
904 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
906 if (-134217728 <= diff && diff <= 134217727)
907 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
// Unconditional B (imm26 via genjmp).
912 static void emit_jmp(const void *a)
914 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
915 u_int offset = genjmp(a);
916 output_w32(0x14000000 | offset);
// Conditional branches: 0x54000000 | imm19<<5 | cond.
919 static void emit_jne(const void *a)
921 assem_debug("bne %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_NE);
926 static void emit_jeq(const void *a)
928 assem_debug("beq %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_EQ);
933 static void emit_js(const void *a)
935 assem_debug("bmi %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_MI);
940 static void emit_jns(const void *a)
942 assem_debug("bpl %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_PL);
947 static void emit_jl(const void *a)
949 assem_debug("blt %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_LT);
954 static void emit_jge(const void *a)
956 assem_debug("bge %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_GE);
961 static void emit_jno(const void *a)
963 assem_debug("bvc %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_VC);
968 static void emit_jc(const void *a)
970 assem_debug("bcs %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_CS);
975 static void emit_jcc(const void *a)
977 assem_debug("bcc %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CC);
// br Xr — indirect branch.
982 static void emit_jmpreg(u_int r)
984 assem_debug("br %s", regname64[r]);
985 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
// ret Xr.
988 static void emit_retreg(u_int r)
990 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
991 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
// Body elided; presumably emit_retreg(LR).
994 static void emit_ret(void)
// adr rt, addr — PC-relative address within +/-1MB of `out`.
999 static void emit_adr(void *addr, u_int rt)
1001 intptr_t offset = (u_char *)addr - out;
1002 assert(-1048576 <= offset && offset < 1048576);
1003 assem_debug("adr x%d,#%#lx\n", rt, offset);
1004 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
// ---- register+offset and register+register addressed loads/stores ----
// Unscaled signed-offset forms (ldur/stur family) take a 9-bit offset;
// dual-indexed forms use the [Xn, Wm, uxtw] register-offset encoding.
// ldur wt,[xs,#offset].
1007 static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1009 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1010 assert(-256 <= offset && offset < 256);
1011 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// strb wt,[xs1,ws2].
1014 static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1016 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1017 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1020 static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1022 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1026 static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
// ldr xt,[xs1, ws2, uxtw #3] — index scaled by 8.
1032 static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1034 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1038 static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1040 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1044 static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1047 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1050 static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1053 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1056 static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1059 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1062 static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1065 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
// Sign/zero-extending unscaled loads (ldursb/ldursh/ldurb/ldurh).
1068 static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1070 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1071 assert(-256 <= offset && offset < 256);
1072 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1075 static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1077 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1082 static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1084 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1089 static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1091 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
// Scaled-immediate stores; the out-of-range else branches are elided
// from this listing.
1096 static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1098 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1099 if (!(offset & 3) && offset <= 16380)
1100 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1105 static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1107 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1108 if (!(offset & 1) && offset <= 8190)
1109 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1114 static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1116 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 if ((u_int)offset < 4096)
1118 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
// 32x32->64 multiplies producing hi/lo halves; the actual umull/lsr
// instruction emission lines are elided from this listing.
1123 static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1125 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1133 static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1135 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
// Count leading zeros; the output_w32 line is elided.
1143 static void emit_clz(u_int rs,u_int rt)
1145 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1149 // special case for checking invalid_code
// Loads invalid_code[r >> 12] (byte table indexed by guest page) and
// compares it against imm, using HOST_TEMPREG as scratch.
1150 static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1152 host_tempreg_acquire();
1153 emit_shrimm(r, 12, HOST_TEMPREG);
1154 assem_debug("ldrb %s,[%s,%s]",regname[HOST_TEMPREG],regname64[rbase],regname64[HOST_TEMPREG]);
1155 output_w32(0x38606800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1156 emit_cmpimm(HOST_TEMPREG, imm);
1157 host_tempreg_release();
// Predicated immediate ops (ARM32 leftovers); bodies elided — presumably
// asserted out or emulated with csel on arm64.
1160 static void emit_orrne_imm(u_int rs,int imm,u_int rt)
1162 assem_debug("orrne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
1166 static void emit_andne_imm(u_int rs,int imm,u_int rt)
1168 assem_debug("andne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
1172 static unused void emit_addpl_imm(u_int rs,int imm,u_int rt)
1174 assem_debug("addpl %s,%s,#%#x\n",regname[rt],regname[rs],imm);
// ldr (literal) with a raw imm19 field, for literal-pool references.
1178 static void emit_loadlp_ofs(u_int ofs, u_int rt)
1180 output_w32(0x58000000 | imm19_rt(ofs, rt));
// Generic scaled-immediate load/store: picks LDR/STR, 32/64-bit width,
// and scales ofs by the access size (4 or 8 bytes).
1183 static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1185 u_int op = 0xb9000000;
1186 unused const char *ldst = is_st ? "st" : "ld";
1187 unused char rp = is64 ? 'x' : 'w';
1188 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1189 is64 = is64 ? 1 : 0;
1190 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1191 ofs = (ofs >> (2+is64));
1192 if (!is_st) op |= 0x00400000;
1193 if (is64) op |= 0x40000000;
1194 output_w32(op | imm12_rn_rd(ofs, rn, rt));
// Load/store pair (LDP/STP) with signed 7-bit scaled offset.
1197 static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1199 u_int op = 0x29000000;
1200 unused const char *ldst = is_st ? "st" : "ld";
1201 unused char rp = is64 ? 'x' : 'w';
1202 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1203 is64 = is64 ? 1 : 0;
1204 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1205 ofs = (ofs >> (2+is64));
1206 assert(-64 <= ofs && ofs <= 63);
1208 if (!is_st) op |= 0x00400000;
1209 if (is64) op |= 0x80000000;
1210 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
// Save or restore the registers in `reglist` to/from the stack save
// area, pairing adjacent regs into LDP/STP where possible.
// NOTE(review): heavily elided — the pair[] collection logic, ofs
// bookkeeping and the masking of the 7-bit offset field are missing
// from this view.
1213 static void save_load_regs_all(int is_store, u_int reglist)
1217 for (r = 0; reglist; r++, reglist >>= 1) {
1221 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1227 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1230 assert(ofs <= SSP_CALLER_REGS);
1233 // Save registers before function call
1234 static void save_regs(u_int reglist)
1236 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1237 save_load_regs_all(1, reglist);
1240 // Restore registers after function call
1241 static void restore_regs(u_int reglist)
1243 reglist &= CALLER_SAVE_REGS;
1244 save_load_regs_all(0, reglist);
1247 /* Stubs/epilogue */

// Literal-pool hooks are no-ops on arm64; literals are emitted explicitly
// by the dirty stubs (bodies not visible in this listing).
1249 static void literal_pool(int n)
1254 static void literal_pool_jumpover(int n)

1258 // parsed by get_pointer, find_extjump_insn
// Emit an external-jump stub: materialize the guest 'target' PC in r0
// with movz/movk, then (not visible here) branch to 'linker'.
// 'addr' must be the b/b.cond instruction that will be patched to us.
1259 static void emit_extjump2(u_char *addr, u_int target, void *linker)
1261 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1263 emit_movz(target & 0xffff, 0);
1264 emit_movk_lsl16(target >> 16, 0);
1266 // addr is in the current recompiled block (max 256k)
1267 // offset shouldn't exceed +/-1MB

// Consistency check: a stub produced by emit_extjump2 must start with the
// expected "movz w0, #imm16" instruction.
1272 static void check_extjump2(void *src)
1275 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1279 // put rt_val into rt, potentially making use of rs with value rs_val
1280 static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1282 int diff = rt_val - rs_val;
// Reachable with a single add-immediate (imm12, optionally LSL #12)?
1283 if ((-4096 <= diff && diff < 4096)
1284 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff)))
1285 emit_addimm(rs, diff, rt);
// Values differ only by a logical-immediate-encodable mask: single eor.
1286 else if (is_rotated_mask(rs_val ^ rt_val))
1287 emit_xorimm(rs, rs_val ^ rt_val, rt);
// Fall back to materializing the constant from scratch.
1289 emit_movimm(rt_val, rt);

1292 // return 1 if the above function can do its job cheaply
// Mirrors the tests in emit_movimm_from; the line declaring 'diff'
// (presumably from v1/v2) is missing from this listing.
1293 static int is_similar_value(u_int v1, u_int v2)
1296 return (-4096 <= diff && diff < 4096)
1297 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff))
1298 || is_rotated_mask(v1 ^ v2);
// Move two 64-bit arguments into x0/x1 for a C call, using x2 as scratch
// in the swap case so neither value is clobbered. A negative register
// number means "argument already in place / not needed". Branch lines
// are missing from this listing.
1302 static void pass_args64(u_int a0, u_int a1)
// Full swap: a0 must end up in x0 while a1 currently occupies it.
1306 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1308 else if(a0!=0&&a1==0) {
1310 if (a0>=0) emit_mov64(a0,0);
// No conflicts: plain moves, skipping regs already in position.
1313 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1314 if(a1>=0&&a1!=1) emit_mov64(a1,1);

// Sign/zero-extend a loaded/stored value from rs into rt according to the
// stub's access size; a full word needs only a register move. Cases for
// LOADBU/LOADW (and default) are missing from this listing.
1318 static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1321 case LOADB_STUB: emit_sbfm(rs, 7, rt); break; // sign-extend byte
1323 case STOREB_STUB: emit_ubfm(rs, 7, rt); break; // zero-extend byte
1324 case LOADH_STUB: emit_sbfm(rs, 15, rt); break; // sign-extend halfword
1326 case STOREH_STUB: emit_ubfm(rs, 15, rt); break; // zero-extend halfword
1328 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1333 #include "pcsxmem.h"
1334 //#include "pcsxmem_inline.c"
// Out-of-line slow path for a guest load: look up mem_rtab, perform the
// read directly for RAM-mapped pages, or call a jump_handler_read* C
// helper for I/O. Many lines (register save logic, branches) are missing
// from this listing.
1336 static void do_readstub(int n)
1338 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// Patch the branch in the compiled block to land here.
1339 set_jump_target(stubs[n].addr, out);
1340 enum stub_type type = stubs[n].type;
1342 int rs = stubs[n].b; // register holding the guest address
1343 const struct regstat *i_regs = (void *)stubs[n].c;
1344 u_int reglist = stubs[n].e;
1345 const signed char *i_regmap = i_regs->regmap;
// C1LS/C2LS/LOADLR deposit into FTEMP, other loads into the target gpr.
1347 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1348 rt=get_reg(i_regmap,FTEMP);
1350 rt=get_reg(i_regmap,rt1[i]);
1353 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1354 void *restore_jump = NULL, *handler_jump = NULL;
// Look for a host register not live in reglist to use as a temporary.
1356 for (r = 0; r < HOST_CCREG; r++) {
1357 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1362 if(rt>=0&&rt1[i]!=0)
1369 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
// Page-table lookup: entry = mem_rtab[addr >> 12].
1371 emit_readdword(&mem_rtab,temp);
1372 emit_shrimm(rs,12,temp2);
1373 emit_readdword_dualindexedx8(temp,temp2,temp2);
// Doubles the table entry and sets flags — presumably distinguishes
// handler entries from direct mappings; the branch is not visible here.
1374 emit_adds64(temp2,temp2,temp2);
// Direct-memory path: load through (table entry, guest address).
1377 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1379 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1380 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1381 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1382 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1383 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1389 emit_jmp(0); // jump to reg restore
1392 emit_jmp(stubs[n].retaddr); // return address
1393 set_jump_target(handler_jump, out);
// Handler path: pick the C helper matching the access size.
1398 if(type==LOADB_STUB||type==LOADBU_STUB)
1399 handler=jump_handler_read8;
1400 if(type==LOADH_STUB||type==LOADHU_STUB)
1401 handler=jump_handler_read16;
1402 if(type==LOADW_STUB)
1403 handler=jump_handler_read32;
1405 pass_args64(rs,temp2);
// Cycle count is passed in r2; reload from the CCREG spill slot if the
// cycle counter is not currently in a register.
1406 int cc=get_reg(i_regmap,CCREG);
1408 emit_loadreg(CCREG,2);
1409 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1411 // (no cycle reload after read)
// The handler's result arrives in r0; extend it into the target reg.
1412 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1413 loadstore_extend(type,0,rt);
1416 set_jump_target(restore_jump, out);
1417 restore_regs(reglist);
1418 emit_jmp(stubs[n].retaddr);
// Inline fast path for a load whose guest address is known at compile
// time: read directly from the mapped host address when possible,
// otherwise emit a call to the memhandler. Branch/brace lines are
// missing from this listing.
1421 static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1423 int rs=get_reg(regmap,target);
1424 int rt=get_reg(regmap,target);
// No register allocated for the target: use the temp (-1) slot for rs.
1425 if(rs<0) rs=get_reg(regmap,-1);
1428 uintptr_t host_addr = 0;
1430 int cc=get_reg(regmap,CCREG);
1431 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1433 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// handler == NULL means directly-mapped memory.
1434 if (handler == NULL) {
// Rebase the address register when guest and host addresses differ.
1437 if (addr != host_addr) {
1438 if (host_addr >= 0x100000000ull)
1439 abort(); // ROREG not implemented
1440 emit_movimm_from(addr, rs, host_addr, rs);
1443 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1444 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1445 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1446 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1447 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
// Handler path: choose the C helper by access size.
1452 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
1462 // call a memhandler
1463 if(rt>=0&&rt1[i]!=0)
// Guest address goes to r0, cycle count to r2.
1467 emit_movimm(addr,0);
1471 emit_loadreg(CCREG,2);
1472 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1474 emit_readdword(&mem_rtab,1);
1476 emit_call(do_memhandler_pre);
1480 // (no cycle reload after read)
// Result comes back in r0; extend into the destination register.
1481 if(rt>=0&&rt1[i]!=0)
1482 loadstore_extend(type, 0, rt);
1483 restore_regs(reglist);
// Out-of-line slow path for a guest store: mem_wtab lookup, direct store
// for RAM-mapped pages, or a jump_handler_write* call for I/O with an
// invalidation check on return. Many lines are missing from this listing.
1486 static void do_writestub(int n)
1488 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1489 set_jump_target(stubs[n].addr, out);
1490 enum stub_type type=stubs[n].type;
1493 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1494 u_int reglist=stubs[n].e;
1495 signed char *i_regmap=i_regs->regmap;
// C1LS/C2LS store from FTEMP, other stores from the rs2 source register.
1497 if(itype[i]==C1LS||itype[i]==C2LS) {
1498 rt=get_reg(i_regmap,r=FTEMP);
1500 rt=get_reg(i_regmap,r=rs2[i]);
1504 int rtmp,temp=-1,temp2,regs_saved=0;
1505 void *restore_jump = NULL, *handler_jump = NULL;
// Address and value registers must be treated as live here.
1506 int reglist2=reglist|(1<<rs)|(1<<rt);
1507 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1508 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1516 for(rtmp=0;rtmp<=3;rtmp++)
1517 if(rtmp!=rs&&rtmp!=rt)
1520 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1523 host_tempreg_acquire();
// Page-table lookup: entry = mem_wtab[addr >> 12].
1526 emit_readdword(&mem_wtab,temp);
1527 emit_shrimm(rs,12,temp2);
1528 emit_readdword_dualindexedx8(temp,temp2,temp2);
// Doubles the entry and sets flags — presumably flags handler entries;
// the conditional branch is not visible in this listing.
1529 emit_adds64(temp2,temp2,temp2);
1533 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1534 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1535 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1540 emit_jmp(0); // jump to reg restore
1543 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1544 set_jump_target(handler_jump, out);
1546 // TODO FIXME: regalloc should prefer callee-saved regs
// Handler path: choose the C helper by access size.
1551 case STOREB_STUB: handler=jump_handler_write8; break;
1552 case STOREH_STUB: handler=jump_handler_write16; break;
1553 case STOREW_STUB: handler=jump_handler_write32; break;
// Table entry is passed as the 4th argument (x3).
1559 emit_mov64(temp2,3);
1560 host_tempreg_release();
// Cycle count in r2, reloading from the CCREG spill slot if needed.
1562 int cc=get_reg(i_regmap,CCREG);
1564 emit_loadreg(CCREG,2);
1565 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1566 // returns new cycle_count
// Convert the returned count back into the block-local cycle register.
1568 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1570 emit_storereg(CCREG,2);
1572 set_jump_target(restore_jump, out);
1573 restore_regs(reglist);
1574 emit_jmp(stubs[n].retaddr);
// Inline fast path for a store with a compile-time-known guest address:
// direct store for mapped RAM, otherwise extend the value, call the
// handler between do_memhandler_pre/post, and fix up the cycle count.
// Branch/brace lines are missing from this listing.
1577 static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1579 int rs = get_reg(regmap,-1); // temp reg holding the guest address
1580 int rt = get_reg(regmap,target); // reg holding the value to store
1583 uintptr_t host_addr = 0;
1584 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
// handler == NULL means directly-mapped memory.
1585 if (handler == NULL) {
// Rebase the address register when guest and host addresses differ.
1586 if (addr != host_addr) {
1587 if (host_addr >= 0x100000000ull)
1588 abort(); // ROREG not implemented
1589 emit_movimm_from(addr, rs, host_addr, rs);
1592 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1593 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1594 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1600 // call a memhandler
1602 emit_writeword(rs, &address); // some handlers still need it
// Store value goes to r0, width-extended per the stub type.
1603 loadstore_extend(type, rt, 0);
// Cycle count in r2; reload from the CCREG spill slot when not live.
1605 cc = cc_use = get_reg(regmap, CCREG);
1607 emit_loadreg(CCREG, (cc_use = 2));
1608 emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2);
1610 emit_call(do_memhandler_pre);
1612 emit_call(do_memhandler_post);
// Convert the handler's returned count back into the local cycle reg.
1613 emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use);
1615 emit_storereg(CCREG, cc_use);
1616 restore_regs(reglist);
// Stub for unaligned (SWL/SWR-style) writes; body not visible in this
// listing beyond the debug print.
1619 static void do_unalignedwritestub(int n)
1621 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
// Patch a previously emitted "ldr Xt, <label>" (see emit_loadlp_ofs) so
// its imm19 field points at the literal 'lit' that now follows it.
static void set_loadlp(u_int *loadl, void *lit)
{
  // Forward distance in bytes from the instruction to its literal.
  uintptr_t dist = (uintptr_t)((u_char *)lit - (u_char *)loadl);
  assert((*loadl & ~0x1f) == 0x58000000); // must be LDR (literal) with imm19 == 0
  assert((dist & 3) == 0);                // literal must be word-aligned
  assert(dist < 0x100000);                // imm19 reach: we only patch forward, < 1MB
  *loadl |= (u_int)(dist >> 2) << 5;      // imm19 occupies bits 5..23
}
1634 // this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
// Emit the dirty-stub argument setup: x1 = &source, x2 = &copy (both via
// ldr-literal, patched later), w3 = block length in bytes, w0 = arg0.
1635 static void do_dirty_stub_emit_args(u_int arg0)
// slen*4 must fit the movz below.
1637 assert(slen <= MAXBLOCK);
1638 emit_loadlp_ofs(0, 1); // ldr x1, source
1639 emit_loadlp_ofs(0, 2); // ldr x2, copy
1640 emit_movz(slen*4, 3);
1641 emit_movz(arg0 & 0xffff, 0);
1642 emit_movk_lsl16(arg0 >> 16, 0);

// Emit the two 64-bit literals (source and copy pointers) and patch the
// ldr-literal instructions at 'loadlps' to reference them.
1645 static void do_dirty_stub_emit_literals(u_int *loadlps)
1647 set_loadlp(&loadlps[0], out);
1648 output_w64((uintptr_t)source);
1649 set_loadlp(&loadlps[1], out);
1650 output_w64((uintptr_t)copy);
// Emit a complete dirty stub for instruction i: record where the
// ldr-literal pair will go, emit args, call verify_code, jump to the
// compiled code, then emit the literal pool. Some lines (entry handling,
// return) are missing from this listing.
1653 static void *do_dirty_stub(int i)
1655 assem_debug("do_dirty_stub %x\n",start+i*4);
// Capture the position of the ldr-literal instructions BEFORE emitting.
1656 u_int *loadlps = (void *)out;
1657 do_dirty_stub_emit_args(start + i*4);
1658 emit_call(verify_code);
1662 entry = instr_addr[i];
1663 emit_jmp(instr_addr[i]);
1664 do_dirty_stub_emit_literals(loadlps);
1668 static void do_dirty_stub_ds()
1670 do_dirty_stub_emit_args(start + 1);
1671 u_int *loadlps = (void *)out;
1672 emit_call(verify_code_ds);
1673 emit_jmp(out + 8*2);
1674 do_dirty_stub_emit_literals(loadlps);
1679 #define shift_assemble shift_assemble_arm64
// Shift-by-register (SLLV/SRLV/SRAV) assembly; body not visible here.
1681 static void shift_assemble_arm64(int i,struct regstat *i_regs)

1685 #define loadlr_assemble loadlr_assemble_arm64
// Unaligned load (LWL/LWR) assembly; body not visible here.
1687 static void loadlr_assemble_arm64(int i,struct regstat *i_regs)

// COP2/GTE operation assembly; body not visible here.
1692 static void c2op_assemble(int i,struct regstat *i_regs)

// MULT/MULTU/DIV/DIVU assembly; body not visible here.
1697 static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1701 #define multdiv_assemble multdiv_assemble_arm64

// Indirect jump to a guest virtual address held in rs: resolve the host
// address via get_addr_ht (call setup lines not visible).
1703 static void do_jump_vaddr(u_int rs)
1707 emit_call(get_addr_ht);

1711 static void do_preload_rhash(u_int r) {
1712 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1713 // register. On ARM the hash can be done with a single instruction (below)

// Load the mini hashtable base address (FP-relative) into ht.
1716 static void do_preload_rhtbl(u_int ht) {
1717 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);

// Hash = address bits 3..7 (8-byte-aligned index into mini_ht).
1720 static void do_rhash(u_int rs,u_int rh) {
1721 emit_andimm(rs, 0xf8, rh);
// Advance ht to the hashed entry and load its stored vaddr (32-bit).
1724 static void do_miniht_load(int ht, u_int rh) {
1725 emit_add64(ht, rh, ht);
1726 emit_ldst(0, 0, rh, ht, 0); // ldr w(rh), [x(ht)]

// Compare the looked-up vaddr with rs; on a hit, load the cached host
// address at [ht+8] and jump there (compare/branch lines not visible).
1729 static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1735 set_jump_target(jaddr, out);
1736 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
// 0xf9400000 = LDR Xt,[Xn,#imm12*8]; offset 8 -> scaled imm 1.
1737 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));

1741 // parsed by set_jump_target?
// Insert (return_address -> host address) into the mini hashtable:
// rt gets the guest return address; 'temp' holds the host-side target
// (resolved later via add_to_linker).
1742 static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1743 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1744 emit_movk(return_address&0xffff,rt);
1745 add_to_linker(out,return_address,1);
// Entry layout: [0] = guest vaddr (32-bit), [1] = host address (64-bit).
1747 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1748 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1751 static void mark_clear_cache(void *target)
1753 u_long offset = (u_char *)target - translation_cache;
1754 u_int mask = 1u << ((offset >> 12) & 31);
1755 if (!(needs_clear_cache[offset >> 17] & mask)) {
1756 char *start = (char *)((u_long)target & ~4095ul);
1757 start_tcache_write(start, start + 4096);
1758 needs_clear_cache[offset >> 17] |= mask;
1762 // Clearing the cache is rather slow on ARM Linux, so mark the areas
1763 // that need to be cleared, and then only clear these areas once.
1764 static void do_clear_cache()
// Walk each 128KB region's dirty-page bitmap and flush the marked
// ranges; bitmap-scanning and range-merging lines are missing from
// this listing.
1767 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
1769 u_int bitmap=needs_clear_cache[i];
1771 u_char *start, *end;
// Each bit j covers one 4KB page inside region i (128KB = 131072).
1775 start=translation_cache+i*131072+j*4096;
1783 end_tcache_write(start, end);
// Reset the region's bitmap once flushed.
1789 needs_clear_cache[i]=0;
1794 // CPU-architecture-specific initialization
// Body not visible in this listing (likely empty on arm64).
1795 static void arch_init() {
1798 // vim:shiftwidth=2:expandtab