drc: simplify cache flush for some platforms
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define unused __attribute__((unused))
27
d1e4ebd9 28void do_memhandler_pre();
29void do_memhandler_post();
be516ebe 30
31/* Linker */
d1e4ebd9 32static void set_jump_target(void *addr, void *target)
be516ebe 33{
d1e4ebd9 34 u_int *ptr = addr;
35 intptr_t offset = (u_char *)target - (u_char *)addr;
36
3039c914 37 ptr += ndrc_write_ofs / sizeof(ptr[0]);
38
3968e69e 39 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 40 assert(offset>=-134217728LL&&offset<134217728LL);
41 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
42 }
3968e69e 43 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
44 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 45 // Conditional branch are limited to +/- 1MB
46 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 47 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 48 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 49 assert(-1048576 <= offset && offset < 1048576);
d1e4ebd9 50 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
51 }
3968e69e 52 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 53 // generated by do_miniht_insert
54 assert(offset>=-1048576LL&&offset<1048576LL);
55 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
56 }
57 else
3968e69e 58 abort(); // should not happen
be516ebe 59}
60
// From a pointer to an external jump stub (which was produced by emit_extjump2)
// find where the jumping insn is.
// The third word of the stub must be an adr instruction (asserted); its
// 21-bit pc-relative immediate is decoded and applied to the address of
// that adr word.
// Returns a pointer to the word the adr refers to.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode using unsigned arithmetic: shifting signed values left is
  // undefined when bits are shifted into/out of the sign bit
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // imm21 = immhi (bits 23:5) : immlo (bits 30:29)
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from 21 bits
  int offset = (int)(imm21 ^ 0x100000) - 0x100000;
  return ptr + offset / 4;
}
70
104df9d3 71#if 0
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn that belongs to the stub (find_extjump_insn),
// decode its pc-relative immediate,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  int insn = *branch;

  if ((insn&0xfc000000) == 0x14000000) {
    // b: signed word offset in bits 25:0
    return branch + ((signed int)(insn<<6)>>6);
  }
  if ((insn&0xff000000) == 0x54000000      // b.cond
      || (insn&0x7e000000) == 0x34000000) { // cbz/cbnz
    // signed word offset in bits 23:5
    return branch + ((signed int)(insn<<8)>>13);
  }
  assert(0);
  return NULL;
}
104df9d3 87#endif
be516ebe 88
be516ebe 89// Allocate a specific ARM register.
90static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91{
92 int n;
93 int dirty=0;
94
95 // see if it's already allocated (and dealloc it)
96 for(n=0;n<HOST_REGS;n++)
97 {
98 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
99 dirty=(cur->dirty>>n)&1;
100 cur->regmap[n]=-1;
101 }
102 }
103
104 cur->regmap[hr]=reg;
105 cur->dirty&=~(1<<hr);
106 cur->dirty|=dirty<<hr;
107 cur->isconst&=~(1<<hr);
108}
109
110// Alloc cycle count into dedicated register
111static void alloc_cc(struct regstat *cur,int i)
112{
113 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
114}
115
116/* Special alloc */
117
118
119/* Assembler */
120
121static unused const char *regname[32] = {
d1e4ebd9 122 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
123 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
124 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
125 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
126};
127
128static unused const char *regname64[32] = {
129 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
130 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
131 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
132 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
133};
134
// Condition codes, in the order of their 4-bit encoding (0..15).
// The same order is used by the condname[] debug table.
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
139
140static unused const char *condname[16] = {
141 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
142 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 143};
144
be516ebe 145static void output_w32(u_int word)
146{
3039c914 147 *((u_int *)(out + ndrc_write_ofs)) = word;
be516ebe 148 out += 4;
149}
150
// Build the Rn (bits 9:5) and Rd (bits 4:0) fields.
// Register 31 is rejected for both operands.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rn << 5;
  fields |= rd;
  return fields;
}
157
// Build the Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) fields.
// Each register number must be below 32.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rm << 16;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
165
3968e69e 166static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
167{
168 assert(ra < 32);
169 return rm_rn_rd(rm, rn, rd) | (ra << 10);
170}
171
// Build the imm7 (bits 21:15), Rt2 (bits 14:10), Rn (bits 9:5)
// and Rt (bits 4:0) fields.
// Rt and Rt2 must be below 31, Rn below 32, imm7 below 0x80.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = imm7 << 15;
  fields |= rt2 << 10;
  fields |= rn << 5;
  fields |= rt;
  return fields;
}
180
687b4580 181static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
182{
183 assert(imm6 <= 63);
184 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
185}
186
// Build the imm16 (bits 20:5) and Rd (bits 4:0) fields.
// Register 31 is rejected.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = imm16 << 5;
  fields |= rd;
  return fields;
}
193
// Build the imm12 (bits 21:10), Rn (bits 9:5) and Rd (bits 4:0) fields.
// Register number 31 is accepted for both Rn and Rd.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = imm12 << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
201
// Build the imm9 (bits 20:12), Rn (bits 9:5) and Rt (bits 4:0) fields.
// Register 31 is rejected for both operands.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = imm9 << 12;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
209
// Build the imm19 (bits 23:5) and Rt (bits 4:0) fields.
// Register 31 is rejected.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = imm19 << 5;
  fields |= rt;
  return fields;
}
216
// Build the N (bit 22), immr (bits 21:16), imms (bits 15:10),
// Rn (bits 9:5) and Rd (bits 4:0) fields, as used here by the
// bitfield and logical-immediate instructions.
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = n << 22;
  fields |= immr << 16;
  fields |= imms << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
226
227static u_int genjmp(const u_char *addr)
be516ebe 228{
229 intptr_t offset = addr - out;
d1e4ebd9 230 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 231 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 232 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
233 abort();
be516ebe 234 return 0;
235 }
d1e4ebd9 236 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 237}
238
d1e4ebd9 239static u_int genjmpcc(const u_char *addr)
be516ebe 240{
241 intptr_t offset = addr - out;
d1e4ebd9 242 if ((uintptr_t)addr < 3) return 0;
be516ebe 243 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 244 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
245 abort();
246 return 0;
247 }
248 return ((u_int)offset >> 2) & 0x7ffff;
249}
250
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two or wraps to 0), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
255
256// This function returns true if the argument contains a
257// non-empty sequence of ones (possibly rotated) with the remainder zero.
258static uint32_t is_rotated_mask(u_int value)
259{
3968e69e 260 if (value == 0 || value == ~0)
be516ebe 261 return 0;
d1e4ebd9 262 if (is_mask((value - 1) | value))
263 return 1;
264 return is_mask((~value - 1) | ~value);
265}
266
267static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
268{
269 int lzeros, tzeros, ones;
270 assert(value != 0);
271 if (is_mask((value - 1) | value)) {
272 lzeros = __builtin_clz(value);
273 tzeros = __builtin_ctz(value);
274 ones = 32 - lzeros - tzeros;
275 *immr = (32 - tzeros) & 31;
276 *imms = ones - 1;
277 return;
be516ebe 278 }
d1e4ebd9 279 value = ~value;
280 if (is_mask((value - 1) | value)) {
281 lzeros = __builtin_clz(value);
282 tzeros = __builtin_ctz(value);
283 ones = 32 - lzeros - tzeros;
3968e69e 284 *immr = lzeros;
d1e4ebd9 285 *imms = 31 - ones;
286 return;
287 }
3968e69e 288 abort();
be516ebe 289}
290
291static void emit_mov(u_int rs, u_int rt)
292{
687b4580 293 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 294 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
295}
296
297static void emit_mov64(u_int rs, u_int rt)
298{
299 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
300 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 301}
302
687b4580 303static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 304{
d1e4ebd9 305 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
306 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 307}
308
d1e4ebd9 309static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 310{
d1e4ebd9 311 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
312 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 313}
314
d1e4ebd9 315static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 316{
3968e69e 317 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 318 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
319}
39b71d9a 320#define emit_adds_ptr emit_adds64
d1e4ebd9 321
322static void emit_neg(u_int rs, u_int rt)
323{
324 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
325 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 326}
327
687b4580 328static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
d1e4ebd9 330 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 331 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 332}
333
3968e69e 334static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
335{
336 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
337 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
338}
339
d1e4ebd9 340static void emit_movz(u_int imm, u_int rt)
be516ebe 341{
d1e4ebd9 342 assem_debug("movz %s,#%#x\n", regname[rt], imm);
343 output_w32(0x52800000 | imm16_rd(imm, rt));
344}
345
346static void emit_movz_lsl16(u_int imm, u_int rt)
347{
348 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
349 output_w32(0x52a00000 | imm16_rd(imm, rt));
350}
351
352static void emit_movn(u_int imm, u_int rt)
353{
354 assem_debug("movn %s,#%#x\n", regname[rt], imm);
355 output_w32(0x12800000 | imm16_rd(imm, rt));
356}
357
358static void emit_movn_lsl16(u_int imm,u_int rt)
359{
360 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x12a00000 | imm16_rd(imm, rt));
362}
363
364static void emit_movk(u_int imm,u_int rt)
365{
366 assem_debug("movk %s,#%#x\n", regname[rt], imm);
367 output_w32(0x72800000 | imm16_rd(imm, rt));
368}
369
370static void emit_movk_lsl16(u_int imm,u_int rt)
371{
372 assert(imm<65536);
3968e69e 373 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 374 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 375}
376
377static void emit_zeroreg(u_int rt)
378{
d1e4ebd9 379 emit_movz(0, rt);
be516ebe 380}
381
be516ebe 382static void emit_movimm(u_int imm, u_int rt)
383{
d1e4ebd9 384 if (imm < 65536)
385 emit_movz(imm, rt);
386 else if ((~imm) < 65536)
387 emit_movn(~imm, rt);
388 else if ((imm&0xffff) == 0)
389 emit_movz_lsl16(imm >> 16, rt);
390 else if (((~imm)&0xffff) == 0)
391 emit_movn_lsl16(~imm >> 16, rt);
392 else if (is_rotated_mask(imm)) {
393 u_int immr, imms;
394 gen_logical_imm(imm, &immr, &imms);
395 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
396 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
397 }
be516ebe 398 else {
d1e4ebd9 399 emit_movz(imm & 0xffff, rt);
400 emit_movk_lsl16(imm >> 16, rt);
be516ebe 401 }
402}
403
687b4580 404static void emit_readword(void *addr, u_int rt)
405{
406 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
407 if (!(offset & 3) && offset <= 16380) {
408 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
409 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
410 }
411 else
3968e69e 412 abort();
687b4580 413}
414
d1e4ebd9 415static void emit_readdword(void *addr, u_int rt)
416{
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 7) && offset <= 32760) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
420 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
421 }
3968e69e 422 else
423 abort();
424}
39b71d9a 425#define emit_readptr emit_readdword
3968e69e 426
427static void emit_readshword(void *addr, u_int rt)
428{
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 1) && offset <= 8190) {
431 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
432 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
433 }
d1e4ebd9 434 else
435 assert(0);
436}
437
be516ebe 438static void emit_loadreg(u_int r, u_int hr)
439{
d1e4ebd9 440 int is64 = 0;
be516ebe 441 if (r == 0)
442 emit_zeroreg(hr);
443 else {
33788798 444 void *addr;
be516ebe 445 switch (r) {
7c3a5182 446 //case HIREG: addr = &hi; break;
447 //case LOREG: addr = &lo; break;
be516ebe 448 case CCREG: addr = &cycle_count; break;
449 case CSREG: addr = &Status; break;
d1e4ebd9 450 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 451 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 452 default:
453 assert(r < 34);
454 addr = &psxRegs.GPR.r[r];
455 break;
be516ebe 456 }
d1e4ebd9 457 if (is64)
458 emit_readdword(addr, hr);
459 else
460 emit_readword(addr, hr);
be516ebe 461 }
462}
463
687b4580 464static void emit_writeword(u_int rt, void *addr)
465{
466 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
467 if (!(offset & 3) && offset <= 16380) {
468 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
469 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
470 }
471 else
472 assert(0);
473}
474
d1e4ebd9 475static void emit_writedword(u_int rt, void *addr)
476{
477 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
478 if (!(offset & 7) && offset <= 32760) {
479 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 480 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 481 }
482 else
3968e69e 483 abort();
d1e4ebd9 484}
485
687b4580 486static void emit_storereg(u_int r, u_int hr)
be516ebe 487{
488 assert(r < 64);
7c3a5182 489 void *addr = &psxRegs.GPR.r[r];
be516ebe 490 switch (r) {
7c3a5182 491 //case HIREG: addr = &hi; break;
492 //case LOREG: addr = &lo; break;
be516ebe 493 case CCREG: addr = &cycle_count; break;
7c3a5182 494 default: assert(r < 34); break;
be516ebe 495 }
687b4580 496 emit_writeword(hr, addr);
be516ebe 497}
498
499static void emit_test(u_int rs, u_int rt)
500{
d1e4ebd9 501 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
502 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 503}
504
d1e4ebd9 505static void emit_testimm(u_int rs, u_int imm)
be516ebe 506{
d1e4ebd9 507 u_int immr, imms;
687b4580 508 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 509 assert(is_rotated_mask(imm)); // good enough for PCSX
510 gen_logical_imm(imm, &immr, &imms);
3968e69e 511 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 512}
513
514static void emit_not(u_int rs,u_int rt)
515{
516 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 517 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 518}
519
be516ebe 520static void emit_and(u_int rs1,u_int rs2,u_int rt)
521{
522 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 523 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 524}
525
526static void emit_or(u_int rs1,u_int rs2,u_int rt)
527{
528 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 529 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 530}
531
3968e69e 532static void emit_bic(u_int rs1,u_int rs2,u_int rt)
533{
534 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
535 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
536}
537
be516ebe 538static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
539{
be516ebe 540 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 541 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 542}
543
544static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
545{
be516ebe 546 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 547 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 548}
549
3968e69e 550static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
551{
552 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
553 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
554}
555
be516ebe 556static void emit_xor(u_int rs1,u_int rs2,u_int rt)
557{
558 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 559 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 560}
561
3968e69e 562static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
563{
564 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
565 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
566}
567
d1e4ebd9 568static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 569{
d1e4ebd9 570 unused const char *st = s ? "s" : "";
571 s = s ? 0x20000000 : 0;
572 is64 = is64 ? 0x80000000 : 0;
687b4580 573 if (imm < 4096) {
d1e4ebd9 574 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
575 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 576 }
577 else if (-imm < 4096) {
3968e69e 578 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 579 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
580 }
581 else if (imm < 16777216) {
582 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
583 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
584 if ((imm & 0xfff) || s) {
585 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 586 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 587 }
588 }
589 else if (-imm < 16777216) {
590 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
591 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
592 if ((imm & 0xfff) || s) {
593 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
594 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
595 }
687b4580 596 }
597 else
3968e69e 598 abort();
be516ebe 599}
600
d1e4ebd9 601static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
602{
603 emit_addimm_s(0, 0, rs, imm, rt);
604}
605
606static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
607{
608 emit_addimm_s(0, 1, rs, imm, rt);
609}
610
be516ebe 611static void emit_addimm_and_set_flags(int imm, u_int rt)
612{
d1e4ebd9 613 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 614}
615
d1e4ebd9 616static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 617{
d1e4ebd9 618 const char *names[] = { "and", "orr", "eor", "ands" };
619 const char *name = names[op];
620 u_int immr, imms;
621 op = op << 29;
622 if (is_rotated_mask(imm)) {
623 gen_logical_imm(imm, &immr, &imms);
624 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
625 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
626 }
627 else {
628 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
629 host_tempreg_acquire();
630 emit_movimm(imm, HOST_TEMPREG);
631 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
632 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
633 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
634 host_tempreg_release();
635 }
636 (void)name;
be516ebe 637}
638
d1e4ebd9 639static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 640{
d1e4ebd9 641 if (imm == 0)
642 emit_zeroreg(rt);
643 else
644 emit_logicop_imm(0, rs, imm, rt);
be516ebe 645}
646
d1e4ebd9 647static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 648{
d1e4ebd9 649 if (imm == 0) {
650 if (rs != rt)
651 emit_mov(rs, rt);
652 }
653 else
654 emit_logicop_imm(1, rs, imm, rt);
be516ebe 655}
656
d1e4ebd9 657static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 658{
d1e4ebd9 659 if (imm == 0) {
660 if (rs != rt)
661 emit_mov(rs, rt);
662 }
663 else
664 emit_logicop_imm(2, rs, imm, rt);
be516ebe 665}
666
d1e4ebd9 667static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 668{
d1e4ebd9 669 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
670 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 671}
672
d1e4ebd9 673static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 674{
d1e4ebd9 675 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
676 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 677}
678
679static void emit_shlimm(u_int rs,u_int imm,u_int rt)
680{
be516ebe 681 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 682 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 683}
684
3968e69e 685static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 686{
3968e69e 687 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
688 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 689}
690
3968e69e 691static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 692{
be516ebe 693 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 694 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 695}
696
697static void emit_sarimm(u_int rs,u_int imm,u_int rt)
698{
be516ebe 699 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 700 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 701}
702
703static void emit_rorimm(u_int rs,u_int imm,u_int rt)
704{
3968e69e 705 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 706 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 707}
708
709static void emit_signextend16(u_int rs, u_int rt)
710{
711 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 712 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 713}
714
d1e4ebd9 715static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 716{
3968e69e 717 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 718 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 719}
720
d1e4ebd9 721static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 722{
d1e4ebd9 723 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
724 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 725}
726
d1e4ebd9 727static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 728{
d1e4ebd9 729 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
730 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 731}
732
d1e4ebd9 733static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 734{
d1e4ebd9 735 if (imm < 4096) {
736 assem_debug("cmp %s,%#x\n", regname[rs], imm);
737 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
738 }
739 else if (-imm < 4096) {
740 assem_debug("cmn %s,%#x\n", regname[rs], imm);
741 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
742 }
743 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 744 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 745 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
746 }
747 else {
748 host_tempreg_acquire();
749 emit_movimm(imm, HOST_TEMPREG);
750 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
751 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
752 host_tempreg_release();
753 }
be516ebe 754}
755
d1e4ebd9 756static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 757{
d1e4ebd9 758 assert(imm == 0 || imm == 1);
759 assert(cond0 < 0x10);
760 assert(cond1 < 0x10);
761 if (imm) {
762 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
763 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
764 } else {
765 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
766 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
767 }
be516ebe 768}
769
d1e4ebd9 770static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 771{
d1e4ebd9 772 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 773}
774
d1e4ebd9 775static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 776{
d1e4ebd9 777 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 778}
779
780static void emit_cmovb_imm(int imm,u_int rt)
781{
d1e4ebd9 782 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 783}
784
3968e69e 785static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 786{
3968e69e 787 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
788 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 789}
790
791static void emit_cmovne_reg(u_int rs,u_int rt)
792{
d1e4ebd9 793 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
794 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 795}
796
797static void emit_cmovl_reg(u_int rs,u_int rt)
798{
d1e4ebd9 799 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
800 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 801}
802
e3c6bdb5 803static void emit_cmovb_reg(u_int rs,u_int rt)
804{
805 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
806 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
807}
808
be516ebe 809static void emit_cmovs_reg(u_int rs,u_int rt)
810{
d1e4ebd9 811 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
812 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 813}
814
3968e69e 815static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
816{
817 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
818 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
819}
820
be516ebe 821static void emit_slti32(u_int rs,int imm,u_int rt)
822{
823 if(rs!=rt) emit_zeroreg(rt);
824 emit_cmpimm(rs,imm);
825 if(rs==rt) emit_movimm(0,rt);
826 emit_cmovl_imm(1,rt);
827}
828
829static void emit_sltiu32(u_int rs,int imm,u_int rt)
830{
831 if(rs!=rt) emit_zeroreg(rt);
832 emit_cmpimm(rs,imm);
833 if(rs==rt) emit_movimm(0,rt);
834 emit_cmovb_imm(1,rt);
835}
836
837static void emit_cmp(u_int rs,u_int rt)
838{
839 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 840 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 841}
842
882a08fc 843static void emit_cmpcs(u_int rs,u_int rt)
844{
845 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
846 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
847}
848
be516ebe 849static void emit_set_gz32(u_int rs, u_int rt)
850{
851 //assem_debug("set_gz32\n");
852 emit_cmpimm(rs,1);
853 emit_movimm(1,rt);
854 emit_cmovl_imm(0,rt);
855}
856
857static void emit_set_nz32(u_int rs, u_int rt)
858{
859 //assem_debug("set_nz32\n");
d1e4ebd9 860 if(rs!=rt) emit_mov(rs,rt);
861 emit_test(rs,rs);
862 emit_cmovne_imm(1,rt);
be516ebe 863}
864
865static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
866{
867 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
868 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
869 emit_cmp(rs1,rs2);
870 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
871 emit_cmovl_imm(1,rt);
872}
873
874static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
875{
876 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
878 emit_cmp(rs1,rs2);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovb_imm(1,rt);
881}
882
2a014d73 883static int can_jump_or_call(const void *a)
884{
885 intptr_t diff = (u_char *)a - out;
886 return (-134217728 <= diff && diff <= 134217727);
887}
888
d1e4ebd9 889static void emit_call(const void *a)
be516ebe 890{
d1e4ebd9 891 intptr_t diff = (u_char *)a - out;
892 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 893 assert(!(diff & 3));
894 if (-134217728 <= diff && diff <= 134217727)
895 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
896 else
3968e69e 897 abort();
be516ebe 898}
899
d1e4ebd9 900static void emit_jmp(const void *a)
be516ebe 901{
d1e4ebd9 902 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
903 u_int offset = genjmp(a);
904 output_w32(0x14000000 | offset);
be516ebe 905}
906
d1e4ebd9 907static void emit_jne(const void *a)
be516ebe 908{
d1e4ebd9 909 assem_debug("bne %p\n", a);
910 u_int offset = genjmpcc(a);
911 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 912}
913
7c3a5182 914static void emit_jeq(const void *a)
be516ebe 915{
d1e4ebd9 916 assem_debug("beq %p\n", a);
917 u_int offset = genjmpcc(a);
918 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 919}
920
7c3a5182 921static void emit_js(const void *a)
be516ebe 922{
d1e4ebd9 923 assem_debug("bmi %p\n", a);
924 u_int offset = genjmpcc(a);
925 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 926}
927
7c3a5182 928static void emit_jns(const void *a)
be516ebe 929{
d1e4ebd9 930 assem_debug("bpl %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 933}
934
7c3a5182 935static void emit_jl(const void *a)
be516ebe 936{
d1e4ebd9 937 assem_debug("blt %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 940}
941
7c3a5182 942static void emit_jge(const void *a)
be516ebe 943{
d1e4ebd9 944 assem_debug("bge %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 947}
948
7c3a5182 949static void emit_jno(const void *a)
be516ebe 950{
d1e4ebd9 951 assem_debug("bvc %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 954}
955
7c3a5182 956static void emit_jc(const void *a)
be516ebe 957{
d1e4ebd9 958 assem_debug("bcs %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 961}
962
3968e69e 963static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 964{
3968e69e 965 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 966 u_int offset = genjmpcc(a);
3968e69e 967 is64 = is64 ? 0x80000000 : 0;
968 isnz = isnz ? 0x01000000 : 0;
969 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
970}
971
104df9d3 972static unused void emit_cbz(const void *a, u_int r)
3968e69e 973{
974 emit_cb(0, 0, a, r);
be516ebe 975}
976
977static void emit_jmpreg(u_int r)
978{
3968e69e 979 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 980 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 981}
982
983static void emit_retreg(u_int r)
984{
d1e4ebd9 985 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 986 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
987}
988
989static void emit_ret(void)
990{
991 emit_retreg(LR);
992}
993
d1e4ebd9 994static void emit_adr(void *addr, u_int rt)
995{
996 intptr_t offset = (u_char *)addr - out;
997 assert(-1048576 <= offset && offset < 1048576);
3968e69e 998 assert(rt < 31);
d1e4ebd9 999 assem_debug("adr x%d,#%#lx\n", rt, offset);
1000 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1001}
1002
3968e69e 1003static void emit_adrp(void *addr, u_int rt)
1004{
1005 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1006 assert(-4294967296l <= offset && offset < 4294967296l);
1007 assert(rt < 31);
1008 offset >>= 12;
1009 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1010 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1011}
1012
be516ebe 1013static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1014{
d1e4ebd9 1015 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1016 assert(-256 <= offset && offset < 256);
1017 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1018}
1019
1020static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1021{
1022 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1024}
1025
1026static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1027{
1028 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1030}
1031
1032static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1033{
1034 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1036}
1037
1038static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1039{
1040 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1042}
39b71d9a 1043#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1044
1045static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062
1063static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064{
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1067}
1068
1069static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070{
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1073}
1074
be516ebe 1075static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1076{
d1e4ebd9 1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1080}
1081
1082static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1083{
d1e4ebd9 1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1087}
1088
1089static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1090{
d1e4ebd9 1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1094}
1095
1096static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1097{
d1e4ebd9 1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1101}
1102
be516ebe 1103static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1104{
3968e69e 1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1108 }
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1112 }
687b4580 1113 else
1114 assert(0);
be516ebe 1115}
1116
1117static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1118{
3968e69e 1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1122 }
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1126 }
687b4580 1127 else
1128 assert(0);
be516ebe 1129}
1130
1131static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1132{
3968e69e 1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
687b4580 1141 else
1142 assert(0);
be516ebe 1143}
1144
3968e69e 1145static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1146{
3968e69e 1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1149}
1150
3968e69e 1151static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1152{
3968e69e 1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1155}
1156
1157static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1158{
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1161}
1162
1163static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1164{
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1167}
1168
3968e69e 1169static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1170{
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1173}
1174
1175static void emit_clz(u_int rs, u_int rt)
be516ebe 1176{
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1179}
1180
be516ebe 1181// special case for checking invalid_code
d1e4ebd9 1182static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1183{
d1e4ebd9 1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
be516ebe 1190}
1191
3968e69e 1192// special for loadlr_assemble, rs2 is destroyed
1193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1194{
3968e69e 1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
be516ebe 1197}
1198
3968e69e 1199static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1200{
3968e69e 1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
be516ebe 1203}
1204
687b4580 1205static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1206{
687b4580 1207 u_int op = 0xb9000000;
d1e4ebd9 1208 unused const char *ldst = is_st ? "st" : "ld";
1209 unused char rp = is64 ? 'x' : 'w';
687b4580 1210 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1211 is64 = is64 ? 1 : 0;
1212 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1213 ofs = (ofs >> (2+is64));
687b4580 1214 if (!is_st) op |= 0x00400000;
1215 if (is64) op |= 0x40000000;
d1e4ebd9 1216 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1217}
1218
687b4580 1219static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1220{
687b4580 1221 u_int op = 0x29000000;
d1e4ebd9 1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
687b4580 1224 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1225 is64 = is64 ? 1 : 0;
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1227 ofs = (ofs >> (2+is64));
1228 assert(-64 <= ofs && ofs <= 63);
1229 ofs &= 0x7f;
1230 if (!is_st) op |= 0x00400000;
1231 if (is64) op |= 0x80000000;
d1e4ebd9 1232 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1233}
1234
1235static void save_load_regs_all(int is_store, u_int reglist)
1236{
1237 int ofs = 0, c = 0;
1238 u_int r, pair[2];
1239 for (r = 0; reglist; r++, reglist >>= 1) {
1240 if (reglist & 1)
1241 pair[c++] = r;
1242 if (c == 2) {
1243 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1244 ofs += 8 * 2;
1245 c = 0;
1246 }
1247 }
1248 if (c) {
1249 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1250 ofs += 8;
1251 }
1252 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1253}
1254
1255// Save registers before function call
1256static void save_regs(u_int reglist)
1257{
1258 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1259 save_load_regs_all(1, reglist);
be516ebe 1260}
1261
1262// Restore registers after function call
1263static void restore_regs(u_int reglist)
1264{
1265 reglist &= CALLER_SAVE_REGS;
687b4580 1266 save_load_regs_all(0, reglist);
be516ebe 1267}
1268
1269/* Stubs/epilogue */
1270
1271static void literal_pool(int n)
1272{
1273 (void)literals;
1274}
1275
/* Nothing is emitted here in this backend; the function is a no-op. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1279
d1e4ebd9 1280// parsed by get_pointer, find_extjump_insn
104df9d3 1281static void emit_extjump(u_char *addr, u_int target)
be516ebe 1282{
d1e4ebd9 1283 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1284
d1e4ebd9 1285 emit_movz(target & 0xffff, 0);
1286 emit_movk_lsl16(target >> 16, 0);
1287
1288 // addr is in the current recompiled block (max 256k)
1289 // offset shouldn't exceed +/-1MB
1290 emit_adr(addr, 1);
104df9d3 1291 emit_far_jump(dyna_linker);
be516ebe 1292}
1293
d1e4ebd9 1294static void check_extjump2(void *src)
be516ebe 1295{
d1e4ebd9 1296 u_int *ptr = src;
1297 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1298 (void)ptr;
be516ebe 1299}
1300
1301// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1302static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1303{
d1e4ebd9 1304 int diff = rt_val - rs_val;
3968e69e 1305 if ((-4096 < diff && diff < 4096)
1306 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1307 emit_addimm(rs, diff, rt);
3968e69e 1308 else if (rt_val == ~rs_val)
1309 emit_not(rs, rt);
d1e4ebd9 1310 else if (is_rotated_mask(rs_val ^ rt_val))
1311 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1312 else
d1e4ebd9 1313 emit_movimm(rt_val, rt);
be516ebe 1314}
1315
d1e4ebd9 1316// return 1 if the above function can do it's job cheaply
687b4580 1317static int is_similar_value(u_int v1, u_int v2)
be516ebe 1318{
687b4580 1319 int diff = v1 - v2;
3968e69e 1320 return (-4096 < diff && diff < 4096)
1321 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1322 || v1 == ~v2
d1e4ebd9 1323 || is_rotated_mask(v1 ^ v2);
1324}
1325
37387d8b 1326static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1327{
1328 if (rt_val < 0x100000000ull) {
1329 emit_movimm_from(rs_val, rs, rt_val, rt);
1330 return;
1331 }
1332 // just move the whole thing. At least on Linux all addresses
1333 // seem to be 48bit, so 3 insns - not great not terrible
1334 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1335 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1336 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1337 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1338 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1339 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1340 if (rt_val >> 48) {
1341 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1342 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1343 }
1344}
1345
1346// trashes x2
d1e4ebd9 1347static void pass_args64(u_int a0, u_int a1)
1348{
1349 if(a0==1&&a1==0) {
1350 // must swap
1351 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1352 }
1353 else if(a0!=0&&a1==0) {
1354 emit_mov64(a1,1);
1355 if (a0>=0) emit_mov64(a0,0);
1356 }
1357 else {
1358 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1359 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1360 }
be516ebe 1361}
1362
d1e4ebd9 1363static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1364{
1365 switch(type) {
1366 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1367 case LOADBU_STUB:
1368 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1369 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1370 case LOADHU_STUB:
1371 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1372 case LOADW_STUB:
1373 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1374 default: assert(0);
d1e4ebd9 1375 }
1376}
1377
1378#include "pcsxmem.h"
be516ebe 1379//#include "pcsxmem_inline.c"
1380
// Emit the out-of-line code for read stub n.
// The stub's recorded branch (stubs[n].addr) is pointed at the code emitted
// here. That code looks the address up in mem_rtab and then either loads
// directly through the looked-up value or calls one of the
// jump_handler_read8/16/32 routines, and finally jumps back to
// stubs[n].retaddr.
1381static void do_readstub(int n)
1382{
1383 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1384 set_jump_target(stubs[n].addr, out);
1385 enum stub_type type = stubs[n].type;
1386 int i = stubs[n].a;
1387 int rs = stubs[n].b;
1388 const struct regstat *i_regs = (void *)stubs[n].c;
1389 u_int reglist = stubs[n].e;
1390 const signed char *i_regmap = i_regs->regmap;
1391 int rt;
 // destination host register: FTEMP for C1LS/C2LS/LOADLR, otherwise rt1
cf95b4f0 1392 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1393 rt=get_reg(i_regmap,FTEMP);
1394 }else{
cf95b4f0 1395 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1396 }
1397 assert(rs>=0);
1398 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1399 void *restore_jump = NULL, *handler_jump = NULL;
 // the address register must not be picked as a scratch register
1400 reglist|=(1<<rs);
 // look for a host register below HOST_CCREG that is not in reglist
1401 for (r = 0; r < HOST_CCREG; r++) {
1402 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1403 temp = r;
1404 break;
1405 }
1406 }
 // the destination is about to be overwritten, so it is dropped from the
 // save/restore list
cf95b4f0 1407 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1408 reglist&=~(1<<rt);
 // no free register: save the registers up front and use register 0
 // (or 2 when rs is 0) as scratch
1409 if(temp==-1) {
1410 save_regs(reglist);
1411 regs_saved=1;
1412 temp=(rs==0)?2:0;
1413 }
 // prefer register 1 over HOST_TEMPREG for temp2 when register 1 is
 // available here
1414 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1415 temp2=1;
 // temp2 = mem_rtab[rs >> 12] (8-byte entries)
1416 emit_readdword(&mem_rtab,temp);
1417 emit_shrimm(rs,12,temp2);
1418 emit_readdword_dualindexedx8(temp,temp2,temp2);
 // temp2 is added to itself with flags set; the bcs emitted next is patched
 // below to go to the handler path (presumably the top bit of a table entry
 // marks a handler -- confirm against the mem_rtab layout)
1419 emit_adds64(temp2,temp2,temp2);
1420 handler_jump=out;
1421 emit_jc(0);
 // direct path: load from [temp2 + rs] straight into rt
cf95b4f0 1422 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1423 switch(type) {
1424 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1425 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1426 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1427 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1428 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1429 default: assert(0);
d1e4ebd9 1430 }
1431 }
1432 if(regs_saved) {
1433 restore_jump=out;
1434 emit_jmp(0); // jump to reg restore
1435 }
1436 else
1437 emit_jmp(stubs[n].retaddr); // return address
 // handler path starts here
1438 set_jump_target(handler_jump, out);
1439
1440 if(!regs_saved)
1441 save_regs(reglist);
1442 void *handler=NULL;
1443 if(type==LOADB_STUB||type==LOADBU_STUB)
1444 handler=jump_handler_read8;
1445 if(type==LOADH_STUB||type==LOADHU_STUB)
1446 handler=jump_handler_read16;
1447 if(type==LOADW_STUB)
1448 handler=jump_handler_read32;
1449 assert(handler);
 // arguments: register 0 = rs, register 1 = temp2, register 2 = cycle
 // count plus stubs[n].d
1450 pass_args64(rs,temp2);
1451 int cc=get_reg(i_regmap,CCREG);
1452 if(cc<0)
1453 emit_loadreg(CCREG,2);
2330734f 1454 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1455 emit_far_call(handler);
d1e4ebd9 1456 // (no cycle reload after read)
 // the handler's result is taken from register 0 and width-adjusted into rt
cf95b4f0 1457 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1458 loadstore_extend(type,0,rt);
1459 }
 // the direct path joins here when registers were saved before the lookup
1460 if(restore_jump)
1461 set_jump_target(restore_jump, out);
1462 restore_regs(reglist);
1463 emit_jmp(stubs[n].retaddr);
be516ebe 1464}
1465
// Emit an inline read of the constant guest address addr.
// get_direct_memhandler() decides the shape of the code: when it returns
// NULL the value is loaded directly from host_addr, otherwise a handler is
// called with the live registers in reglist saved around the call.
//   type    - LOADB/LOADBU/LOADH/LOADHU/LOADW stub type (selects the width)
//   i       - instruction index into dops[]
//   regmap  - current guest-to-host register map
//   target  - guest register looked up for both the address and the result
//   adj     - amount added to the cycle count passed in register 2
//   reglist - host registers saved around the handler call
81dbbf4c 1466static void inline_readstub(enum stub_type type, int i, u_int addr,
1467 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1468{
d1e4ebd9 1469 int rs=get_reg(regmap,target);
1470 int rt=get_reg(regmap,target);
 // fall back to a temporary register for the address when target is unmapped
9de8a0c3 1471 if(rs<0) rs=get_reg_temp(regmap);
d1e4ebd9 1472 assert(rs>=0);
1473 u_int is_dynamic=0;
1474 uintptr_t host_addr = 0;
1475 void *handler;
1476 int cc=get_reg(regmap,CCREG);
2330734f 1477 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
d1e4ebd9 1478 // return;
1479 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
 // no handler: plain memory, load straight from the host address
1480 if (handler == NULL) {
 // nothing to do if the result is not wanted
cf95b4f0 1481 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1482 return;
 // rs is treated as holding addr here; turn it into host_addr when they differ
37387d8b 1483 if (addr != host_addr)
1484 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1485 switch(type) {
1486 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1487 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1488 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1489 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1490 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1491 default: assert(0);
1492 }
1493 return;
1494 }
 // dynamic handlers are reached through the generic jump_handler_read* entry points
37387d8b 1495 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1496 if (is_dynamic) {
d1e4ebd9 1497 if(type==LOADB_STUB||type==LOADBU_STUB)
1498 handler=jump_handler_read8;
1499 if(type==LOADH_STUB||type==LOADHU_STUB)
1500 handler=jump_handler_read16;
1501 if(type==LOADW_STUB)
1502 handler=jump_handler_read32;
1503 }
1504
1505 // call a memhandler
 // the destination is overwritten by the result, so it need not be saved
cf95b4f0 1506 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1507 reglist&=~(1<<rt);
1508 save_regs(reglist);
 // register 0 = address
1509 if(target==0)
1510 emit_movimm(addr,0);
1511 else if(rs!=0)
1512 emit_mov(rs,0);
 // register 2 = cycle count + adj
1513 if(cc<0)
1514 emit_loadreg(CCREG,2);
2330734f 1515 emit_addimm(cc<0?2:cc,adj,2);
 // dynamic case: register 1 = mem_rtab entry for this page shifted left by
 // one, built with adrp plus the low 12 bits
3968e69e 1516 if(is_dynamic) {
1517 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1518 emit_adrp((void *)l1, 1);
1519 emit_addimm64(1, l1 & 0xfff, 1);
1520 }
d1e4ebd9 1521 else
2a014d73 1522 emit_far_call(do_memhandler_pre);
d1e4ebd9 1523
2a014d73 1524 emit_far_call(handler);
d1e4ebd9 1525
1526 // (no cycle reload after read)
 // the result comes back in register 0 and is width-adjusted into rt
cf95b4f0 1527 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1528 loadstore_extend(type, 0, rt);
1529 restore_regs(reglist);
be516ebe 1530}
1531
1532static void do_writestub(int n)
1533{
1534 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1535 set_jump_target(stubs[n].addr, out);
1536 enum stub_type type=stubs[n].type;
1537 int i=stubs[n].a;
1538 int rs=stubs[n].b;
1539 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1540 u_int reglist=stubs[n].e;
1541 signed char *i_regmap=i_regs->regmap;
1542 int rt,r;
cf95b4f0 1543 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1544 rt=get_reg(i_regmap,r=FTEMP);
1545 }else{
cf95b4f0 1546 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1547 }
1548 assert(rs>=0);
1549 assert(rt>=0);
1550 int rtmp,temp=-1,temp2,regs_saved=0;
1551 void *restore_jump = NULL, *handler_jump = NULL;
1552 int reglist2=reglist|(1<<rs)|(1<<rt);
1553 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1554 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1555 temp = rtmp;
1556 break;
1557 }
1558 }
1559 if(temp==-1) {
1560 save_regs(reglist);
1561 regs_saved=1;
1562 for(rtmp=0;rtmp<=3;rtmp++)
1563 if(rtmp!=rs&&rtmp!=rt)
1564 {temp=rtmp;break;}
1565 }
1566 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1567 temp2=3;
1568 else {
1569 host_tempreg_acquire();
1570 temp2=HOST_TEMPREG;
1571 }
1572 emit_readdword(&mem_wtab,temp);
1573 emit_shrimm(rs,12,temp2);
1574 emit_readdword_dualindexedx8(temp,temp2,temp2);
1575 emit_adds64(temp2,temp2,temp2);
1576 handler_jump=out;
1577 emit_jc(0);
1578 switch(type) {
1579 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1580 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1581 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1582 default: assert(0);
1583 }
1584 if(regs_saved) {
1585 restore_jump=out;
1586 emit_jmp(0); // jump to reg restore
1587 }
1588 else
1589 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1590 set_jump_target(handler_jump, out);
1591
d1e4ebd9 1592 if(!regs_saved)
1593 save_regs(reglist);
1594 void *handler=NULL;
1595 switch(type) {
1596 case STOREB_STUB: handler=jump_handler_write8; break;
1597 case STOREH_STUB: handler=jump_handler_write16; break;
1598 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1599 default: assert(0);
d1e4ebd9 1600 }
1601 assert(handler);
1602 pass_args(rs,rt);
1603 if(temp2!=3) {
1604 emit_mov64(temp2,3);
1605 host_tempreg_release();
1606 }
1607 int cc=get_reg(i_regmap,CCREG);
1608 if(cc<0)
1609 emit_loadreg(CCREG,2);
2330734f 1610 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1611 // returns new cycle_count
2a014d73 1612 emit_far_call(handler);
2330734f 1613 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1614 if(cc<0)
1615 emit_storereg(CCREG,2);
1616 if(restore_jump)
1617 set_jump_target(restore_jump, out);
1618 restore_regs(reglist);
1619 emit_jmp(stubs[n].retaddr);
be516ebe 1620}
1621
// Emit an inline write to the constant guest address addr.
// get_direct_memhandler() decides the shape of the code: when it returns
// NULL the value is stored directly at host_addr, otherwise the handler is
// called between do_memhandler_pre and do_memhandler_post with the live
// registers in reglist saved around the calls.
//   type    - STOREB/STOREH/STOREW stub type (selects the width)
//   i       - instruction index (not used in this function)
//   regmap  - current guest-to-host register map
//   target  - guest register holding the value to store
//   adj     - cycle count adjustment applied before and removed after the call
//   reglist - host registers saved around the handler call
81dbbf4c 1622static void inline_writestub(enum stub_type type, int i, u_int addr,
1623 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1624{
 // rs: temporary register used for the address, rt: register holding the value
9de8a0c3 1625 int rs = get_reg_temp(regmap);
687b4580 1626 int rt = get_reg(regmap,target);
1627 assert(rs >= 0);
1628 assert(rt >= 0);
1629 uintptr_t host_addr = 0;
1630 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
 // no handler: plain memory, store straight to the host address
1631 if (handler == NULL) {
 // rs is treated as holding addr here; turn it into host_addr when they differ
37387d8b 1632 if (addr != host_addr)
1633 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1634 switch (type) {
687b4580 1635 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1636 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1637 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1638 default: assert(0);
1639 }
1640 return;
1641 }
1642
1643 // call a memhandler
1644 save_regs(reglist);
687b4580 1645 emit_writeword(rs, &address); // some handlers still need it
 // register 0 = value, width-adjusted for the store type
d1e4ebd9 1646 loadstore_extend(type, rt, 0);
 // register 2 = cycle count + adj; cc_use is the register the count lives in
1647 int cc, cc_use;
1648 cc = cc_use = get_reg(regmap, CCREG);
1649 if (cc < 0)
1650 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1651 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1652
2a014d73 1653 emit_far_call(do_memhandler_pre);
1654 emit_far_call(handler);
1655 emit_far_call(do_memhandler_post);
 // register 0 is read back as the cycle count; remove the adjustment again
2330734f 1656 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1657 if (cc < 0)
1658 emit_storereg(CCREG, cc_use);
687b4580 1659 restore_regs(reglist);
be516ebe 1660}
1661
3968e69e 1662/* Special assem */
1663
81dbbf4c 1664static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1665{
1666 save_load_regs_all(1, reglist);
32631e6a 1667 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1668#ifdef PCNT
1669 emit_movimm(op, 0);
2a014d73 1670 emit_far_call(pcnt_gte_start);
3968e69e 1671#endif
1672 // pointer to cop2 regs
1673 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1674}
1675
1676static void c2op_epilogue(u_int op,u_int reglist)
1677{
1678#ifdef PCNT
1679 emit_movimm(op, 0);
2a014d73 1680 emit_far_call(pcnt_gte_end);
3968e69e 1681#endif
1682 save_load_regs_all(0, reglist);
be516ebe 1683}
1684
81dbbf4c 1685static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1686{
3968e69e 1687 u_int c2op=source[i]&0x3f;
1688 u_int hr,reglist_full=0,reglist;
1689 int need_flags,need_ir;
1690 for(hr=0;hr<HOST_REGS;hr++) {
1691 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1692 }
1693 reglist=reglist_full&CALLER_SAVE_REGS;
1694
1695 if (gte_handlers[c2op]!=NULL) {
1696 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1697 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1698 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1699 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1700 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1701 need_flags=0;
1702 //int shift = (source[i] >> 19) & 1;
1703 //int lm = (source[i] >> 10) & 1;
1704 switch(c2op) {
1705 default:
1706 (void)need_ir;
81dbbf4c 1707 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1708 emit_movimm(source[i],1); // opcode
1709 emit_writeword(1,&psxRegs.code);
2a014d73 1710 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1711 break;
1712 }
1713 c2op_epilogue(c2op,reglist);
1714 }
1715}
1716
1717static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1718{
1719 //value = value & 0x7ffff000;
1720 //if (value & 0x7f87e000) value |= 0x80000000;
1721 emit_andimm(sl, 0x7fffe000, temp);
1722 emit_testimm(temp, 0xff87ffff);
1723 emit_andimm(sl, 0x7ffff000, temp);
1724 host_tempreg_acquire();
1725 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1726 emit_cmovne_reg(HOST_TEMPREG, temp);
1727 host_tempreg_release();
1728 assert(0); // testing needed
1729}
1730
1731static void do_mfc2_31_one(u_int copr,signed char temp)
1732{
1733 emit_readshword(&reg_cop2d[copr],temp);
1734 emit_bicsar_imm(temp,31,temp);
1735 emit_cmpimm(temp,0xf80);
1736 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1737 emit_andimm(temp,0xf80,temp);
1738}
1739
1740static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1741{
1742 if (temp < 0) {
1743 host_tempreg_acquire();
1744 temp = HOST_TEMPREG;
1745 }
1746 do_mfc2_31_one(9,temp);
1747 emit_shrimm(temp,7,tl);
1748 do_mfc2_31_one(10,temp);
1749 emit_orrshr_imm(temp,2,tl);
1750 do_mfc2_31_one(11,temp);
1751 emit_orrshl_imm(temp,3,tl);
1752 emit_writeword(tl,&reg_cop2d[29]);
1753
1754 if (temp == HOST_TEMPREG)
1755 host_tempreg_release();
be516ebe 1756}
1757
2330734f 1758static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1759{
3968e69e 1760 // case 0x18: MULT
1761 // case 0x19: MULTU
1762 // case 0x1A: DIV
1763 // case 0x1B: DIVU
cf95b4f0 1764 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1765 {
cf95b4f0 1766 switch(dops[i].opcode2)
3968e69e 1767 {
1768 case 0x18: // MULT
1769 case 0x19: // MULTU
1770 {
cf95b4f0 1771 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1772 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1773 signed char hi=get_reg(i_regs->regmap,HIREG);
1774 signed char lo=get_reg(i_regs->regmap,LOREG);
1775 assert(m1>=0);
1776 assert(m2>=0);
1777 assert(hi>=0);
1778 assert(lo>=0);
1779
cf95b4f0 1780 if(dops[i].opcode2==0x18) // MULT
3968e69e 1781 emit_smull(m1,m2,hi);
1782 else // MULTU
1783 emit_umull(m1,m2,hi);
1784
1785 emit_mov(hi,lo);
1786 emit_shrimm64(hi,32,hi);
1787 break;
1788 }
1789 case 0x1A: // DIV
1790 case 0x1B: // DIVU
1791 {
cf95b4f0 1792 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1793 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1794 signed char quotient=get_reg(i_regs->regmap,LOREG);
1795 signed char remainder=get_reg(i_regs->regmap,HIREG);
1796 assert(numerator>=0);
1797 assert(denominator>=0);
1798 assert(quotient>=0);
1799 assert(remainder>=0);
1800
cf95b4f0 1801 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1802 emit_sdiv(numerator,denominator,quotient);
1803 else // DIVU
1804 emit_udiv(numerator,denominator,quotient);
1805 emit_msub(quotient,denominator,numerator,remainder);
1806
1807 // div 0 quotient (remainder is already correct)
1808 host_tempreg_acquire();
cf95b4f0 1809 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1810 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1811 else
1812 emit_movimm(~0,HOST_TEMPREG);
1813 emit_test(denominator,denominator);
1814 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1815 host_tempreg_release();
1816 break;
1817 }
1818 default:
1819 assert(0);
1820 }
1821 }
1822 else
1823 {
1824 signed char hr=get_reg(i_regs->regmap,HIREG);
1825 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1826 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1827 {
cf95b4f0 1828 if (dops[i].rs1) {
1829 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1830 assert(numerator >= 0);
1831 if (hr >= 0)
1832 emit_mov(numerator,hr);
1833 if (lr >= 0) {
cf95b4f0 1834 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1835 emit_sub_asrimm(0,numerator,31,lr);
1836 else
1837 emit_movimm(~0,lr);
1838 }
1839 }
1840 else {
1841 if (hr >= 0) emit_zeroreg(hr);
1842 if (lr >= 0) emit_movimm(~0,lr);
1843 }
1844 }
1845 else
1846 {
1847 // Multiply by zero is zero.
1848 if (hr >= 0) emit_zeroreg(hr);
1849 if (lr >= 0) emit_zeroreg(lr);
1850 }
1851 }
be516ebe 1852}
1853#define multdiv_assemble multdiv_assemble_arm64
1854
d1e4ebd9 1855static void do_jump_vaddr(u_int rs)
1856{
1857 if (rs != 0)
1858 emit_mov(rs, 0);
104df9d3 1859 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1860 emit_jmpreg(0);
1861}
1862
be516ebe 1863static void do_preload_rhash(u_int r) {
1864 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1865 // register. On ARM the hash can be done with a single instruction (below)
1866}
1867
1868static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1869 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1870}
1871
1872static void do_rhash(u_int rs,u_int rh) {
1873 emit_andimm(rs, 0xf8, rh);
1874}
1875
d1e4ebd9 1876static void do_miniht_load(int ht, u_int rh) {
1877 emit_add64(ht, rh, ht);
1878 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1879}
1880
d1e4ebd9 1881static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1882 emit_cmp(rh, rs);
1883 void *jaddr = out;
1884 emit_jeq(0);
1885 do_jump_vaddr(rs);
1886
1887 set_jump_target(jaddr, out);
1888 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1889 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1890 emit_jmpreg(ht);
be516ebe 1891}
1892
d1e4ebd9 1893// parsed by set_jump_target?
be516ebe 1894static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1895 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1896 emit_movk(return_address&0xffff,rt);
1897 add_to_linker(out,return_address,1);
1898 emit_adr(out,temp);
1899 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1900 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1901}
1902
/*
 * Make the code written to [start, end) visible to instruction fetch.
 *
 * GCC's __clear_cache is deliberately not used: it caches the
 * icache/dcache line sizes, which can vary between cores on big.LITTLE
 * systems. Instead CTR_EL0 is read on every call and the smallest line
 * sizes seen so far are remembered in function-static variables.
 */
static void clear_cache_arm64(char *start, char *end)
{
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t range_end = (uint64_t)end;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (isize < icache_line_size)
    icache_line_size = isize;
  isize = icache_line_size;
  if (dsize < dcache_line_size)
    dcache_line_size = dsize;
  dsize = dcache_line_size;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification
     is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << 28))) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < range_end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << 29))) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < range_end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1943
1944// CPU-architecture-specific initialization
2a014d73 1945static void arch_init(void)
1946{
1947 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
3039c914 1948 struct tramp_insns *ops = ndrc->tramp.ops, *opsw;
2a014d73 1949 size_t i;
1950 assert(!(diff & 3));
3039c914 1951 opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 1952 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
3039c914 1953 opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1954 opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 1955 }
1956 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 1957}
1958
1959// vim:shiftwidth=2:expandtab