drc: rework smc checks again
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus/PCSX - assem_arm64.c                                      *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *   Copyright (C) 2009-2018 Gillou68310                                   *
 *   Copyright (C) 2021 notaz                                              *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
// Memory handler pre/post hooks; only declared here (definitions are not
// visible in this part of the file).
void do_memhandler_pre();
void do_memhandler_post();
be516ebe 28
29/* Linker */
d1e4ebd9 30static void set_jump_target(void *addr, void *target)
be516ebe 31{
d9e2b173 32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 33 intptr_t offset = (u_char *)target - (u_char *)addr;
34
3968e69e 35 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
38 }
3968e69e 39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 43 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 44 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 45 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 47 }
3968e69e 48 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
52 }
53 else
3968e69e 54 abort(); // should not happen
be516ebe 55}
56
// from a pointer to external jump stub (which was produced by emit_extjump2)
// find where the jumping insn is.
// The third word of the stub must be an adr; its byte offset is decoded and
// applied relative to that adr instruction.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  unsigned int insn = (unsigned int)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // Decode on an unsigned copy: left-shifting a signed int is undefined.
  // The shift by 8 puts the top bit of the 19-bit field at bit 31 so the
  // right shift sign-extends it.
  int immhi = (int)(insn << 8) >> 13;
  int offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
66
#if 0
// find where external branch is linked to using addr of its stub:
// get address that the stub loads (dyna_linker arg1),
// treat it as a pointer to branch insn,
// return addr where that branch jumps to
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  if ((*i_ptr&0xfc000000) == 0x14000000)  // b
    return i_ptr + ((signed int)(*i_ptr<<6)>>6);
  if ((*i_ptr&0xff000000) == 0x54000000     // b.cond
      || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
    return i_ptr + ((signed int)(*i_ptr<<8)>>13);
  assert(0);
  return NULL;
}
#endif
be516ebe 84
be516ebe 85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
d1e4ebd9 118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition codes, numbered 0..15 in the order they are placed into the
// condition field of emitted instructions (same order as condname[]).
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 139};
140
be516ebe 141static void output_w32(u_int word)
142{
d9e2b173 143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 144 out += 4;
145}
146
// Pack rn (bits 9:5) and rd (bits 4:0); both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  return (rn << 5) | rd;
}
153
// Pack rm (bits 20:16), rn (bits 9:5) and rd (bits 4:0); register 31 is
// accepted for all three fields.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return (rm << 16) | (rn << 5) | rd;
}
161
3968e69e 162static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
163{
164 assert(ra < 32);
165 return rm_rn_rd(rm, rn, rd) | (ra << 10);
166}
167
// Pack a 7-bit immediate (bits 21:15), rt2 (bits 14:10), rn (bits 9:5) and
// rt (bits 4:0). rt and rt2 must be below 31; rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
}
176
687b4580 177static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
178{
179 assert(imm6 <= 63);
180 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
181}
182
// Pack a 16-bit immediate (bits 20:5) and rd (bits 4:0, must be below 31).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  return (imm16 << 5) | rd;
}
189
// Pack a 12-bit immediate (bits 21:10), rn (bits 9:5) and rd (bits 4:0);
// register 31 is accepted for both register fields.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  return (imm12 << 10) | (rn << 5) | rd;
}
197
// Pack a 9-bit immediate (bits 20:12), rn (bits 9:5) and rt (bits 4:0);
// both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rt)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rt < 31);
  return (imm9 << 12) | (rn << 5) | rt;
}
205
// Pack a 19-bit immediate (bits 23:5) and rt (bits 4:0, must be below 31).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  return (imm19 << 5) | rt;
}
212
// Pack the bitfield/logical-immediate operand fields: n (bit 22),
// immr (bits 21:16), imms (bits 15:10), rn (bits 9:5) and rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
}
222
223static u_int genjmp(const u_char *addr)
be516ebe 224{
225 intptr_t offset = addr - out;
d1e4ebd9 226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 227 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 abort();
be516ebe 230 return 0;
231 }
d1e4ebd9 232 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 233}
234
d1e4ebd9 235static u_int genjmpcc(const u_char *addr)
be516ebe 236{
237 intptr_t offset = addr - out;
d1e4ebd9 238 if ((uintptr_t)addr < 3) return 0;
be516ebe 239 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 abort();
242 return 0;
243 }
244 return ((u_int)offset >> 2) & 0x7ffff;
245}
246
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (i.e. value+1 is a power of two, or value is all ones), else 0.
static uint32_t is_mask(u_int value)
{
  return value && ((value + 1) & value) == 0;
}
251
252// This function returns true if the argument contains a
253// non-empty sequence of ones (possibly rotated) with the remainder zero.
254static uint32_t is_rotated_mask(u_int value)
255{
3968e69e 256 if (value == 0 || value == ~0)
be516ebe 257 return 0;
d1e4ebd9 258 if (is_mask((value - 1) | value))
259 return 1;
260 return is_mask((~value - 1) | ~value);
261}
262
263static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
264{
265 int lzeros, tzeros, ones;
266 assert(value != 0);
267 if (is_mask((value - 1) | value)) {
268 lzeros = __builtin_clz(value);
269 tzeros = __builtin_ctz(value);
270 ones = 32 - lzeros - tzeros;
271 *immr = (32 - tzeros) & 31;
272 *imms = ones - 1;
273 return;
be516ebe 274 }
d1e4ebd9 275 value = ~value;
276 if (is_mask((value - 1) | value)) {
277 lzeros = __builtin_clz(value);
278 tzeros = __builtin_ctz(value);
279 ones = 32 - lzeros - tzeros;
3968e69e 280 *immr = lzeros;
d1e4ebd9 281 *imms = 31 - ones;
282 return;
283 }
3968e69e 284 abort();
be516ebe 285}
286
287static void emit_mov(u_int rs, u_int rt)
288{
687b4580 289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
291}
292
293static void emit_mov64(u_int rs, u_int rt)
294{
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 297}
298
687b4580 299static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 300{
d1e4ebd9 301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 303}
304
d1e4ebd9 305static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 306{
d1e4ebd9 307 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
308 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 309}
310
d1e4ebd9 311static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 312{
3968e69e 313 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 314 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
315}
39b71d9a 316#define emit_adds_ptr emit_adds64
d1e4ebd9 317
318static void emit_neg(u_int rs, u_int rt)
319{
320 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
321 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 322}
323
687b4580 324static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 325{
d1e4ebd9 326 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 327 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 328}
329
3968e69e 330static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
331{
332 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
333 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
334}
335
d1e4ebd9 336static void emit_movz(u_int imm, u_int rt)
be516ebe 337{
d1e4ebd9 338 assem_debug("movz %s,#%#x\n", regname[rt], imm);
339 output_w32(0x52800000 | imm16_rd(imm, rt));
340}
341
342static void emit_movz_lsl16(u_int imm, u_int rt)
343{
344 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
345 output_w32(0x52a00000 | imm16_rd(imm, rt));
346}
347
348static void emit_movn(u_int imm, u_int rt)
349{
350 assem_debug("movn %s,#%#x\n", regname[rt], imm);
351 output_w32(0x12800000 | imm16_rd(imm, rt));
352}
353
354static void emit_movn_lsl16(u_int imm,u_int rt)
355{
356 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
357 output_w32(0x12a00000 | imm16_rd(imm, rt));
358}
359
360static void emit_movk(u_int imm,u_int rt)
361{
362 assem_debug("movk %s,#%#x\n", regname[rt], imm);
363 output_w32(0x72800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movk_lsl16(u_int imm,u_int rt)
367{
368 assert(imm<65536);
3968e69e 369 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 370 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 371}
372
373static void emit_zeroreg(u_int rt)
374{
d1e4ebd9 375 emit_movz(0, rt);
be516ebe 376}
377
be516ebe 378static void emit_movimm(u_int imm, u_int rt)
379{
d1e4ebd9 380 if (imm < 65536)
381 emit_movz(imm, rt);
382 else if ((~imm) < 65536)
383 emit_movn(~imm, rt);
384 else if ((imm&0xffff) == 0)
385 emit_movz_lsl16(imm >> 16, rt);
386 else if (((~imm)&0xffff) == 0)
387 emit_movn_lsl16(~imm >> 16, rt);
388 else if (is_rotated_mask(imm)) {
389 u_int immr, imms;
390 gen_logical_imm(imm, &immr, &imms);
391 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
392 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
393 }
be516ebe 394 else {
d1e4ebd9 395 emit_movz(imm & 0xffff, rt);
396 emit_movk_lsl16(imm >> 16, rt);
be516ebe 397 }
398}
399
aaece508 400static void emit_movimm64(uint64_t imm, u_int rt)
401{
402 u_int shift, op, imm16, insns = 0;
403 for (shift = 0; shift < 4; shift++) {
404 imm16 = (imm >> shift * 16) & 0xffff;
405 if (!imm16)
406 continue;
407 op = insns ? 0xf2800000 : 0xd2800000;
408 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
409 if (shift)
410 assem_debug(",lsl #%u", shift * 16);
411 assem_debug("\n");
412 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
413 insns++;
414 }
415 if (!insns) {
416 assem_debug("movz %s,#0\n", regname64[rt]);
417 output_w32(0xd2800000 | imm16_rd(0, rt));
418 }
419}
420
687b4580 421static void emit_readword(void *addr, u_int rt)
422{
423 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
424 if (!(offset & 3) && offset <= 16380) {
425 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
426 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
427 }
428 else
3968e69e 429 abort();
687b4580 430}
431
d1e4ebd9 432static void emit_readdword(void *addr, u_int rt)
433{
434 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
435 if (!(offset & 7) && offset <= 32760) {
436 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
437 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
438 }
3968e69e 439 else
440 abort();
441}
39b71d9a 442#define emit_readptr emit_readdword
3968e69e 443
444static void emit_readshword(void *addr, u_int rt)
445{
446 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
447 if (!(offset & 1) && offset <= 8190) {
448 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
449 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
450 }
d1e4ebd9 451 else
452 assert(0);
453}
454
be516ebe 455static void emit_loadreg(u_int r, u_int hr)
456{
d1e4ebd9 457 int is64 = 0;
be516ebe 458 if (r == 0)
459 emit_zeroreg(hr);
460 else {
33788798 461 void *addr;
be516ebe 462 switch (r) {
7c3a5182 463 //case HIREG: addr = &hi; break;
464 //case LOREG: addr = &lo; break;
be516ebe 465 case CCREG: addr = &cycle_count; break;
466 case CSREG: addr = &Status; break;
d1e4ebd9 467 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 468 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 469 default:
470 assert(r < 34);
471 addr = &psxRegs.GPR.r[r];
472 break;
be516ebe 473 }
d1e4ebd9 474 if (is64)
475 emit_readdword(addr, hr);
476 else
477 emit_readword(addr, hr);
be516ebe 478 }
479}
480
687b4580 481static void emit_writeword(u_int rt, void *addr)
482{
483 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
484 if (!(offset & 3) && offset <= 16380) {
485 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
486 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
487 }
488 else
489 assert(0);
490}
491
d1e4ebd9 492static void emit_writedword(u_int rt, void *addr)
493{
494 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
495 if (!(offset & 7) && offset <= 32760) {
496 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 497 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 498 }
499 else
3968e69e 500 abort();
d1e4ebd9 501}
502
687b4580 503static void emit_storereg(u_int r, u_int hr)
be516ebe 504{
505 assert(r < 64);
7c3a5182 506 void *addr = &psxRegs.GPR.r[r];
be516ebe 507 switch (r) {
7c3a5182 508 //case HIREG: addr = &hi; break;
509 //case LOREG: addr = &lo; break;
be516ebe 510 case CCREG: addr = &cycle_count; break;
7c3a5182 511 default: assert(r < 34); break;
be516ebe 512 }
687b4580 513 emit_writeword(hr, addr);
be516ebe 514}
515
516static void emit_test(u_int rs, u_int rt)
517{
d1e4ebd9 518 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
519 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 520}
521
d1e4ebd9 522static void emit_testimm(u_int rs, u_int imm)
be516ebe 523{
d1e4ebd9 524 u_int immr, imms;
687b4580 525 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 526 assert(is_rotated_mask(imm)); // good enough for PCSX
527 gen_logical_imm(imm, &immr, &imms);
3968e69e 528 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 529}
530
531static void emit_not(u_int rs,u_int rt)
532{
533 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 534 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 535}
536
be516ebe 537static void emit_and(u_int rs1,u_int rs2,u_int rt)
538{
539 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 540 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 541}
542
543static void emit_or(u_int rs1,u_int rs2,u_int rt)
544{
545 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 546 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 547}
548
3968e69e 549static void emit_bic(u_int rs1,u_int rs2,u_int rt)
550{
551 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
553}
554
be516ebe 555static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
556{
be516ebe 557 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 558 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 559}
560
561static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
562{
be516ebe 563 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 564 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 565}
566
3968e69e 567static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
568{
569 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
570 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
571}
572
be516ebe 573static void emit_xor(u_int rs1,u_int rs2,u_int rt)
574{
575 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 576 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 577}
578
3968e69e 579static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
580{
581 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
582 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
583}
584
d1e4ebd9 585static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 586{
d1e4ebd9 587 unused const char *st = s ? "s" : "";
588 s = s ? 0x20000000 : 0;
589 is64 = is64 ? 0x80000000 : 0;
687b4580 590 if (imm < 4096) {
d1e4ebd9 591 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
592 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 593 }
594 else if (-imm < 4096) {
3968e69e 595 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 596 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
597 }
598 else if (imm < 16777216) {
599 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
600 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 603 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 604 }
605 }
606 else if (-imm < 16777216) {
607 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
608 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
609 if ((imm & 0xfff) || s) {
610 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
611 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
612 }
687b4580 613 }
614 else
3968e69e 615 abort();
be516ebe 616}
617
d1e4ebd9 618static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
619{
9b495f6e 620 if (imm == 0) {
621 emit_mov(rs, rt);
622 return;
623 }
d1e4ebd9 624 emit_addimm_s(0, 0, rs, imm, rt);
625}
626
627static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
628{
629 emit_addimm_s(0, 1, rs, imm, rt);
630}
631
be516ebe 632static void emit_addimm_and_set_flags(int imm, u_int rt)
633{
d1e4ebd9 634 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 635}
636
d1e4ebd9 637static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 638{
d1e4ebd9 639 const char *names[] = { "and", "orr", "eor", "ands" };
640 const char *name = names[op];
641 u_int immr, imms;
642 op = op << 29;
643 if (is_rotated_mask(imm)) {
644 gen_logical_imm(imm, &immr, &imms);
645 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
646 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
647 }
648 else {
649 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
650 host_tempreg_acquire();
651 emit_movimm(imm, HOST_TEMPREG);
652 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
653 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_release();
656 }
657 (void)name;
be516ebe 658}
659
d1e4ebd9 660static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 661{
d1e4ebd9 662 if (imm == 0)
663 emit_zeroreg(rt);
664 else
665 emit_logicop_imm(0, rs, imm, rt);
be516ebe 666}
667
d1e4ebd9 668static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 669{
d1e4ebd9 670 if (imm == 0) {
671 if (rs != rt)
672 emit_mov(rs, rt);
673 }
674 else
675 emit_logicop_imm(1, rs, imm, rt);
be516ebe 676}
677
d1e4ebd9 678static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 679{
d1e4ebd9 680 if (imm == 0) {
681 if (rs != rt)
682 emit_mov(rs, rt);
683 }
684 else
685 emit_logicop_imm(2, rs, imm, rt);
be516ebe 686}
687
d1e4ebd9 688static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 689{
d1e4ebd9 690 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 692}
693
d1e4ebd9 694static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 695{
d1e4ebd9 696 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 698}
699
700static void emit_shlimm(u_int rs,u_int imm,u_int rt)
701{
be516ebe 702 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 704}
705
3968e69e 706static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 707{
3968e69e 708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 710}
711
3968e69e 712static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 713{
be516ebe 714 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 715 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 716}
717
718static void emit_sarimm(u_int rs,u_int imm,u_int rt)
719{
be516ebe 720 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 722}
723
724static void emit_rorimm(u_int rs,u_int imm,u_int rt)
725{
3968e69e 726 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 727 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 728}
729
730static void emit_signextend16(u_int rs, u_int rt)
731{
732 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 733 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 734}
735
d1e4ebd9 736static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 737{
3968e69e 738 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 739 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 740}
741
d1e4ebd9 742static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 743{
d1e4ebd9 744 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 746}
747
d1e4ebd9 748static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 749{
d1e4ebd9 750 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
751 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 752}
753
d1e4ebd9 754static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 755{
d1e4ebd9 756 if (imm < 4096) {
757 assem_debug("cmp %s,%#x\n", regname[rs], imm);
758 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
759 }
760 else if (-imm < 4096) {
761 assem_debug("cmn %s,%#x\n", regname[rs], imm);
762 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
763 }
764 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 765 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 766 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
767 }
768 else {
769 host_tempreg_acquire();
770 emit_movimm(imm, HOST_TEMPREG);
771 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
772 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
773 host_tempreg_release();
774 }
be516ebe 775}
776
d1e4ebd9 777static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 778{
d1e4ebd9 779 assert(imm == 0 || imm == 1);
780 assert(cond0 < 0x10);
781 assert(cond1 < 0x10);
782 if (imm) {
783 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
784 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
785 } else {
786 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
787 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
788 }
be516ebe 789}
790
d1e4ebd9 791static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 792{
d1e4ebd9 793 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 794}
795
d1e4ebd9 796static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 797{
d1e4ebd9 798 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 799}
800
801static void emit_cmovb_imm(int imm,u_int rt)
802{
d1e4ebd9 803 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 804}
805
3968e69e 806static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 807{
3968e69e 808 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 810}
811
812static void emit_cmovne_reg(u_int rs,u_int rt)
813{
d1e4ebd9 814 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 816}
817
818static void emit_cmovl_reg(u_int rs,u_int rt)
819{
d1e4ebd9 820 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 822}
823
e3c6bdb5 824static void emit_cmovb_reg(u_int rs,u_int rt)
825{
826 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
828}
829
be516ebe 830static void emit_cmovs_reg(u_int rs,u_int rt)
831{
d1e4ebd9 832 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
833 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 834}
835
3968e69e 836static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
837{
838 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
839 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
840}
841
be516ebe 842static void emit_slti32(u_int rs,int imm,u_int rt)
843{
844 if(rs!=rt) emit_zeroreg(rt);
845 emit_cmpimm(rs,imm);
846 if(rs==rt) emit_movimm(0,rt);
847 emit_cmovl_imm(1,rt);
848}
849
850static void emit_sltiu32(u_int rs,int imm,u_int rt)
851{
852 if(rs!=rt) emit_zeroreg(rt);
853 emit_cmpimm(rs,imm);
854 if(rs==rt) emit_movimm(0,rt);
855 emit_cmovb_imm(1,rt);
856}
857
858static void emit_cmp(u_int rs,u_int rt)
859{
860 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 861 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 862}
863
882a08fc 864static void emit_cmpcs(u_int rs,u_int rt)
865{
866 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
867 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
868}
869
be516ebe 870static void emit_set_gz32(u_int rs, u_int rt)
871{
872 //assem_debug("set_gz32\n");
873 emit_cmpimm(rs,1);
874 emit_movimm(1,rt);
875 emit_cmovl_imm(0,rt);
876}
877
878static void emit_set_nz32(u_int rs, u_int rt)
879{
880 //assem_debug("set_nz32\n");
d1e4ebd9 881 if(rs!=rt) emit_mov(rs,rt);
882 emit_test(rs,rs);
883 emit_cmovne_imm(1,rt);
be516ebe 884}
885
886static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
887{
888 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
889 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
890 emit_cmp(rs1,rs2);
891 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
892 emit_cmovl_imm(1,rt);
893}
894
895static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
896{
897 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
898 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
899 emit_cmp(rs1,rs2);
900 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
901 emit_cmovb_imm(1,rt);
902}
903
2a014d73 904static int can_jump_or_call(const void *a)
905{
906 intptr_t diff = (u_char *)a - out;
907 return (-134217728 <= diff && diff <= 134217727);
908}
909
d1e4ebd9 910static void emit_call(const void *a)
be516ebe 911{
d1e4ebd9 912 intptr_t diff = (u_char *)a - out;
913 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 914 assert(!(diff & 3));
915 if (-134217728 <= diff && diff <= 134217727)
916 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
917 else
3968e69e 918 abort();
be516ebe 919}
920
d1e4ebd9 921static void emit_jmp(const void *a)
be516ebe 922{
d1e4ebd9 923 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
924 u_int offset = genjmp(a);
925 output_w32(0x14000000 | offset);
be516ebe 926}
927
d1e4ebd9 928static void emit_jne(const void *a)
be516ebe 929{
d1e4ebd9 930 assem_debug("bne %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 933}
934
7c3a5182 935static void emit_jeq(const void *a)
be516ebe 936{
d1e4ebd9 937 assem_debug("beq %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 940}
941
7c3a5182 942static void emit_js(const void *a)
be516ebe 943{
d1e4ebd9 944 assem_debug("bmi %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 947}
948
7c3a5182 949static void emit_jns(const void *a)
be516ebe 950{
d1e4ebd9 951 assem_debug("bpl %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 954}
955
7c3a5182 956static void emit_jl(const void *a)
be516ebe 957{
d1e4ebd9 958 assem_debug("blt %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 961}
962
7c3a5182 963static void emit_jge(const void *a)
be516ebe 964{
d1e4ebd9 965 assem_debug("bge %p\n", a);
966 u_int offset = genjmpcc(a);
967 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 968}
969
7c3a5182 970static void emit_jno(const void *a)
be516ebe 971{
d1e4ebd9 972 assem_debug("bvc %p\n", a);
973 u_int offset = genjmpcc(a);
974 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 975}
976
7c3a5182 977static void emit_jc(const void *a)
be516ebe 978{
d1e4ebd9 979 assem_debug("bcs %p\n", a);
980 u_int offset = genjmpcc(a);
981 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 982}
983
3968e69e 984static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 985{
3968e69e 986 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 987 u_int offset = genjmpcc(a);
3968e69e 988 is64 = is64 ? 0x80000000 : 0;
989 isnz = isnz ? 0x01000000 : 0;
990 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
991}
992
9b495f6e 993static void *emit_cbz(u_int r, const void *a)
3968e69e 994{
9b495f6e 995 void *ret = out;
3968e69e 996 emit_cb(0, 0, a, r);
9b495f6e 997 return ret;
be516ebe 998}
999
1000static void emit_jmpreg(u_int r)
1001{
3968e69e 1002 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1003 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1004}
1005
1006static void emit_retreg(u_int r)
1007{
d1e4ebd9 1008 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1009 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1010}
1011
1012static void emit_ret(void)
1013{
1014 emit_retreg(LR);
1015}
1016
d1e4ebd9 1017static void emit_adr(void *addr, u_int rt)
1018{
1019 intptr_t offset = (u_char *)addr - out;
1020 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1021 assert(rt < 31);
d1e4ebd9 1022 assem_debug("adr x%d,#%#lx\n", rt, offset);
1023 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1024}
1025
3968e69e 1026static void emit_adrp(void *addr, u_int rt)
1027{
1028 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1029 assert(-4294967296l <= offset && offset < 4294967296l);
1030 assert(rt < 31);
1031 offset >>= 12;
1032 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1033 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1034}
1035
be516ebe 1036static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1037{
d1e4ebd9 1038 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1039 assert(-256 <= offset && offset < 256);
1040 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1041}
1042
1043static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1044{
1045 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1047}
1048
1049static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1050{
1051 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1052 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1053}
1054
1055static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056{
1057 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1058 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1059}
1060
1061static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1062{
1063 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1064 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1065}
39b71d9a 1066#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1067
1068static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1069{
1070 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1071 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1072}
1073
1074static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1075{
1076 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1077 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1078}
1079
1080static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1081{
1082 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1083 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1084}
1085
1086static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1087{
1088 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1089 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1090}
1091
1092static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1093{
1094 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1095 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1096}
1097
be516ebe 1098static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1099{
d1e4ebd9 1100 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1103}
1104
1105static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1106{
d1e4ebd9 1107 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1110}
1111
1112static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1113{
d1e4ebd9 1114 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1115 assert(-256 <= offset && offset < 256);
1116 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1117}
1118
1119static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1120{
d1e4ebd9 1121 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1122 assert(-256 <= offset && offset < 256);
1123 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1124}
1125
be516ebe 1126static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1127{
3968e69e 1128 if (!(offset & 3) && (u_int)offset <= 16380) {
1129 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1130 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1131 }
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1134 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1135 }
687b4580 1136 else
1137 assert(0);
be516ebe 1138}
1139
1140static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1141{
3968e69e 1142 if (!(offset & 1) && (u_int)offset <= 8190) {
1143 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1144 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1145 }
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1149 }
687b4580 1150 else
1151 assert(0);
be516ebe 1152}
1153
1154static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1155{
3968e69e 1156 if ((u_int)offset < 4096) {
1157 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1158 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1159 }
1160 else if (-256 <= offset && offset < 256) {
1161 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1162 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1163 }
687b4580 1164 else
1165 assert(0);
be516ebe 1166}
1167
3968e69e 1168static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1169{
3968e69e 1170 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1172}
1173
3968e69e 1174static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1175{
3968e69e 1176 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1178}
1179
1180static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1181{
1182 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1183 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1184}
1185
1186static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1187{
1188 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1190}
1191
3968e69e 1192static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1193{
1194 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1196}
1197
1198static void emit_clz(u_int rs, u_int rt)
be516ebe 1199{
1200 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1201 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1202}
1203
be516ebe 1204// special case for checking invalid_code
9b495f6e 1205static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1206{
1207 emit_shrimm(r, 12, rt);
1208 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1209 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1210}
1211
3968e69e 1212// special for loadlr_assemble, rs2 is destroyed
1213static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1214{
3968e69e 1215 emit_shl(rs2, shift, rs2);
1216 emit_bic(rs1, rs2, rt);
be516ebe 1217}
1218
3968e69e 1219static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1220{
3968e69e 1221 emit_shr(rs2, shift, rs2);
1222 emit_bic(rs1, rs2, rt);
be516ebe 1223}
1224
687b4580 1225static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1226{
687b4580 1227 u_int op = 0xb9000000;
d1e4ebd9 1228 unused const char *ldst = is_st ? "st" : "ld";
1229 unused char rp = is64 ? 'x' : 'w';
687b4580 1230 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1231 is64 = is64 ? 1 : 0;
1232 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1233 ofs = (ofs >> (2+is64));
687b4580 1234 if (!is_st) op |= 0x00400000;
1235 if (is64) op |= 0x40000000;
d1e4ebd9 1236 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1237}
1238
687b4580 1239static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1240{
687b4580 1241 u_int op = 0x29000000;
d1e4ebd9 1242 unused const char *ldst = is_st ? "st" : "ld";
1243 unused char rp = is64 ? 'x' : 'w';
687b4580 1244 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1245 is64 = is64 ? 1 : 0;
1246 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1247 ofs = (ofs >> (2+is64));
1248 assert(-64 <= ofs && ofs <= 63);
1249 ofs &= 0x7f;
1250 if (!is_st) op |= 0x00400000;
1251 if (is64) op |= 0x80000000;
d1e4ebd9 1252 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1253}
1254
1255static void save_load_regs_all(int is_store, u_int reglist)
1256{
1257 int ofs = 0, c = 0;
1258 u_int r, pair[2];
1259 for (r = 0; reglist; r++, reglist >>= 1) {
1260 if (reglist & 1)
1261 pair[c++] = r;
1262 if (c == 2) {
1263 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1264 ofs += 8 * 2;
1265 c = 0;
1266 }
1267 }
1268 if (c) {
1269 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1270 ofs += 8;
1271 }
1272 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1273}
1274
1275// Save registers before function call
1276static void save_regs(u_int reglist)
1277{
1278 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1279 save_load_regs_all(1, reglist);
be516ebe 1280}
1281
1282// Restore registers after function call
1283static void restore_regs(u_int reglist)
1284{
1285 reglist &= CALLER_SAVE_REGS;
687b4580 1286 save_load_regs_all(0, reglist);
be516ebe 1287}
1288
1289/* Stubs/epilogue */
1290
1291static void literal_pool(int n)
1292{
1293 (void)literals;
1294}
1295
// Intentionally empty in this backend.
static void literal_pool_jumpover(int n)
{
}
1299
d1e4ebd9 1300// parsed by get_pointer, find_extjump_insn
104df9d3 1301static void emit_extjump(u_char *addr, u_int target)
be516ebe 1302{
d1e4ebd9 1303 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1304
d1e4ebd9 1305 emit_movz(target & 0xffff, 0);
1306 emit_movk_lsl16(target >> 16, 0);
1307
1308 // addr is in the current recompiled block (max 256k)
1309 // offset shouldn't exceed +/-1MB
1310 emit_adr(addr, 1);
104df9d3 1311 emit_far_jump(dyna_linker);
be516ebe 1312}
1313
d1e4ebd9 1314static void check_extjump2(void *src)
be516ebe 1315{
d1e4ebd9 1316 u_int *ptr = src;
1317 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1318 (void)ptr;
be516ebe 1319}
1320
1321// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1322static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1323{
d1e4ebd9 1324 int diff = rt_val - rs_val;
3968e69e 1325 if ((-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1327 emit_addimm(rs, diff, rt);
3968e69e 1328 else if (rt_val == ~rs_val)
1329 emit_not(rs, rt);
d1e4ebd9 1330 else if (is_rotated_mask(rs_val ^ rt_val))
1331 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1332 else
d1e4ebd9 1333 emit_movimm(rt_val, rt);
be516ebe 1334}
1335
d1e4ebd9 1336// return 1 if the above function can do it's job cheaply
687b4580 1337static int is_similar_value(u_int v1, u_int v2)
be516ebe 1338{
687b4580 1339 int diff = v1 - v2;
3968e69e 1340 return (-4096 < diff && diff < 4096)
1341 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1342 || v1 == ~v2
d1e4ebd9 1343 || is_rotated_mask(v1 ^ v2);
1344}
1345
37387d8b 1346static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1347{
1348 if (rt_val < 0x100000000ull) {
1349 emit_movimm_from(rs_val, rs, rt_val, rt);
1350 return;
1351 }
1352 // just move the whole thing. At least on Linux all addresses
1353 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1354 emit_movimm64(rt_val, rt);
37387d8b 1355}
1356
1357// trashes x2
d1e4ebd9 1358static void pass_args64(u_int a0, u_int a1)
1359{
1360 if(a0==1&&a1==0) {
1361 // must swap
1362 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1363 }
1364 else if(a0!=0&&a1==0) {
1365 emit_mov64(a1,1);
1366 if (a0>=0) emit_mov64(a0,0);
1367 }
1368 else {
1369 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1370 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1371 }
be516ebe 1372}
1373
d1e4ebd9 1374static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1375{
1376 switch(type) {
1377 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1378 case LOADBU_STUB:
1379 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1380 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1381 case LOADHU_STUB:
1382 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1383 case LOADW_STUB:
1384 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1385 default: assert(0);
d1e4ebd9 1386 }
1387}
1388
1389#include "pcsxmem.h"
be516ebe 1390//#include "pcsxmem_inline.c"
1391
1392static void do_readstub(int n)
1393{
1394 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1395 set_jump_target(stubs[n].addr, out);
1396 enum stub_type type = stubs[n].type;
1397 int i = stubs[n].a;
1398 int rs = stubs[n].b;
1399 const struct regstat *i_regs = (void *)stubs[n].c;
1400 u_int reglist = stubs[n].e;
1401 const signed char *i_regmap = i_regs->regmap;
1402 int rt;
cf95b4f0 1403 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1404 rt=get_reg(i_regmap,FTEMP);
1405 }else{
cf95b4f0 1406 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1407 }
1408 assert(rs>=0);
1409 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1410 void *restore_jump = NULL, *handler_jump = NULL;
1411 reglist|=(1<<rs);
1412 for (r = 0; r < HOST_CCREG; r++) {
1413 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1414 temp = r;
1415 break;
1416 }
1417 }
cf95b4f0 1418 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1419 reglist&=~(1<<rt);
1420 if(temp==-1) {
1421 save_regs(reglist);
1422 regs_saved=1;
1423 temp=(rs==0)?2:0;
1424 }
1425 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1426 temp2=1;
1427 emit_readdword(&mem_rtab,temp);
1428 emit_shrimm(rs,12,temp2);
1429 emit_readdword_dualindexedx8(temp,temp2,temp2);
1430 emit_adds64(temp2,temp2,temp2);
1431 handler_jump=out;
1432 emit_jc(0);
cf95b4f0 1433 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1434 switch(type) {
1435 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1436 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1437 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1438 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1439 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1440 default: assert(0);
d1e4ebd9 1441 }
1442 }
1443 if(regs_saved) {
1444 restore_jump=out;
1445 emit_jmp(0); // jump to reg restore
1446 }
1447 else
1448 emit_jmp(stubs[n].retaddr); // return address
1449 set_jump_target(handler_jump, out);
1450
1451 if(!regs_saved)
1452 save_regs(reglist);
1453 void *handler=NULL;
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
1460 assert(handler);
1461 pass_args64(rs,temp2);
1462 int cc=get_reg(i_regmap,CCREG);
1463 if(cc<0)
1464 emit_loadreg(CCREG,2);
2330734f 1465 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1466 emit_far_call(handler);
d1e4ebd9 1467 // (no cycle reload after read)
cf95b4f0 1468 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1469 loadstore_extend(type,0,rt);
1470 }
1471 if(restore_jump)
1472 set_jump_target(restore_jump, out);
1473 restore_regs(reglist);
1474 emit_jmp(stubs[n].retaddr);
be516ebe 1475}
1476
81dbbf4c 1477static void inline_readstub(enum stub_type type, int i, u_int addr,
1478 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1479{
d1e4ebd9 1480 int rs=get_reg(regmap,target);
1481 int rt=get_reg(regmap,target);
9de8a0c3 1482 if(rs<0) rs=get_reg_temp(regmap);
d1e4ebd9 1483 assert(rs>=0);
1484 u_int is_dynamic=0;
1485 uintptr_t host_addr = 0;
1486 void *handler;
1487 int cc=get_reg(regmap,CCREG);
2330734f 1488 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
d1e4ebd9 1489 // return;
1490 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1491 if (handler == NULL) {
cf95b4f0 1492 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1493 return;
37387d8b 1494 if (addr != host_addr)
1495 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1496 switch(type) {
1497 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1498 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1499 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1500 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1501 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1502 default: assert(0);
1503 }
1504 return;
1505 }
37387d8b 1506 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1507 if (is_dynamic) {
d1e4ebd9 1508 if(type==LOADB_STUB||type==LOADBU_STUB)
1509 handler=jump_handler_read8;
1510 if(type==LOADH_STUB||type==LOADHU_STUB)
1511 handler=jump_handler_read16;
1512 if(type==LOADW_STUB)
1513 handler=jump_handler_read32;
1514 }
1515
1516 // call a memhandler
cf95b4f0 1517 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1518 reglist&=~(1<<rt);
1519 save_regs(reglist);
1520 if(target==0)
1521 emit_movimm(addr,0);
1522 else if(rs!=0)
1523 emit_mov(rs,0);
1524 if(cc<0)
1525 emit_loadreg(CCREG,2);
2330734f 1526 emit_addimm(cc<0?2:cc,adj,2);
3968e69e 1527 if(is_dynamic) {
1528 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
aaece508 1529 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1530 if (-4294967296l <= offset && offset < 4294967296l) {
1531 emit_adrp((void *)l1, 1);
1532 emit_addimm64(1, l1 & 0xfff, 1);
1533 }
1534 else
1535 emit_movimm64(l1, 1);
3968e69e 1536 }
d1e4ebd9 1537 else
2a014d73 1538 emit_far_call(do_memhandler_pre);
d1e4ebd9 1539
2a014d73 1540 emit_far_call(handler);
d1e4ebd9 1541
1542 // (no cycle reload after read)
cf95b4f0 1543 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1544 loadstore_extend(type, 0, rt);
1545 restore_regs(reglist);
be516ebe 1546}
1547
1548static void do_writestub(int n)
1549{
1550 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1551 set_jump_target(stubs[n].addr, out);
1552 enum stub_type type=stubs[n].type;
1553 int i=stubs[n].a;
1554 int rs=stubs[n].b;
1555 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1556 u_int reglist=stubs[n].e;
1557 signed char *i_regmap=i_regs->regmap;
1558 int rt,r;
cf95b4f0 1559 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1560 rt=get_reg(i_regmap,r=FTEMP);
1561 }else{
cf95b4f0 1562 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1563 }
1564 assert(rs>=0);
1565 assert(rt>=0);
1566 int rtmp,temp=-1,temp2,regs_saved=0;
1567 void *restore_jump = NULL, *handler_jump = NULL;
1568 int reglist2=reglist|(1<<rs)|(1<<rt);
1569 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1570 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1571 temp = rtmp;
1572 break;
1573 }
1574 }
1575 if(temp==-1) {
1576 save_regs(reglist);
1577 regs_saved=1;
1578 for(rtmp=0;rtmp<=3;rtmp++)
1579 if(rtmp!=rs&&rtmp!=rt)
1580 {temp=rtmp;break;}
1581 }
1582 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1583 temp2=3;
1584 else {
1585 host_tempreg_acquire();
1586 temp2=HOST_TEMPREG;
1587 }
1588 emit_readdword(&mem_wtab,temp);
1589 emit_shrimm(rs,12,temp2);
1590 emit_readdword_dualindexedx8(temp,temp2,temp2);
1591 emit_adds64(temp2,temp2,temp2);
1592 handler_jump=out;
1593 emit_jc(0);
1594 switch(type) {
1595 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1596 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1597 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1598 default: assert(0);
1599 }
1600 if(regs_saved) {
1601 restore_jump=out;
1602 emit_jmp(0); // jump to reg restore
1603 }
1604 else
1605 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1606 set_jump_target(handler_jump, out);
1607
d1e4ebd9 1608 if(!regs_saved)
1609 save_regs(reglist);
1610 void *handler=NULL;
1611 switch(type) {
1612 case STOREB_STUB: handler=jump_handler_write8; break;
1613 case STOREH_STUB: handler=jump_handler_write16; break;
1614 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1615 default: assert(0);
d1e4ebd9 1616 }
1617 assert(handler);
1618 pass_args(rs,rt);
1619 if(temp2!=3) {
1620 emit_mov64(temp2,3);
1621 host_tempreg_release();
1622 }
1623 int cc=get_reg(i_regmap,CCREG);
1624 if(cc<0)
1625 emit_loadreg(CCREG,2);
2330734f 1626 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1627 // returns new cycle_count
2a014d73 1628 emit_far_call(handler);
2330734f 1629 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1630 if(cc<0)
1631 emit_storereg(CCREG,2);
1632 if(restore_jump)
1633 set_jump_target(restore_jump, out);
1634 restore_regs(reglist);
1635 emit_jmp(stubs[n].retaddr);
be516ebe 1636}
1637
81dbbf4c 1638static void inline_writestub(enum stub_type type, int i, u_int addr,
1639 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1640{
9de8a0c3 1641 int rs = get_reg_temp(regmap);
687b4580 1642 int rt = get_reg(regmap,target);
1643 assert(rs >= 0);
1644 assert(rt >= 0);
1645 uintptr_t host_addr = 0;
1646 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1647 if (handler == NULL) {
37387d8b 1648 if (addr != host_addr)
1649 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1650 switch (type) {
687b4580 1651 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1652 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1653 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1654 default: assert(0);
1655 }
1656 return;
1657 }
1658
1659 // call a memhandler
1660 save_regs(reglist);
687b4580 1661 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1662 loadstore_extend(type, rt, 0);
1663 int cc, cc_use;
1664 cc = cc_use = get_reg(regmap, CCREG);
1665 if (cc < 0)
1666 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1667 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1668
2a014d73 1669 emit_far_call(do_memhandler_pre);
1670 emit_far_call(handler);
1671 emit_far_call(do_memhandler_post);
2330734f 1672 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1673 if (cc < 0)
1674 emit_storereg(CCREG, cc_use);
687b4580 1675 restore_regs(reglist);
be516ebe 1676}
1677
3968e69e 1678/* Special assem */
1679
81dbbf4c 1680static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1681{
1682 save_load_regs_all(1, reglist);
32631e6a 1683 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1684#ifdef PCNT
1685 emit_movimm(op, 0);
2a014d73 1686 emit_far_call(pcnt_gte_start);
3968e69e 1687#endif
1688 // pointer to cop2 regs
1689 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1690}
1691
1692static void c2op_epilogue(u_int op,u_int reglist)
1693{
1694#ifdef PCNT
1695 emit_movimm(op, 0);
2a014d73 1696 emit_far_call(pcnt_gte_end);
3968e69e 1697#endif
1698 save_load_regs_all(0, reglist);
be516ebe 1699}
1700
81dbbf4c 1701static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1702{
3968e69e 1703 u_int c2op=source[i]&0x3f;
1704 u_int hr,reglist_full=0,reglist;
1705 int need_flags,need_ir;
1706 for(hr=0;hr<HOST_REGS;hr++) {
1707 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1708 }
1709 reglist=reglist_full&CALLER_SAVE_REGS;
1710
1711 if (gte_handlers[c2op]!=NULL) {
1712 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1713 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1714 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1715 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1716 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1717 need_flags=0;
1718 //int shift = (source[i] >> 19) & 1;
1719 //int lm = (source[i] >> 10) & 1;
1720 switch(c2op) {
1721 default:
1722 (void)need_ir;
81dbbf4c 1723 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1724 emit_movimm(source[i],1); // opcode
1725 emit_writeword(1,&psxRegs.code);
2a014d73 1726 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1727 break;
1728 }
1729 c2op_epilogue(c2op,reglist);
1730 }
1731}
1732
1733static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1734{
1735 //value = value & 0x7ffff000;
1736 //if (value & 0x7f87e000) value |= 0x80000000;
1737 emit_andimm(sl, 0x7fffe000, temp);
1738 emit_testimm(temp, 0xff87ffff);
1739 emit_andimm(sl, 0x7ffff000, temp);
1740 host_tempreg_acquire();
1741 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1742 emit_cmovne_reg(HOST_TEMPREG, temp);
1743 host_tempreg_release();
1744 assert(0); // testing needed
1745}
1746
1747static void do_mfc2_31_one(u_int copr,signed char temp)
1748{
1749 emit_readshword(&reg_cop2d[copr],temp);
1750 emit_bicsar_imm(temp,31,temp);
1751 emit_cmpimm(temp,0xf80);
1752 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1753 emit_andimm(temp,0xf80,temp);
1754}
1755
1756static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1757{
1758 if (temp < 0) {
1759 host_tempreg_acquire();
1760 temp = HOST_TEMPREG;
1761 }
1762 do_mfc2_31_one(9,temp);
1763 emit_shrimm(temp,7,tl);
1764 do_mfc2_31_one(10,temp);
1765 emit_orrshr_imm(temp,2,tl);
1766 do_mfc2_31_one(11,temp);
1767 emit_orrshl_imm(temp,3,tl);
1768 emit_writeword(tl,&reg_cop2d[29]);
1769
1770 if (temp == HOST_TEMPREG)
1771 host_tempreg_release();
be516ebe 1772}
1773
2330734f 1774static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1775{
3968e69e 1776 // case 0x18: MULT
1777 // case 0x19: MULTU
1778 // case 0x1A: DIV
1779 // case 0x1B: DIVU
cf95b4f0 1780 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1781 {
cf95b4f0 1782 switch(dops[i].opcode2)
3968e69e 1783 {
1784 case 0x18: // MULT
1785 case 0x19: // MULTU
1786 {
cf95b4f0 1787 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1788 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1789 signed char hi=get_reg(i_regs->regmap,HIREG);
1790 signed char lo=get_reg(i_regs->regmap,LOREG);
1791 assert(m1>=0);
1792 assert(m2>=0);
1793 assert(hi>=0);
1794 assert(lo>=0);
1795
cf95b4f0 1796 if(dops[i].opcode2==0x18) // MULT
3968e69e 1797 emit_smull(m1,m2,hi);
1798 else // MULTU
1799 emit_umull(m1,m2,hi);
1800
1801 emit_mov(hi,lo);
1802 emit_shrimm64(hi,32,hi);
1803 break;
1804 }
1805 case 0x1A: // DIV
1806 case 0x1B: // DIVU
1807 {
cf95b4f0 1808 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1809 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1810 signed char quotient=get_reg(i_regs->regmap,LOREG);
1811 signed char remainder=get_reg(i_regs->regmap,HIREG);
1812 assert(numerator>=0);
1813 assert(denominator>=0);
1814 assert(quotient>=0);
1815 assert(remainder>=0);
1816
cf95b4f0 1817 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1818 emit_sdiv(numerator,denominator,quotient);
1819 else // DIVU
1820 emit_udiv(numerator,denominator,quotient);
1821 emit_msub(quotient,denominator,numerator,remainder);
1822
1823 // div 0 quotient (remainder is already correct)
1824 host_tempreg_acquire();
cf95b4f0 1825 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1826 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1827 else
1828 emit_movimm(~0,HOST_TEMPREG);
1829 emit_test(denominator,denominator);
1830 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1831 host_tempreg_release();
1832 break;
1833 }
1834 default:
1835 assert(0);
1836 }
1837 }
1838 else
1839 {
1840 signed char hr=get_reg(i_regs->regmap,HIREG);
1841 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1842 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1843 {
cf95b4f0 1844 if (dops[i].rs1) {
1845 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1846 assert(numerator >= 0);
1847 if (hr >= 0)
1848 emit_mov(numerator,hr);
1849 if (lr >= 0) {
cf95b4f0 1850 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1851 emit_sub_asrimm(0,numerator,31,lr);
1852 else
1853 emit_movimm(~0,lr);
1854 }
1855 }
1856 else {
1857 if (hr >= 0) emit_zeroreg(hr);
1858 if (lr >= 0) emit_movimm(~0,lr);
1859 }
1860 }
1861 else
1862 {
1863 // Multiply by zero is zero.
1864 if (hr >= 0) emit_zeroreg(hr);
1865 if (lr >= 0) emit_zeroreg(lr);
1866 }
1867 }
be516ebe 1868}
1869#define multdiv_assemble multdiv_assemble_arm64
1870
d1e4ebd9 1871static void do_jump_vaddr(u_int rs)
1872{
1873 if (rs != 0)
1874 emit_mov(rs, 0);
104df9d3 1875 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1876 emit_jmpreg(0);
1877}
1878
be516ebe 1879static void do_preload_rhash(u_int r) {
1880 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1881 // register. On ARM the hash can be done with a single instruction (below)
1882}
1883
1884static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1885 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1886}
1887
1888static void do_rhash(u_int rs,u_int rh) {
1889 emit_andimm(rs, 0xf8, rh);
1890}
1891
d1e4ebd9 1892static void do_miniht_load(int ht, u_int rh) {
1893 emit_add64(ht, rh, ht);
1894 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1895}
1896
d1e4ebd9 1897static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1898 emit_cmp(rh, rs);
1899 void *jaddr = out;
1900 emit_jeq(0);
1901 do_jump_vaddr(rs);
1902
1903 set_jump_target(jaddr, out);
1904 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1905 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1906 emit_jmpreg(ht);
be516ebe 1907}
1908
d1e4ebd9 1909// parsed by set_jump_target?
be516ebe 1910static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1911 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1912 emit_movk(return_address&0xffff,rt);
1913 add_to_linker(out,return_address,1);
1914 emit_adr(out,temp);
1915 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1916 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1917}
1918
d9e2b173 1919static unused void clear_cache_arm64(char *start, char *end)
be516ebe 1920{
919981d0 1921 // Don't rely on GCC's __clear_cache implementation, as it caches
1922 // icache/dcache cache line sizes, that can vary between cores on
1923 // big.LITTLE architectures.
1924 uint64_t addr, ctr_el0;
1925 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1926 size_t isize, dsize;
1927
1928 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
1929 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1930 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1931
1932 // use the global minimum cache line size
1933 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1934 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1935
1936 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1937 not required for instruction to data coherence. */
1938 if ((ctr_el0 & (1 << 28)) == 0x0) {
1939 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1940 for (; addr < (uint64_t)end; addr += dsize)
1941 // use "civac" instead of "cvau", as this is the suggested workaround for
1942 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1943 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 1944 }
919981d0 1945 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 1946
919981d0 1947 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1948 Unification is not required for instruction to data coherence. */
1949 if ((ctr_el0 & (1 << 29)) == 0x0) {
1950 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1951 for (; addr < (uint64_t)end; addr += isize)
1952 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1953
1954 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 1955 }
919981d0 1956
1957 __asm__ volatile("isb" : : : "memory");
be516ebe 1958}
1959
1960// CPU-architecture-specific initialization
2a014d73 1961static void arch_init(void)
1962{
1963 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 1964 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 1965 size_t i;
1966 assert(!(diff & 3));
d9e2b173 1967 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 1968 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 1969 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1970 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 1971 }
1972 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 1973}
1974
1975// vim:shiftwidth=2:expandtab