drc: update according to interpreter
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
d1e4ebd9 26void do_memhandler_pre();
27void do_memhandler_post();
be516ebe 28
29/* Linker */
d1e4ebd9 30static void set_jump_target(void *addr, void *target)
be516ebe 31{
d9e2b173 32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 33 intptr_t offset = (u_char *)target - (u_char *)addr;
34
3968e69e 35 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
38 }
3968e69e 39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 43 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 44 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 45 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 47 }
3968e69e 48 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
52 }
53 else
3968e69e 54 abort(); // should not happen
be516ebe 55}
56
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
//
// The third word of the stub must be an adr instruction; its 21-bit signed
// byte displacement (immhi:immlo) is decoded and applied to the address of
// that adr. The field is extracted with unsigned arithmetic and then
// sign-extended explicitly, so no signed value is ever left-shifted.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr

  // immhi is bits 23:5, immlo is bits 30:29
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from bit 20
  int offset = (int)(imm21 ^ 0x100000) - 0x100000;

  return ptr + offset / 4;
}
66
104df9d3 67#if 0
// find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to the branch insn,
// return the address that branch jumps to
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int is_b = (*branch&0xfc000000) == 0x14000000;
  const int is_bcond = (*branch&0xff000000) == 0x54000000;
  const int is_cbz = (*branch&0x7e000000) == 0x34000000; // cbz/cbnz

  if (is_b)
    return branch + ((signed int)(*branch<<6)>>6);
  if (is_bcond || is_cbz)
    return branch + ((signed int)(*branch<<8)>>13);
  assert(0);
  return NULL;
}
104df9d3 83#endif
be516ebe 84
be516ebe 85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
d1e4ebd9 118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition codes, numbered as they are placed into instruction encodings.
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 139};
140
be516ebe 141static void output_w32(u_int word)
142{
d9e2b173 143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 144 out += 4;
145}
146
// Pack Rn (bits 9:5) and Rd (bits 4:0); register 31 is rejected.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rn << 5;
  fields |= rd;
  return fields;
}
153
// Pack Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0).
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rm << 16;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
161
// Pack Rm, Rn, Rd as rm_rn_rd() does, plus Ra in bits 14:10.
static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
{
  assert(ra < 32);
  u_int fields = rm_rn_rd(rm, rn, rd);
  return fields | (ra << 10);
}
167
// Pack imm7 (bits 21:15), Rt2 (bits 14:10), Rn (bits 9:5) and Rt (bits 4:0).
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  u_int fields = imm7 << 15;
  fields |= rt2 << 10;
  fields |= rn << 5;
  fields |= rt;
  return fields;
}
176
// Pack Rm, Rn, Rd as rm_rn_rd() does, plus a 6-bit immediate in bits 15:10.
static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
{
  assert(imm6 <= 63);
  u_int fields = rm_rn_rd(rm, rn, rd);
  return fields | (imm6 << 10);
}
182
// Pack a 16-bit immediate (bits 20:5) and Rd (bits 4:0).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  u_int fields = imm16 << 5;
  fields |= rd;
  return fields;
}
189
// Pack a 12-bit immediate (bits 21:10), Rn (bits 9:5) and Rd (bits 4:0).
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = imm12 << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
197
// Pack a 9-bit immediate (bits 20:12), Rn (bits 9:5) and Rt (bits 4:0).
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = imm9 << 12;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
205
// Pack a 19-bit immediate (bits 23:5) and Rt (bits 4:0).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  u_int fields = imm19 << 5;
  fields |= rt;
  return fields;
}
212
// Pack N (bit 22), immr (bits 21:16), imms (bits 15:10), Rn (bits 9:5)
// and Rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = n << 22;
  fields |= immr << 16;
  fields |= imms << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
222
223static u_int genjmp(const u_char *addr)
be516ebe 224{
225 intptr_t offset = addr - out;
d1e4ebd9 226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 227 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 abort();
be516ebe 230 return 0;
231 }
d1e4ebd9 232 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 233}
234
d1e4ebd9 235static u_int genjmpcc(const u_char *addr)
be516ebe 236{
237 intptr_t offset = addr - out;
d1e4ebd9 238 if ((uintptr_t)addr < 3) return 0;
be516ebe 239 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 abort();
242 return 0;
243 }
244 return ((u_int)offset >> 2) & 0x7ffff;
245}
246
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 shares no bits with value), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
251
// Returns true if the argument consists of a single non-empty run of ones
// (possibly wrapping around bit 31) with all remaining bits zero.
// All-zeros and all-ones are rejected.
static uint32_t is_rotated_mask(u_int value)
{
  const u_int inverted = ~value;

  if (value == 0 || inverted == 0)
    return 0;
  // a run that does not wrap, or whose complement does not wrap
  return is_mask(value | (value - 1)) || is_mask(inverted | (inverted - 1));
}
262
// Compute the immr/imms fields of a 32-bit logical immediate for value.
//
// value must be a single run of ones, possibly wrapping around bit 31
// (what is_rotated_mask() accepts). Zero and all-ones have no encoding and
// are rejected by assertion; any other unsupported pattern aborts.
//   immr: rotate-right amount applied to the run of ones
//   imms: number of ones minus one
static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
{
  int lzeros, tzeros, ones;
  assert(value != 0);
  // all-ones would pass the first test below and produce imms == 31,
  // which is not a valid 32-bit logical immediate
  assert(value != ~0u);
  if (is_mask((value - 1) | value)) {
    // the run of ones does not wrap: rotate it up from bit 0
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros;
    *immr = (32 - tzeros) & 31;
    *imms = ones - 1;
    return;
  }
  value = ~value;
  if (is_mask((value - 1) | value)) {
    // the run of zeros does not wrap, so the ones wrap around bit 31;
    // `ones` here counts the zeros of the original value
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros;
    *immr = lzeros;
    *imms = 31 - ones;
    return;
  }
  abort();
}
286
287static void emit_mov(u_int rs, u_int rt)
288{
687b4580 289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
291}
292
293static void emit_mov64(u_int rs, u_int rt)
294{
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 297}
298
687b4580 299static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 300{
d1e4ebd9 301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 303}
304
a5cd72d0 305static void emit_adds(u_int rs1, u_int rs2, u_int rt)
306{
307 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
308 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
309}
310
d1e4ebd9 311static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 312{
d1e4ebd9 313 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
314 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 315}
316
d1e4ebd9 317static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 318{
3968e69e 319 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 320 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
321}
39b71d9a 322#define emit_adds_ptr emit_adds64
d1e4ebd9 323
a5cd72d0 324static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
325{
326 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
327 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
328}
329
d1e4ebd9 330static void emit_neg(u_int rs, u_int rt)
331{
332 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
333 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 334}
335
a5cd72d0 336static void emit_negs(u_int rs, u_int rt)
337{
338 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
339 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
340}
341
687b4580 342static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 343{
d1e4ebd9 344 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 345 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 346}
347
a5cd72d0 348static void emit_subs(u_int rs1, u_int rs2, u_int rt)
349{
350 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
351 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
352}
353
354static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
3968e69e 355{
356 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
357 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
358}
359
d1e4ebd9 360static void emit_movz(u_int imm, u_int rt)
be516ebe 361{
d1e4ebd9 362 assem_debug("movz %s,#%#x\n", regname[rt], imm);
363 output_w32(0x52800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movz_lsl16(u_int imm, u_int rt)
367{
368 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
369 output_w32(0x52a00000 | imm16_rd(imm, rt));
370}
371
372static void emit_movn(u_int imm, u_int rt)
373{
374 assem_debug("movn %s,#%#x\n", regname[rt], imm);
375 output_w32(0x12800000 | imm16_rd(imm, rt));
376}
377
378static void emit_movn_lsl16(u_int imm,u_int rt)
379{
380 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
381 output_w32(0x12a00000 | imm16_rd(imm, rt));
382}
383
384static void emit_movk(u_int imm,u_int rt)
385{
386 assem_debug("movk %s,#%#x\n", regname[rt], imm);
387 output_w32(0x72800000 | imm16_rd(imm, rt));
388}
389
390static void emit_movk_lsl16(u_int imm,u_int rt)
391{
392 assert(imm<65536);
3968e69e 393 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 394 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 395}
396
397static void emit_zeroreg(u_int rt)
398{
d1e4ebd9 399 emit_movz(0, rt);
be516ebe 400}
401
be516ebe 402static void emit_movimm(u_int imm, u_int rt)
403{
d1e4ebd9 404 if (imm < 65536)
405 emit_movz(imm, rt);
406 else if ((~imm) < 65536)
407 emit_movn(~imm, rt);
408 else if ((imm&0xffff) == 0)
409 emit_movz_lsl16(imm >> 16, rt);
410 else if (((~imm)&0xffff) == 0)
411 emit_movn_lsl16(~imm >> 16, rt);
412 else if (is_rotated_mask(imm)) {
413 u_int immr, imms;
414 gen_logical_imm(imm, &immr, &imms);
415 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
416 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
417 }
be516ebe 418 else {
d1e4ebd9 419 emit_movz(imm & 0xffff, rt);
420 emit_movk_lsl16(imm >> 16, rt);
be516ebe 421 }
422}
423
aaece508 424static void emit_movimm64(uint64_t imm, u_int rt)
425{
426 u_int shift, op, imm16, insns = 0;
427 for (shift = 0; shift < 4; shift++) {
428 imm16 = (imm >> shift * 16) & 0xffff;
429 if (!imm16)
430 continue;
431 op = insns ? 0xf2800000 : 0xd2800000;
432 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
433 if (shift)
434 assem_debug(",lsl #%u", shift * 16);
435 assem_debug("\n");
436 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
437 insns++;
438 }
439 if (!insns) {
440 assem_debug("movz %s,#0\n", regname64[rt]);
441 output_w32(0xd2800000 | imm16_rd(0, rt));
442 }
443}
444
687b4580 445static void emit_readword(void *addr, u_int rt)
446{
447 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
448 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 449 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 450 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
451 }
452 else
3968e69e 453 abort();
687b4580 454}
455
d1e4ebd9 456static void emit_readdword(void *addr, u_int rt)
457{
458 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
459 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 460 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
d1e4ebd9 461 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
462 }
3968e69e 463 else
464 abort();
465}
39b71d9a 466#define emit_readptr emit_readdword
3968e69e 467
468static void emit_readshword(void *addr, u_int rt)
469{
470 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
471 if (!(offset & 1) && offset <= 8190) {
472 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
473 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
474 }
d1e4ebd9 475 else
476 assert(0);
477}
478
be516ebe 479static void emit_loadreg(u_int r, u_int hr)
480{
d1e4ebd9 481 int is64 = 0;
be516ebe 482 if (r == 0)
483 emit_zeroreg(hr);
484 else {
33788798 485 void *addr;
be516ebe 486 switch (r) {
7c3a5182 487 //case HIREG: addr = &hi; break;
488 //case LOREG: addr = &lo; break;
be516ebe 489 case CCREG: addr = &cycle_count; break;
bc7c5acb 490 case CSREG: addr = &psxRegs.CP0.n.SR; break;
d1e4ebd9 491 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 492 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 493 default:
494 assert(r < 34);
495 addr = &psxRegs.GPR.r[r];
496 break;
be516ebe 497 }
d1e4ebd9 498 if (is64)
499 emit_readdword(addr, hr);
500 else
501 emit_readword(addr, hr);
be516ebe 502 }
503}
504
687b4580 505static void emit_writeword(u_int rt, void *addr)
506{
507 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
508 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 509 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 510 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
511 }
512 else
513 assert(0);
514}
515
d1e4ebd9 516static void emit_writedword(u_int rt, void *addr)
517{
518 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
519 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 520 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
3968e69e 521 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 522 }
523 else
3968e69e 524 abort();
d1e4ebd9 525}
526
687b4580 527static void emit_storereg(u_int r, u_int hr)
be516ebe 528{
529 assert(r < 64);
7c3a5182 530 void *addr = &psxRegs.GPR.r[r];
be516ebe 531 switch (r) {
7c3a5182 532 //case HIREG: addr = &hi; break;
533 //case LOREG: addr = &lo; break;
be516ebe 534 case CCREG: addr = &cycle_count; break;
7c3a5182 535 default: assert(r < 34); break;
be516ebe 536 }
687b4580 537 emit_writeword(hr, addr);
be516ebe 538}
539
540static void emit_test(u_int rs, u_int rt)
541{
d1e4ebd9 542 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
543 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 544}
545
d1e4ebd9 546static void emit_testimm(u_int rs, u_int imm)
be516ebe 547{
d1e4ebd9 548 u_int immr, imms;
687b4580 549 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 550 assert(is_rotated_mask(imm)); // good enough for PCSX
551 gen_logical_imm(imm, &immr, &imms);
3968e69e 552 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 553}
554
555static void emit_not(u_int rs,u_int rt)
556{
557 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 558 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 559}
560
be516ebe 561static void emit_and(u_int rs1,u_int rs2,u_int rt)
562{
563 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 564 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 565}
566
567static void emit_or(u_int rs1,u_int rs2,u_int rt)
568{
569 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 570 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 571}
572
3968e69e 573static void emit_bic(u_int rs1,u_int rs2,u_int rt)
574{
575 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
576 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
577}
578
be516ebe 579static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
580{
be516ebe 581 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 582 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 583}
584
585static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
586{
be516ebe 587 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 588 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 589}
590
a5cd72d0 591static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
592{
593 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
594 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
595}
596
3968e69e 597static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
598{
599 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
600 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
601}
602
be516ebe 603static void emit_xor(u_int rs1,u_int rs2,u_int rt)
604{
605 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 606 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 607}
608
3968e69e 609static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
610{
611 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
612 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
613}
614
d1e4ebd9 615static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 616{
d1e4ebd9 617 unused const char *st = s ? "s" : "";
618 s = s ? 0x20000000 : 0;
619 is64 = is64 ? 0x80000000 : 0;
687b4580 620 if (imm < 4096) {
d1e4ebd9 621 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
622 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 623 }
624 else if (-imm < 4096) {
3968e69e 625 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 626 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
627 }
a5cd72d0 628 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
629 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
630 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
631 if (imm & 0xfff) {
632 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
633 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 634 }
635 }
a5cd72d0 636 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
637 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
638 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
639 if (-imm & 0xfff) {
640 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
641 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
d1e4ebd9 642 }
687b4580 643 }
a5cd72d0 644 else {
645 u_int tmp = rt;
646 assert(!is64);
647 if (rs == rt) {
648 host_tempreg_acquire();
649 tmp = HOST_TEMPREG;
650 }
651 emit_movimm(imm, tmp);
652 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
653 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
654 if (tmp == HOST_TEMPREG)
655 host_tempreg_release();
656 }
be516ebe 657}
658
d1e4ebd9 659static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
660{
9b495f6e 661 if (imm == 0) {
662 emit_mov(rs, rt);
663 return;
664 }
d1e4ebd9 665 emit_addimm_s(0, 0, rs, imm, rt);
666}
667
668static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
669{
670 emit_addimm_s(0, 1, rs, imm, rt);
671}
672
bc7c5acb 673static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
674{
675 emit_addimm64(rs, imm, rt);
676}
677
be516ebe 678static void emit_addimm_and_set_flags(int imm, u_int rt)
679{
d1e4ebd9 680 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 681}
682
a5cd72d0 683static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
684{
685 emit_addimm_s(1, 0, rs, imm, rt);
686}
687
d1e4ebd9 688static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 689{
d1e4ebd9 690 const char *names[] = { "and", "orr", "eor", "ands" };
691 const char *name = names[op];
692 u_int immr, imms;
693 op = op << 29;
694 if (is_rotated_mask(imm)) {
695 gen_logical_imm(imm, &immr, &imms);
696 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
697 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
698 }
699 else {
700 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
701 host_tempreg_acquire();
702 emit_movimm(imm, HOST_TEMPREG);
703 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
704 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
705 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
706 host_tempreg_release();
707 }
708 (void)name;
be516ebe 709}
710
d1e4ebd9 711static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 712{
d1e4ebd9 713 if (imm == 0)
714 emit_zeroreg(rt);
715 else
716 emit_logicop_imm(0, rs, imm, rt);
be516ebe 717}
718
d1e4ebd9 719static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 720{
d1e4ebd9 721 if (imm == 0) {
722 if (rs != rt)
723 emit_mov(rs, rt);
724 }
725 else
726 emit_logicop_imm(1, rs, imm, rt);
be516ebe 727}
728
d1e4ebd9 729static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 730{
d1e4ebd9 731 if (imm == 0) {
732 if (rs != rt)
733 emit_mov(rs, rt);
734 }
735 else
736 emit_logicop_imm(2, rs, imm, rt);
be516ebe 737}
738
d1e4ebd9 739static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 740{
d1e4ebd9 741 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
742 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 743}
744
d1e4ebd9 745static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 746{
d1e4ebd9 747 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
748 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 749}
750
751static void emit_shlimm(u_int rs,u_int imm,u_int rt)
752{
be516ebe 753 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 754 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 755}
756
3968e69e 757static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 758{
3968e69e 759 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
760 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 761}
762
3968e69e 763static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 764{
be516ebe 765 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 766 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 767}
768
769static void emit_sarimm(u_int rs,u_int imm,u_int rt)
770{
be516ebe 771 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 772 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 773}
774
775static void emit_rorimm(u_int rs,u_int imm,u_int rt)
776{
3968e69e 777 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 778 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 779}
780
781static void emit_signextend16(u_int rs, u_int rt)
782{
783 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 784 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 785}
786
d1e4ebd9 787static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 788{
3968e69e 789 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 790 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 791}
792
d1e4ebd9 793static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 794{
d1e4ebd9 795 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
796 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 797}
798
d1e4ebd9 799static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 800{
d1e4ebd9 801 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
802 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 803}
804
d1e4ebd9 805static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 806{
d1e4ebd9 807 if (imm < 4096) {
808 assem_debug("cmp %s,%#x\n", regname[rs], imm);
809 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
810 }
811 else if (-imm < 4096) {
812 assem_debug("cmn %s,%#x\n", regname[rs], imm);
813 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
814 }
815 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 816 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 817 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
818 }
819 else {
820 host_tempreg_acquire();
821 emit_movimm(imm, HOST_TEMPREG);
822 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
823 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
824 host_tempreg_release();
825 }
be516ebe 826}
827
d1e4ebd9 828static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 829{
d1e4ebd9 830 assert(imm == 0 || imm == 1);
831 assert(cond0 < 0x10);
832 assert(cond1 < 0x10);
833 if (imm) {
834 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
835 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
836 } else {
837 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
838 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
839 }
be516ebe 840}
841
d1e4ebd9 842static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 843{
d1e4ebd9 844 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 845}
846
d1e4ebd9 847static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 848{
d1e4ebd9 849 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 850}
851
852static void emit_cmovb_imm(int imm,u_int rt)
853{
d1e4ebd9 854 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 855}
856
3968e69e 857static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 858{
3968e69e 859 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
860 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 861}
862
863static void emit_cmovne_reg(u_int rs,u_int rt)
864{
d1e4ebd9 865 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
866 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 867}
868
869static void emit_cmovl_reg(u_int rs,u_int rt)
870{
d1e4ebd9 871 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
872 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 873}
874
e3c6bdb5 875static void emit_cmovb_reg(u_int rs,u_int rt)
876{
877 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
878 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
879}
880
be516ebe 881static void emit_cmovs_reg(u_int rs,u_int rt)
882{
d1e4ebd9 883 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
884 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 885}
886
3968e69e 887static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
888{
889 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
890 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
891}
892
a5cd72d0 893static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
894{
895 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
897}
898
be516ebe 899static void emit_slti32(u_int rs,int imm,u_int rt)
900{
901 if(rs!=rt) emit_zeroreg(rt);
902 emit_cmpimm(rs,imm);
903 if(rs==rt) emit_movimm(0,rt);
904 emit_cmovl_imm(1,rt);
905}
906
907static void emit_sltiu32(u_int rs,int imm,u_int rt)
908{
909 if(rs!=rt) emit_zeroreg(rt);
910 emit_cmpimm(rs,imm);
911 if(rs==rt) emit_movimm(0,rt);
912 emit_cmovb_imm(1,rt);
913}
914
915static void emit_cmp(u_int rs,u_int rt)
916{
917 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 918 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 919}
920
882a08fc 921static void emit_cmpcs(u_int rs,u_int rt)
922{
923 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
924 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
925}
926
be516ebe 927static void emit_set_gz32(u_int rs, u_int rt)
928{
929 //assem_debug("set_gz32\n");
930 emit_cmpimm(rs,1);
931 emit_movimm(1,rt);
932 emit_cmovl_imm(0,rt);
933}
934
935static void emit_set_nz32(u_int rs, u_int rt)
936{
937 //assem_debug("set_nz32\n");
d1e4ebd9 938 if(rs!=rt) emit_mov(rs,rt);
939 emit_test(rs,rs);
940 emit_cmovne_imm(1,rt);
be516ebe 941}
942
943static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
944{
945 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
946 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
947 emit_cmp(rs1,rs2);
948 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
949 emit_cmovl_imm(1,rt);
950}
951
952static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
953{
954 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
955 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
956 emit_cmp(rs1,rs2);
957 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
958 emit_cmovb_imm(1,rt);
959}
960
2a014d73 961static int can_jump_or_call(const void *a)
962{
963 intptr_t diff = (u_char *)a - out;
964 return (-134217728 <= diff && diff <= 134217727);
965}
966
d1e4ebd9 967static void emit_call(const void *a)
be516ebe 968{
d1e4ebd9 969 intptr_t diff = (u_char *)a - out;
970 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 971 assert(!(diff & 3));
972 if (-134217728 <= diff && diff <= 134217727)
973 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
974 else
3968e69e 975 abort();
be516ebe 976}
977
d1e4ebd9 978static void emit_jmp(const void *a)
be516ebe 979{
d1e4ebd9 980 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
981 u_int offset = genjmp(a);
982 output_w32(0x14000000 | offset);
be516ebe 983}
984
d1e4ebd9 985static void emit_jne(const void *a)
be516ebe 986{
d1e4ebd9 987 assem_debug("bne %p\n", a);
988 u_int offset = genjmpcc(a);
989 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 990}
991
7c3a5182 992static void emit_jeq(const void *a)
be516ebe 993{
d1e4ebd9 994 assem_debug("beq %p\n", a);
995 u_int offset = genjmpcc(a);
996 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 997}
998
7c3a5182 999static void emit_js(const void *a)
be516ebe 1000{
d1e4ebd9 1001 assem_debug("bmi %p\n", a);
1002 u_int offset = genjmpcc(a);
1003 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 1004}
1005
7c3a5182 1006static void emit_jns(const void *a)
be516ebe 1007{
d1e4ebd9 1008 assem_debug("bpl %p\n", a);
1009 u_int offset = genjmpcc(a);
1010 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 1011}
1012
7c3a5182 1013static void emit_jl(const void *a)
be516ebe 1014{
d1e4ebd9 1015 assem_debug("blt %p\n", a);
1016 u_int offset = genjmpcc(a);
1017 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 1018}
1019
7c3a5182 1020static void emit_jge(const void *a)
be516ebe 1021{
d1e4ebd9 1022 assem_debug("bge %p\n", a);
1023 u_int offset = genjmpcc(a);
1024 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 1025}
1026
a5cd72d0 1027static void emit_jo(const void *a)
1028{
1029 assem_debug("bvs %p\n", a);
1030 u_int offset = genjmpcc(a);
1031 output_w32(0x54000000 | (offset << 5) | COND_VS);
1032}
1033
7c3a5182 1034static void emit_jno(const void *a)
be516ebe 1035{
d1e4ebd9 1036 assem_debug("bvc %p\n", a);
1037 u_int offset = genjmpcc(a);
1038 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 1039}
1040
7c3a5182 1041static void emit_jc(const void *a)
be516ebe 1042{
d1e4ebd9 1043 assem_debug("bcs %p\n", a);
1044 u_int offset = genjmpcc(a);
1045 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 1046}
1047
3968e69e 1048static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 1049{
3968e69e 1050 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 1051 u_int offset = genjmpcc(a);
3968e69e 1052 is64 = is64 ? 0x80000000 : 0;
1053 isnz = isnz ? 0x01000000 : 0;
1054 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1055}
1056
9b495f6e 1057static void *emit_cbz(u_int r, const void *a)
3968e69e 1058{
9b495f6e 1059 void *ret = out;
3968e69e 1060 emit_cb(0, 0, a, r);
9b495f6e 1061 return ret;
be516ebe 1062}
1063
1064static void emit_jmpreg(u_int r)
1065{
3968e69e 1066 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1067 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1068}
1069
1070static void emit_retreg(u_int r)
1071{
d1e4ebd9 1072 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1073 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1074}
1075
// Emit a plain "ret" (return through LR).
static void emit_ret(void)
{
  emit_retreg(LR);
}
1080
d1e4ebd9 1081static void emit_adr(void *addr, u_int rt)
1082{
1083 intptr_t offset = (u_char *)addr - out;
1084 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1085 assert(rt < 31);
d1e4ebd9 1086 assem_debug("adr x%d,#%#lx\n", rt, offset);
1087 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1088}
1089
3968e69e 1090static void emit_adrp(void *addr, u_int rt)
1091{
1092 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1093 assert(-4294967296l <= offset && offset < 4294967296l);
1094 assert(rt < 31);
1095 offset >>= 12;
1096 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1097 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1098}
1099
be516ebe 1100static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1101{
d1e4ebd9 1102 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1103 assert(-256 <= offset && offset < 256);
1104 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1105}
1106
1107static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1108{
1109 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1110 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1111}
1112
1113static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1114{
1115 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1116 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1117}
1118
1119static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1120{
1121 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1122 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1123}
1124
1125static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1126{
1127 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1128 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1129}
39b71d9a 1130#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1131
1132static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1133{
1134 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1135 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1136}
1137
1138static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1139{
1140 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1141 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1142}
1143
1144static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1145{
1146 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1147 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1148}
1149
1150static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1151{
1152 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1153 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1154}
1155
1156static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1157{
1158 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1159 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1160}
1161
be516ebe 1162static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1163{
d1e4ebd9 1164 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1165 assert(-256 <= offset && offset < 256);
1166 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1167}
1168
1169static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1170{
d1e4ebd9 1171 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1172 assert(-256 <= offset && offset < 256);
1173 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1174}
1175
1176static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1177{
d1e4ebd9 1178 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1179 assert(-256 <= offset && offset < 256);
1180 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1181}
1182
1183static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1184{
d1e4ebd9 1185 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1186 assert(-256 <= offset && offset < 256);
1187 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1188}
1189
be516ebe 1190static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1191{
3968e69e 1192 if (!(offset & 3) && (u_int)offset <= 16380) {
1193 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1194 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1195 }
1196 else if (-256 <= offset && offset < 256) {
1197 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1198 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1199 }
687b4580 1200 else
1201 assert(0);
be516ebe 1202}
1203
1204static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1205{
3968e69e 1206 if (!(offset & 1) && (u_int)offset <= 8190) {
1207 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1208 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1209 }
1210 else if (-256 <= offset && offset < 256) {
1211 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1212 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1213 }
687b4580 1214 else
1215 assert(0);
be516ebe 1216}
1217
1218static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1219{
3968e69e 1220 if ((u_int)offset < 4096) {
1221 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1222 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1223 }
1224 else if (-256 <= offset && offset < 256) {
1225 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1226 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1227 }
687b4580 1228 else
1229 assert(0);
be516ebe 1230}
1231
3968e69e 1232static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1233{
3968e69e 1234 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1235 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1236}
1237
3968e69e 1238static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1239{
3968e69e 1240 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1241 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1242}
1243
1244static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1245{
1246 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1247 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1248}
1249
1250static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1251{
1252 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1253 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1254}
1255
3968e69e 1256static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1257{
1258 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1259 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1260}
1261
1262static void emit_clz(u_int rs, u_int rt)
be516ebe 1263{
1264 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1265 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1266}
1267
be516ebe 1268// special case for checking invalid_code
9b495f6e 1269static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1270{
1271 emit_shrimm(r, 12, rt);
1272 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1273 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1274}
1275
// Special helper for loadlr_assemble: combine rs1 with the left-shifted rs2
// through emit_bic and put the result in rt (presumably
// rt = rs1 & ~(rs2 << shift), going by the helper names).
// `shift` is passed to emit_shl as a register number, not an immediate
// (assumption from the u_int type and the non-"imm" helper - TODO confirm).
// NOTE: rs2 is destroyed - the shift is done in place.
static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
{
  emit_shl(rs2, shift, rs2);
  emit_bic(rs1, rs2, rt);
}
1282
// Right-shift counterpart of emit_bic_lsl: combine rs1 with the
// right-shifted rs2 through emit_bic and put the result in rt (presumably
// rt = rs1 & ~(rs2 >> shift), going by the helper names).
// NOTE: rs2 is destroyed - the shift is done in place.
static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
{
  emit_shr(rs2, shift, rs2);
  emit_bic(rs1, rs2, rt);
}
1288
687b4580 1289static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1290{
687b4580 1291 u_int op = 0xb9000000;
d1e4ebd9 1292 unused const char *ldst = is_st ? "st" : "ld";
1293 unused char rp = is64 ? 'x' : 'w';
687b4580 1294 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1295 is64 = is64 ? 1 : 0;
1296 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1297 ofs = (ofs >> (2+is64));
687b4580 1298 if (!is_st) op |= 0x00400000;
1299 if (is64) op |= 0x40000000;
d1e4ebd9 1300 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1301}
1302
687b4580 1303static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1304{
687b4580 1305 u_int op = 0x29000000;
d1e4ebd9 1306 unused const char *ldst = is_st ? "st" : "ld";
1307 unused char rp = is64 ? 'x' : 'w';
687b4580 1308 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1309 is64 = is64 ? 1 : 0;
1310 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1311 ofs = (ofs >> (2+is64));
1312 assert(-64 <= ofs && ofs <= 63);
1313 ofs &= 0x7f;
1314 if (!is_st) op |= 0x00400000;
1315 if (is64) op |= 0x80000000;
d1e4ebd9 1316 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1317}
1318
1319static void save_load_regs_all(int is_store, u_int reglist)
1320{
1321 int ofs = 0, c = 0;
1322 u_int r, pair[2];
1323 for (r = 0; reglist; r++, reglist >>= 1) {
1324 if (reglist & 1)
1325 pair[c++] = r;
1326 if (c == 2) {
1327 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1328 ofs += 8 * 2;
1329 c = 0;
1330 }
1331 }
1332 if (c) {
1333 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1334 ofs += 8;
1335 }
1336 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1337}
1338
1339// Save registers before function call
1340static void save_regs(u_int reglist)
1341{
1342 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1343 save_load_regs_all(1, reglist);
be516ebe 1344}
1345
1346// Restore registers after function call
1347static void restore_regs(u_int reglist)
1348{
1349 reglist &= CALLER_SAVE_REGS;
687b4580 1350 save_load_regs_all(0, reglist);
be516ebe 1351}
1352
1353/* Stubs/epilogue */
1354
// Emits nothing on arm64; n is ignored.
// The cast only references `literals` (presumably to keep the compiler
// from warning about an otherwise unused variable).
static void literal_pool(int n)
{
  (void)literals;
}
1359
// Emits nothing on arm64; n is ignored.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1363
d1e4ebd9 1364// parsed by get_pointer, find_extjump_insn
104df9d3 1365static void emit_extjump(u_char *addr, u_int target)
be516ebe 1366{
d1e4ebd9 1367 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1368
d1e4ebd9 1369 emit_movz(target & 0xffff, 0);
1370 emit_movk_lsl16(target >> 16, 0);
1371
1372 // addr is in the current recompiled block (max 256k)
1373 // offset shouldn't exceed +/-1MB
1374 emit_adr(addr, 1);
104df9d3 1375 emit_far_jump(dyna_linker);
be516ebe 1376}
1377
d1e4ebd9 1378static void check_extjump2(void *src)
be516ebe 1379{
d1e4ebd9 1380 u_int *ptr = src;
1381 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1382 (void)ptr;
be516ebe 1383}
1384
1385// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1386static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1387{
d1e4ebd9 1388 int diff = rt_val - rs_val;
3968e69e 1389 if ((-4096 < diff && diff < 4096)
1390 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1391 emit_addimm(rs, diff, rt);
3968e69e 1392 else if (rt_val == ~rs_val)
1393 emit_not(rs, rt);
d1e4ebd9 1394 else if (is_rotated_mask(rs_val ^ rt_val))
1395 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1396 else
d1e4ebd9 1397 emit_movimm(rt_val, rt);
be516ebe 1398}
1399
d1e4ebd9 1400// return 1 if the above function can do it's job cheaply
687b4580 1401static int is_similar_value(u_int v1, u_int v2)
be516ebe 1402{
687b4580 1403 int diff = v1 - v2;
3968e69e 1404 return (-4096 < diff && diff < 4096)
1405 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1406 || v1 == ~v2
d1e4ebd9 1407 || is_rotated_mask(v1 ^ v2);
1408}
1409
37387d8b 1410static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1411{
1412 if (rt_val < 0x100000000ull) {
1413 emit_movimm_from(rs_val, rs, rt_val, rt);
1414 return;
1415 }
1416 // just move the whole thing. At least on Linux all addresses
1417 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1418 emit_movimm64(rt_val, rt);
37387d8b 1419}
1420
1421// trashes x2
d1e4ebd9 1422static void pass_args64(u_int a0, u_int a1)
1423{
1424 if(a0==1&&a1==0) {
1425 // must swap
1426 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1427 }
1428 else if(a0!=0&&a1==0) {
1429 emit_mov64(a1,1);
1430 if (a0>=0) emit_mov64(a0,0);
1431 }
1432 else {
1433 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1434 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1435 }
be516ebe 1436}
1437
d1e4ebd9 1438static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1439{
1440 switch(type) {
1441 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1442 case LOADBU_STUB:
1443 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1444 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1445 case LOADHU_STUB:
1446 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1447 case LOADW_STUB:
1448 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1449 default: assert(0);
d1e4ebd9 1450 }
1451}
1452
1453#include "pcsxmem.h"
be516ebe 1454//#include "pcsxmem_inline.c"
1455
// Emit the out-of-line slow path for the load described by stubs[n].
//
// The stub looks the address up in mem_rtab. If the table entry does not
// have its top bit set, the load is done directly through the entry
// (entry*2 + address); otherwise the matching jump_handler_read* routine
// is called with the address, the doubled entry and the adjusted cycle
// count. Either way execution continues at stubs[n].retaddr.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  // point the branch in the main code (stubs[n].addr) at this stub
  set_jump_target(stubs[n].addr, out);
  enum stub_type type = stubs[n].type;
  int i = stubs[n].a;        // index into dops[] for this instruction
  int rs = stubs[n].b;       // host register holding the address
  const struct regstat *i_regs = (void *)stubs[n].c;
  u_int reglist = stubs[n].e; // bitmask of host registers (saved/restored below)
  const signed char *i_regmap = i_regs->regmap;
  int rt;
  // destination host register: FTEMP for C2LS/LOADLR, else the one mapped to rt1
  if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,dops[i].rt1);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL, *handler_jump = NULL;
  // the address register must not be picked as scratch
  reglist|=(1<<rs);
  // look for a host register below HOST_CCREG that is not in reglist
  for (r = 0; r < HOST_CCREG; r++) {
    if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
      temp = r;
      break;
    }
  }
  // the destination is about to be overwritten, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: spill now and use register 2 or 0 (whichever is not rs)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // prefer register 1 for temp2 when it is available, since that is where
  // pass_args64() below has to put it anyway
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  emit_readdword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);                       // table index = address >> 12
  emit_readdword_dualindexedx8(temp,temp2,temp2); // temp2 = 8-byte table entry
  // doubling the entry shifts its top bit into the carry flag
  emit_adds64(temp2,temp2,temp2);
  handler_jump=out;
  emit_jc(0); // carry set -> handler path; target patched below
  // fast path: load straight from (doubled entry + address)
  if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
      default: assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jmp(0); // jump to reg restore
  }
  else
    emit_jmp(stubs[n].retaddr); // return address
  set_jump_target(handler_jump, out);

  // handler path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // x0 = address, x1 = doubled table entry
  pass_args64(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // register 2 = cycle count + stubs[n].d
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  emit_far_call(handler);
  // (no cycle reload after read)
  // the handler's result is taken from register 0 and narrowed to the access width
  if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    loadstore_extend(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1540
// Emit an inline load from the constant guest address addr.
//
// If get_direct_memhandler() reports no handler, the load is done directly
// from the host address it returned. Otherwise a handler is called:
// either the one returned, or - for addresses pcsxmem_is_handler_dynamic()
// flags - the generic jump_handler_read* routine.
//   type    - stub type selecting access width and signedness
//   i       - index into dops[] for this instruction
//   regmap  - host register map for this instruction
//   target  - guest register looked up for both address and destination
//   adj     - value added to the cycle count passed to the handler
//   reglist - host registers to preserve around the handler call
static void inline_readstub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg_temp(regmap);
  assert(rs>=0);
  u_int is_dynamic=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
  //  return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // direct access; nothing to emit if the result is not wanted
    if(rt<0||dops[i].rt1==0)
      return;
    // rs holds addr; turn it into the host address if the two differ
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  is_dynamic = pcsxmem_is_handler_dynamic(addr);
  if (is_dynamic) {
    // use the generic handlers instead of the one returned above
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  // the destination is about to be overwritten, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // register 0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // register 2 = cycle count + adj
  emit_addimm(cc<0?2:cc,adj,2);
  if(is_dynamic) {
    // register 1 = doubled mem_rtab entry for this page, the same value
    // do_readstub computes at run time; adrp+add when within reach
    uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
    intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
    if (-4294967296l <= offset && offset < 4294967296l) {
      emit_adrp((void *)l1, 1);
      emit_addimm64(1, l1 & 0xfff, 1);
    }
    else
      emit_movimm64(l1, 1);
  }
  else
    emit_far_call(do_memhandler_pre);

  emit_far_call(handler);

  // (no cycle reload after read)
  // the handler's result is taken from register 0 and narrowed to the access width
  if(rt>=0&&dops[i].rt1!=0)
    loadstore_extend(type, 0, rt);
  restore_regs(reglist);
}
1611
1612static void do_writestub(int n)
1613{
1614 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1615 set_jump_target(stubs[n].addr, out);
1616 enum stub_type type=stubs[n].type;
1617 int i=stubs[n].a;
1618 int rs=stubs[n].b;
1619 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1620 u_int reglist=stubs[n].e;
1621 signed char *i_regmap=i_regs->regmap;
1622 int rt,r;
a5cd72d0 1623 if(dops[i].itype==C2LS) {
d1e4ebd9 1624 rt=get_reg(i_regmap,r=FTEMP);
1625 }else{
cf95b4f0 1626 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1627 }
1628 assert(rs>=0);
1629 assert(rt>=0);
1630 int rtmp,temp=-1,temp2,regs_saved=0;
1631 void *restore_jump = NULL, *handler_jump = NULL;
1632 int reglist2=reglist|(1<<rs)|(1<<rt);
1633 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1634 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1635 temp = rtmp;
1636 break;
1637 }
1638 }
1639 if(temp==-1) {
1640 save_regs(reglist);
1641 regs_saved=1;
1642 for(rtmp=0;rtmp<=3;rtmp++)
1643 if(rtmp!=rs&&rtmp!=rt)
1644 {temp=rtmp;break;}
1645 }
1646 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1647 temp2=3;
1648 else {
1649 host_tempreg_acquire();
1650 temp2=HOST_TEMPREG;
1651 }
1652 emit_readdword(&mem_wtab,temp);
1653 emit_shrimm(rs,12,temp2);
1654 emit_readdword_dualindexedx8(temp,temp2,temp2);
1655 emit_adds64(temp2,temp2,temp2);
1656 handler_jump=out;
1657 emit_jc(0);
1658 switch(type) {
1659 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1660 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1661 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1662 default: assert(0);
1663 }
1664 if(regs_saved) {
1665 restore_jump=out;
1666 emit_jmp(0); // jump to reg restore
1667 }
1668 else
1669 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1670 set_jump_target(handler_jump, out);
1671
d1e4ebd9 1672 if(!regs_saved)
1673 save_regs(reglist);
1674 void *handler=NULL;
1675 switch(type) {
1676 case STOREB_STUB: handler=jump_handler_write8; break;
1677 case STOREH_STUB: handler=jump_handler_write16; break;
1678 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1679 default: assert(0);
d1e4ebd9 1680 }
1681 assert(handler);
1682 pass_args(rs,rt);
1683 if(temp2!=3) {
1684 emit_mov64(temp2,3);
1685 host_tempreg_release();
1686 }
1687 int cc=get_reg(i_regmap,CCREG);
1688 if(cc<0)
1689 emit_loadreg(CCREG,2);
2330734f 1690 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1691 // returns new cycle_count
2a014d73 1692 emit_far_call(handler);
2330734f 1693 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1694 if(cc<0)
1695 emit_storereg(CCREG,2);
1696 if(restore_jump)
1697 set_jump_target(restore_jump, out);
1698 restore_regs(reglist);
1699 emit_jmp(stubs[n].retaddr);
be516ebe 1700}
1701
81dbbf4c 1702static void inline_writestub(enum stub_type type, int i, u_int addr,
1703 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1704{
9de8a0c3 1705 int rs = get_reg_temp(regmap);
687b4580 1706 int rt = get_reg(regmap,target);
1707 assert(rs >= 0);
1708 assert(rt >= 0);
1709 uintptr_t host_addr = 0;
1710 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1711 if (handler == NULL) {
37387d8b 1712 if (addr != host_addr)
1713 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1714 switch (type) {
687b4580 1715 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1716 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1717 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1718 default: assert(0);
1719 }
1720 return;
1721 }
1722
1723 // call a memhandler
1724 save_regs(reglist);
687b4580 1725 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1726 loadstore_extend(type, rt, 0);
1727 int cc, cc_use;
1728 cc = cc_use = get_reg(regmap, CCREG);
1729 if (cc < 0)
1730 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1731 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1732
2a014d73 1733 emit_far_call(do_memhandler_pre);
1734 emit_far_call(handler);
1735 emit_far_call(do_memhandler_post);
2330734f 1736 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1737 if (cc < 0)
1738 emit_storereg(CCREG, cc_use);
687b4580 1739 restore_regs(reglist);
be516ebe 1740}
1741
3968e69e 1742/* Special assem */
1743
81dbbf4c 1744static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1745{
1746 save_load_regs_all(1, reglist);
32631e6a 1747 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1748#ifdef PCNT
1749 emit_movimm(op, 0);
2a014d73 1750 emit_far_call(pcnt_gte_start);
3968e69e 1751#endif
1752 // pointer to cop2 regs
1753 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1754}
1755
// Emit the code that follows a GTE (cop2) handler call: optionally notify
// the perf counter code, then reload the registers in reglist.
// reglist must be the same list that was given to c2op_prologue.
static void c2op_epilogue(u_int op,u_int reglist)
{
#ifdef PCNT
  emit_movimm(op, 0);
  emit_far_call(pcnt_gte_end);
#endif
  save_load_regs_all(0, reglist);
}
1764
// Assemble GTE (cop2) operation i by calling the matching C handler.
// Nothing is emitted when gte_handlers[] has no entry for the operation.
static void c2op_assemble(int i, const struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f; // low 6 bits of the opcode select the GTE function
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // collect every host register that currently has something mapped
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  // only the caller-saved ones need preserving around the call
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
      need_flags=0;
    //int shift = (source[i] >> 19) & 1;
    //int lm = (source[i] >> 10) & 1;
    switch(c2op) {
      default:
        (void)need_ir; // not used by this generic path
        c2op_prologue(c2op, i, i_regs, reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,&psxRegs.code);
        // the _nf table is used when the flags result is not needed
        emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
1796
// Emit the value fix-up for a write to cop2 control register 31: sl holds
// the source value, temp receives the result. The intended transformation
// is spelled out in the two comment lines below.
// NOTE: this path is untested - it deliberately ends in assert(0).
// NOTE(review): the masks used below (0x7fffe000 / 0xff87ffff) are not
// literally the 0x7f87e000 of the reference comment - confirm they are
// equivalent before removing the assert.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  //value = value & 0x7ffff000;
  //if (value & 0x7f87e000) value |= 0x80000000;
  emit_andimm(sl, 0x7fffe000, temp);
  emit_testimm(temp, 0xff87ffff);
  emit_andimm(sl, 0x7ffff000, temp);
  host_tempreg_acquire();
  emit_orimm(temp, 0x80000000, HOST_TEMPREG);
  emit_cmovne_reg(HOST_TEMPREG, temp);
  host_tempreg_release();
  assert(0); // testing needed
}
1810
// Emit code that reads cop2 data register copr as a signed halfword into
// temp and reduces it to a value masked with 0xf80: negative inputs are
// presumably cleared to 0 by emit_bicsar_imm (bic with the value
// arithmetic-shifted by 31, going by the helper name), inputs above
// 0xf80 saturate to 0xf80.
// Helper for c2op_mfc2_29_assemble.
static void do_mfc2_31_one(u_int copr,signed char temp)
{
  emit_readshword(&reg_cop2d[copr],temp);
  emit_bicsar_imm(temp,31,temp);
  emit_cmpimm(temp,0xf80);
  emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
  emit_andimm(temp,0xf80,temp);
}
1819
// Emit code that builds the value of cop2 data register 29 from registers
// 9, 10 and 11: each one is reduced by do_mfc2_31_one() to a field masked
// with 0xf80, and the three fields are combined into tl by shifting them
// right by 7, right by 2 and left by 3 respectively. The result is also
// written to reg_cop2d[29].
//   tl   - host register receiving the result
//   temp - scratch host register; pass a negative value to have
//          HOST_TEMPREG acquired (and released again) here
static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
{
  if (temp < 0) {
    host_tempreg_acquire();
    temp = HOST_TEMPREG;
  }
  do_mfc2_31_one(9,temp);
  emit_shrimm(temp,7,tl);
  do_mfc2_31_one(10,temp);
  emit_orrshr_imm(temp,2,tl);
  do_mfc2_31_one(11,temp);
  emit_orrshl_imm(temp,3,tl);
  emit_writeword(tl,&reg_cop2d[29]);

  if (temp == HOST_TEMPREG)
    host_tempreg_release();
}
1837
// Assemble MULT/MULTU/DIV/DIVU for instruction i.
//
// When both source operands are real registers (neither is guest register
// 0) the operation is emitted in full, including the result a division by
// zero has to produce. When at least one operand is register 0 the
// results are known up front (or depend on a single test) and are
// materialised directly into whichever of HI/LO is mapped.
static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  if(dops[i].rs1&&dops[i].rs2)
  {
    switch(dops[i].opcode2)
    {
    case 0x18: // MULT
    case 0x19: // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
        signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);

        // the 64-bit product lands in hi first
        if(dops[i].opcode2==0x18) // MULT
          emit_smull(m1,m2,hi);
        else                      // MULTU
          emit_umull(m1,m2,hi);

        // lo takes a copy, then hi is shifted down by 32 to leave the
        // upper half
        emit_mov(hi,lo);
        emit_shrimm64(hi,32,hi);
        break;
      }
    case 0x1A: // DIV
    case 0x1B: // DIVU
      {
        signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
        signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(numerator>=0);
        assert(denominator>=0);
        assert(quotient>=0);
        assert(remainder>=0);

        if (dops[i].opcode2 == 0x1A) // DIV
          emit_sdiv(numerator,denominator,quotient);
        else                         // DIVU
          emit_udiv(numerator,denominator,quotient);
        // remainder = numerator - quotient * denominator
        emit_msub(quotient,denominator,numerator,remainder);

        // div 0 quotient (remainder is already correct)
        host_tempreg_acquire();
        if (dops[i].opcode2 == 0x1A) { // DIV
          // going by the helper names: temp = numerator's sign bit, then
          // temp |= ~(numerator asr 31), which gives -1 for a
          // non-negative numerator and 1 for a negative one
          emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
          emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
        }
        else
          emit_movimm(~0,HOST_TEMPREG);
        // replace the quotient only when the denominator is zero
        emit_test(denominator,denominator);
        emit_cmoveq_reg(HOST_TEMPREG,quotient);
        host_tempreg_release();
        break;
      }
    default:
      assert(0);
    }
  }
  else
  {
    // at least one operand is guest register 0; HI/LO may be unmapped
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
    {
      if (dops[i].rs1) {
        signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
        assert(numerator >= 0);
        // HI gets the numerator; LO gets the div-by-zero quotient,
        // built the same way as in the full DIV path above
        if (hr >= 0)
          emit_mov(numerator,hr);
        if (lr >= 0) {
          if (dops[i].opcode2 == 0x1A) { // DIV
            emit_add_lsrimm(WZR,numerator,31,lr);
            emit_orn_asrimm(lr,numerator,31,lr);
          }
          else
            emit_movimm(~0,lr);
        }
      }
      else {
        // 0 / 0: HI = 0, LO = all ones
        if (hr >= 0) emit_zeroreg(hr);
        if (lr >= 0) emit_movimm(~0,lr);
      }
    }
    else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
    {
      // 0 / x: HI = 0; LO = 0 unless x turns out to be zero at run time,
      // in which case csinv flips it to all ones
      signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
      assert(denominator >= 0);
      if (hr >= 0) emit_zeroreg(hr);
      if (lr >= 0) {
        emit_zeroreg(lr);
        emit_test(denominator, denominator);
        emit_csinvne_reg(lr, lr, lr);
      }
    }
    else
    {
      // Multiply by zero is zero.
      if (hr >= 0) emit_zeroreg(hr);
      if (lr >= 0) emit_zeroreg(lr);
    }
  }
}
1948#define multdiv_assemble multdiv_assemble_arm64
1949
d1e4ebd9 1950static void do_jump_vaddr(u_int rs)
1951{
1952 if (rs != 0)
1953 emit_mov(rs, 0);
104df9d3 1954 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1955 emit_jmpreg(0);
1956}
1957
be516ebe 1958static void do_preload_rhash(u_int r) {
1959 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1960 // register. On ARM the hash can be done with a single instruction (below)
1961}
1962
1963static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1964 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1965}
1966
1967static void do_rhash(u_int rs,u_int rh) {
1968 emit_andimm(rs, 0xf8, rh);
1969}
1970
d1e4ebd9 1971static void do_miniht_load(int ht, u_int rh) {
1972 emit_add64(ht, rh, ht);
1973 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1974}
1975
d1e4ebd9 1976static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1977 emit_cmp(rh, rs);
1978 void *jaddr = out;
1979 emit_jeq(0);
1980 do_jump_vaddr(rs);
1981
1982 set_jump_target(jaddr, out);
1983 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1984 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1985 emit_jmpreg(ht);
be516ebe 1986}
1987
d1e4ebd9 1988// parsed by set_jump_target?
be516ebe 1989static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1990 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1991 emit_movk(return_address&0xffff,rt);
1992 add_to_linker(out,return_address,1);
1993 emit_adr(out,temp);
1994 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1995 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1996}
1997
d9e2b173 1998static unused void clear_cache_arm64(char *start, char *end)
be516ebe 1999{
919981d0 2000 // Don't rely on GCC's __clear_cache implementation, as it caches
2001 // icache/dcache cache line sizes, that can vary between cores on
2002 // big.LITTLE architectures.
2003 uint64_t addr, ctr_el0;
2004 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2005 size_t isize, dsize;
2006
2007 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2008 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2009 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2010
2011 // use the global minimum cache line size
2012 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2013 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2014
2015 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2016 not required for instruction to data coherence. */
2017 if ((ctr_el0 & (1 << 28)) == 0x0) {
2018 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2019 for (; addr < (uint64_t)end; addr += dsize)
2020 // use "civac" instead of "cvau", as this is the suggested workaround for
2021 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2022 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 2023 }
919981d0 2024 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2025
919981d0 2026 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2027 Unification is not required for instruction to data coherence. */
2028 if ((ctr_el0 & (1 << 29)) == 0x0) {
2029 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2030 for (; addr < (uint64_t)end; addr += isize)
2031 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2032
2033 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2034 }
919981d0 2035
2036 __asm__ volatile("isb" : : : "memory");
be516ebe 2037}
2038
2039// CPU-architecture-specific initialization
2a014d73 2040static void arch_init(void)
2041{
2042 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 2043 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 2044 size_t i;
2045 assert(!(diff & 3));
d9e2b173 2046 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 2047 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 2048 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2049 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 2050 }
2051 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2052}
2053
2054// vim:shiftwidth=2:expandtab