drc: optional address error exception support
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26/* Linker */
d1e4ebd9 27static void set_jump_target(void *addr, void *target)
be516ebe 28{
d9e2b173 29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 30 intptr_t offset = (u_char *)target - (u_char *)addr;
31
3968e69e 32 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
35 }
3968e69e 36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 40 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 41 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 42 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 44 }
3968e69e 45 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
49 }
50 else
3968e69e 51 abort(); // should not happen
be516ebe 52}
53
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third instruction word of the stub must be an adr; its byte
// displacement (immhi:immlo) is decoded and applied relative to that word.
// Decoding is done on an unsigned copy of the instruction so that no
// signed value is ever left-shifted.
static void *find_extjump_insn(void *stub)
{
  uint32_t *ptr = (uint32_t *)stub + 2;
  const uint32_t insn = *ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi: bits 23..5, sign-extended by moving bit 23 to the top first
  const int32_t imm_hi = (int32_t)(insn << 8) >> 13;
  // immlo: bits 30..29
  const int32_t imm_lo = (int32_t)((insn >> 29) & 0x3);
  const int offset = imm_hi * 4 + imm_lo;
  return ptr + offset / 4;
}
63
#if 0
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn via find_extjump_insn(),
// then return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int word = *branch;

  // b
  if ((word & 0xfc000000) == 0x14000000)
    return branch + ((signed int)(word << 6) >> 6);

  // b.cond or cbz/cbnz
  if ((word & 0xff000000) == 0x54000000 || (word & 0x7e000000) == 0x34000000)
    return branch + ((signed int)(word << 8) >> 13);

  assert(0);
  return NULL;
}
#endif
be516ebe 81
be516ebe 82// Allocate a specific ARM register.
83static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
84{
85 int n;
86 int dirty=0;
87
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
90 {
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
93 cur->regmap[n]=-1;
94 }
95 }
96
97 cur->regmap[hr]=reg;
98 cur->dirty&=~(1<<hr);
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
101}
102
103// Alloc cycle count into dedicated register
104static void alloc_cc(struct regstat *cur,int i)
105{
106 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
107}
108
109/* Special alloc */
110
111
112/* Assembler */
113
114static unused const char *regname[32] = {
d1e4ebd9 115 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
116 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
117 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
118 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
119};
120
121static unused const char *regname64[32] = {
122 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
123 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
124 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
125 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
126};
127
// Condition codes; the numeric value is what gets placed into the
// condition field of emitted instructions (same order as condname[]).
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
132
133static unused const char *condname[16] = {
134 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
135 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 136};
137
be516ebe 138static void output_w32(u_int word)
139{
d9e2b173 140 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 141 out += 4;
142}
143
// Pack rn into bits 9..5 and rd into bits 4..0; both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  return fields;
}
150
// Pack rm into bits 20..16, rn into bits 9..5 and rd into bits 4..0.
// Register number 31 is accepted for every field.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
158
3968e69e 159static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
160{
161 assert(ra < 32);
162 return rm_rn_rd(rm, rn, rd) | (ra << 10);
163}
164
// Pack imm7 (bits 21..15), rt2 (bits 14..10), rn (bits 9..5) and
// rt (bits 4..0). rt and rt2 must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  u_int fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
173
687b4580 174static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
175{
176 assert(imm6 <= 63);
177 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
178}
179
// Pack a 16-bit immediate into bits 20..5 and rd (below 31) into bits 4..0.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  u_int fields = rd;
  fields |= imm16 << 5;
  return fields;
}
186
// Pack a 12-bit immediate into bits 21..10, rn into bits 9..5 and rd into
// bits 4..0. Register number 31 is accepted for rn and rd.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
194
// Pack a 9-bit immediate into bits 20..12, rn into bits 9..5 and the
// transfer register into bits 4..0; both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
202
// Pack a 19-bit immediate into bits 23..5 and rt (below 31) into bits 4..0.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  u_int fields = rt;
  fields |= imm19 << 5;
  return fields;
}
209
// Pack the n bit (bit 22), immr (bits 21..16), imms (bits 15..10),
// rn (bits 9..5) and rd (bits 4..0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
219
220static u_int genjmp(const u_char *addr)
be516ebe 221{
222 intptr_t offset = addr - out;
d1e4ebd9 223 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 224 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 225 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
226 abort();
be516ebe 227 return 0;
228 }
d1e4ebd9 229 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 230}
231
d1e4ebd9 232static u_int genjmpcc(const u_char *addr)
be516ebe 233{
234 intptr_t offset = addr - out;
d1e4ebd9 235 if ((uintptr_t)addr < 3) return 0;
be516ebe 236 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 237 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
238 abort();
239 return 0;
240 }
241 return ((u_int)offset >> 2) & 0x7ffff;
242}
243
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two or wraps to 0), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return ((value + 1) & value) == 0 ? 1 : 0;
}
248
249// This function returns true if the argument contains a
250// non-empty sequence of ones (possibly rotated) with the remainder zero.
251static uint32_t is_rotated_mask(u_int value)
252{
3968e69e 253 if (value == 0 || value == ~0)
be516ebe 254 return 0;
d1e4ebd9 255 if (is_mask((value - 1) | value))
256 return 1;
257 return is_mask((~value - 1) | ~value);
258}
259
260static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
261{
262 int lzeros, tzeros, ones;
263 assert(value != 0);
264 if (is_mask((value - 1) | value)) {
265 lzeros = __builtin_clz(value);
266 tzeros = __builtin_ctz(value);
267 ones = 32 - lzeros - tzeros;
268 *immr = (32 - tzeros) & 31;
269 *imms = ones - 1;
270 return;
be516ebe 271 }
d1e4ebd9 272 value = ~value;
273 if (is_mask((value - 1) | value)) {
274 lzeros = __builtin_clz(value);
275 tzeros = __builtin_ctz(value);
276 ones = 32 - lzeros - tzeros;
3968e69e 277 *immr = lzeros;
d1e4ebd9 278 *imms = 31 - ones;
279 return;
280 }
3968e69e 281 abort();
be516ebe 282}
283
284static void emit_mov(u_int rs, u_int rt)
285{
687b4580 286 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 287 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
288}
289
290static void emit_mov64(u_int rs, u_int rt)
291{
292 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
293 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 294}
295
687b4580 296static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 297{
d1e4ebd9 298 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
299 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 300}
301
a5cd72d0 302static void emit_adds(u_int rs1, u_int rs2, u_int rt)
303{
304 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
305 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
306}
307
d1e4ebd9 308static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 309{
d1e4ebd9 310 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
311 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 312}
313
d1e4ebd9 314static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 315{
3968e69e 316 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 317 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
318}
39b71d9a 319#define emit_adds_ptr emit_adds64
d1e4ebd9 320
a5cd72d0 321static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
322{
323 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
324 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
325}
326
d1e4ebd9 327static void emit_neg(u_int rs, u_int rt)
328{
329 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 331}
332
a5cd72d0 333static void emit_negs(u_int rs, u_int rt)
334{
335 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
337}
338
687b4580 339static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 340{
d1e4ebd9 341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 343}
344
a5cd72d0 345static void emit_subs(u_int rs1, u_int rs2, u_int rt)
346{
347 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
348 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
349}
350
351static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
3968e69e 352{
353 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
354 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
355}
356
d1e4ebd9 357static void emit_movz(u_int imm, u_int rt)
be516ebe 358{
d1e4ebd9 359 assem_debug("movz %s,#%#x\n", regname[rt], imm);
360 output_w32(0x52800000 | imm16_rd(imm, rt));
361}
362
363static void emit_movz_lsl16(u_int imm, u_int rt)
364{
365 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
366 output_w32(0x52a00000 | imm16_rd(imm, rt));
367}
368
369static void emit_movn(u_int imm, u_int rt)
370{
371 assem_debug("movn %s,#%#x\n", regname[rt], imm);
372 output_w32(0x12800000 | imm16_rd(imm, rt));
373}
374
375static void emit_movn_lsl16(u_int imm,u_int rt)
376{
377 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
378 output_w32(0x12a00000 | imm16_rd(imm, rt));
379}
380
381static void emit_movk(u_int imm,u_int rt)
382{
383 assem_debug("movk %s,#%#x\n", regname[rt], imm);
384 output_w32(0x72800000 | imm16_rd(imm, rt));
385}
386
387static void emit_movk_lsl16(u_int imm,u_int rt)
388{
389 assert(imm<65536);
3968e69e 390 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 391 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 392}
393
394static void emit_zeroreg(u_int rt)
395{
d1e4ebd9 396 emit_movz(0, rt);
be516ebe 397}
398
be516ebe 399static void emit_movimm(u_int imm, u_int rt)
400{
d1e4ebd9 401 if (imm < 65536)
402 emit_movz(imm, rt);
403 else if ((~imm) < 65536)
404 emit_movn(~imm, rt);
405 else if ((imm&0xffff) == 0)
406 emit_movz_lsl16(imm >> 16, rt);
407 else if (((~imm)&0xffff) == 0)
408 emit_movn_lsl16(~imm >> 16, rt);
409 else if (is_rotated_mask(imm)) {
410 u_int immr, imms;
411 gen_logical_imm(imm, &immr, &imms);
412 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
413 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
414 }
be516ebe 415 else {
d1e4ebd9 416 emit_movz(imm & 0xffff, rt);
417 emit_movk_lsl16(imm >> 16, rt);
be516ebe 418 }
419}
420
aaece508 421static void emit_movimm64(uint64_t imm, u_int rt)
422{
423 u_int shift, op, imm16, insns = 0;
424 for (shift = 0; shift < 4; shift++) {
425 imm16 = (imm >> shift * 16) & 0xffff;
426 if (!imm16)
427 continue;
428 op = insns ? 0xf2800000 : 0xd2800000;
429 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
430 if (shift)
431 assem_debug(",lsl #%u", shift * 16);
432 assem_debug("\n");
433 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
434 insns++;
435 }
436 if (!insns) {
437 assem_debug("movz %s,#0\n", regname64[rt]);
438 output_w32(0xd2800000 | imm16_rd(0, rt));
439 }
440}
441
687b4580 442static void emit_readword(void *addr, u_int rt)
443{
444 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
445 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 446 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 447 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
448 }
449 else
3968e69e 450 abort();
687b4580 451}
452
d1e4ebd9 453static void emit_readdword(void *addr, u_int rt)
454{
455 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
456 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 457 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
d1e4ebd9 458 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
459 }
3968e69e 460 else
461 abort();
462}
39b71d9a 463#define emit_readptr emit_readdword
3968e69e 464
465static void emit_readshword(void *addr, u_int rt)
466{
467 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
468 if (!(offset & 1) && offset <= 8190) {
469 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
470 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
471 }
d1e4ebd9 472 else
473 assert(0);
474}
475
be516ebe 476static void emit_loadreg(u_int r, u_int hr)
477{
d1e4ebd9 478 int is64 = 0;
be516ebe 479 if (r == 0)
480 emit_zeroreg(hr);
481 else {
33788798 482 void *addr;
be516ebe 483 switch (r) {
7c3a5182 484 //case HIREG: addr = &hi; break;
485 //case LOREG: addr = &lo; break;
be516ebe 486 case CCREG: addr = &cycle_count; break;
bc7c5acb 487 case CSREG: addr = &psxRegs.CP0.n.SR; break;
d1e4ebd9 488 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 489 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 490 default:
491 assert(r < 34);
492 addr = &psxRegs.GPR.r[r];
493 break;
be516ebe 494 }
d1e4ebd9 495 if (is64)
496 emit_readdword(addr, hr);
497 else
498 emit_readword(addr, hr);
be516ebe 499 }
500}
501
687b4580 502static void emit_writeword(u_int rt, void *addr)
503{
504 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
505 if (!(offset & 3) && offset <= 16380) {
a5cd72d0 506 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
687b4580 507 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
508 }
509 else
510 assert(0);
511}
512
d1e4ebd9 513static void emit_writedword(u_int rt, void *addr)
514{
515 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
516 if (!(offset & 7) && offset <= 32760) {
a5cd72d0 517 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
3968e69e 518 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 519 }
520 else
3968e69e 521 abort();
d1e4ebd9 522}
523
687b4580 524static void emit_storereg(u_int r, u_int hr)
be516ebe 525{
526 assert(r < 64);
7c3a5182 527 void *addr = &psxRegs.GPR.r[r];
be516ebe 528 switch (r) {
7c3a5182 529 //case HIREG: addr = &hi; break;
530 //case LOREG: addr = &lo; break;
be516ebe 531 case CCREG: addr = &cycle_count; break;
7c3a5182 532 default: assert(r < 34); break;
be516ebe 533 }
687b4580 534 emit_writeword(hr, addr);
be516ebe 535}
536
537static void emit_test(u_int rs, u_int rt)
538{
d1e4ebd9 539 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
540 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 541}
542
d1e4ebd9 543static void emit_testimm(u_int rs, u_int imm)
be516ebe 544{
d1e4ebd9 545 u_int immr, imms;
687b4580 546 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 547 assert(is_rotated_mask(imm)); // good enough for PCSX
548 gen_logical_imm(imm, &immr, &imms);
3968e69e 549 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 550}
551
552static void emit_not(u_int rs,u_int rt)
553{
554 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 555 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 556}
557
be516ebe 558static void emit_and(u_int rs1,u_int rs2,u_int rt)
559{
560 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 561 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 562}
563
564static void emit_or(u_int rs1,u_int rs2,u_int rt)
565{
566 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 567 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 568}
569
3968e69e 570static void emit_bic(u_int rs1,u_int rs2,u_int rt)
571{
572 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
573 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
574}
575
be516ebe 576static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
577{
be516ebe 578 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 579 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 580}
581
582static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
583{
be516ebe 584 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 585 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 586}
587
a5cd72d0 588static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
589{
590 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
591 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
592}
593
3968e69e 594static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
595{
596 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
597 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
598}
599
be516ebe 600static void emit_xor(u_int rs1,u_int rs2,u_int rt)
601{
602 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 603 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 604}
605
3968e69e 606static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
607{
608 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
609 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
610}
611
d1e4ebd9 612static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 613{
d1e4ebd9 614 unused const char *st = s ? "s" : "";
615 s = s ? 0x20000000 : 0;
616 is64 = is64 ? 0x80000000 : 0;
687b4580 617 if (imm < 4096) {
d1e4ebd9 618 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
619 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 620 }
621 else if (-imm < 4096) {
3968e69e 622 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 623 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
624 }
a5cd72d0 625 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
626 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
627 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
628 if (imm & 0xfff) {
629 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
630 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 631 }
632 }
a5cd72d0 633 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
634 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
635 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
636 if (-imm & 0xfff) {
637 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
638 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
d1e4ebd9 639 }
687b4580 640 }
a5cd72d0 641 else {
642 u_int tmp = rt;
643 assert(!is64);
644 if (rs == rt) {
645 host_tempreg_acquire();
646 tmp = HOST_TEMPREG;
647 }
648 emit_movimm(imm, tmp);
649 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
650 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
651 if (tmp == HOST_TEMPREG)
652 host_tempreg_release();
653 }
be516ebe 654}
655
d1e4ebd9 656static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
657{
9b495f6e 658 if (imm == 0) {
659 emit_mov(rs, rt);
660 return;
661 }
d1e4ebd9 662 emit_addimm_s(0, 0, rs, imm, rt);
663}
664
665static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
666{
667 emit_addimm_s(0, 1, rs, imm, rt);
668}
669
bc7c5acb 670static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
671{
672 emit_addimm64(rs, imm, rt);
673}
674
be516ebe 675static void emit_addimm_and_set_flags(int imm, u_int rt)
676{
d1e4ebd9 677 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 678}
679
a5cd72d0 680static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
681{
682 emit_addimm_s(1, 0, rs, imm, rt);
683}
684
d1e4ebd9 685static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 686{
d1e4ebd9 687 const char *names[] = { "and", "orr", "eor", "ands" };
688 const char *name = names[op];
689 u_int immr, imms;
690 op = op << 29;
691 if (is_rotated_mask(imm)) {
692 gen_logical_imm(imm, &immr, &imms);
693 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
694 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
695 }
696 else {
697 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
698 host_tempreg_acquire();
699 emit_movimm(imm, HOST_TEMPREG);
700 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
701 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
702 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
703 host_tempreg_release();
704 }
705 (void)name;
be516ebe 706}
707
d1e4ebd9 708static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 709{
d1e4ebd9 710 if (imm == 0)
711 emit_zeroreg(rt);
712 else
713 emit_logicop_imm(0, rs, imm, rt);
be516ebe 714}
715
d1e4ebd9 716static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 717{
d1e4ebd9 718 if (imm == 0) {
719 if (rs != rt)
720 emit_mov(rs, rt);
721 }
722 else
723 emit_logicop_imm(1, rs, imm, rt);
be516ebe 724}
725
d1e4ebd9 726static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 727{
d1e4ebd9 728 if (imm == 0) {
729 if (rs != rt)
730 emit_mov(rs, rt);
731 }
732 else
733 emit_logicop_imm(2, rs, imm, rt);
be516ebe 734}
735
d1e4ebd9 736static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 737{
d1e4ebd9 738 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
739 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 740}
741
d1e4ebd9 742static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 743{
d1e4ebd9 744 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
745 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 746}
747
748static void emit_shlimm(u_int rs,u_int imm,u_int rt)
749{
be516ebe 750 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 751 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 752}
753
3968e69e 754static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 755{
3968e69e 756 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 758}
759
3968e69e 760static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 761{
be516ebe 762 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 763 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 764}
765
766static void emit_sarimm(u_int rs,u_int imm,u_int rt)
767{
be516ebe 768 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 769 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 770}
771
772static void emit_rorimm(u_int rs,u_int imm,u_int rt)
773{
3968e69e 774 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 775 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 776}
777
778static void emit_signextend16(u_int rs, u_int rt)
779{
780 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 781 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 782}
783
d1e4ebd9 784static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 785{
3968e69e 786 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 787 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 788}
789
d1e4ebd9 790static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 791{
d1e4ebd9 792 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
793 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 794}
795
d1e4ebd9 796static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 797{
d1e4ebd9 798 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
799 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 800}
801
d1e4ebd9 802static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 803{
d1e4ebd9 804 if (imm < 4096) {
805 assem_debug("cmp %s,%#x\n", regname[rs], imm);
806 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
807 }
808 else if (-imm < 4096) {
809 assem_debug("cmn %s,%#x\n", regname[rs], imm);
810 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
811 }
812 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 813 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 814 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
815 }
816 else {
817 host_tempreg_acquire();
818 emit_movimm(imm, HOST_TEMPREG);
819 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
820 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
821 host_tempreg_release();
822 }
be516ebe 823}
824
d1e4ebd9 825static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 826{
d1e4ebd9 827 assert(imm == 0 || imm == 1);
828 assert(cond0 < 0x10);
829 assert(cond1 < 0x10);
830 if (imm) {
831 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
832 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
833 } else {
834 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
835 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
836 }
be516ebe 837}
838
d1e4ebd9 839static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 840{
d1e4ebd9 841 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 842}
843
d1e4ebd9 844static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 845{
d1e4ebd9 846 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 847}
848
849static void emit_cmovb_imm(int imm,u_int rt)
850{
d1e4ebd9 851 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 852}
853
3968e69e 854static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 855{
3968e69e 856 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
857 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 858}
859
860static void emit_cmovne_reg(u_int rs,u_int rt)
861{
d1e4ebd9 862 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
863 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 864}
865
866static void emit_cmovl_reg(u_int rs,u_int rt)
867{
d1e4ebd9 868 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
869 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 870}
871
e3c6bdb5 872static void emit_cmovb_reg(u_int rs,u_int rt)
873{
874 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
875 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
876}
877
be516ebe 878static void emit_cmovs_reg(u_int rs,u_int rt)
879{
d1e4ebd9 880 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
881 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 882}
883
3968e69e 884static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
885{
886 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
887 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
888}
889
a5cd72d0 890static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
891{
892 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
894}
895
be516ebe 896static void emit_slti32(u_int rs,int imm,u_int rt)
897{
898 if(rs!=rt) emit_zeroreg(rt);
899 emit_cmpimm(rs,imm);
900 if(rs==rt) emit_movimm(0,rt);
901 emit_cmovl_imm(1,rt);
902}
903
904static void emit_sltiu32(u_int rs,int imm,u_int rt)
905{
906 if(rs!=rt) emit_zeroreg(rt);
907 emit_cmpimm(rs,imm);
908 if(rs==rt) emit_movimm(0,rt);
909 emit_cmovb_imm(1,rt);
910}
911
912static void emit_cmp(u_int rs,u_int rt)
913{
914 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 915 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 916}
917
882a08fc 918static void emit_cmpcs(u_int rs,u_int rt)
919{
920 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
921 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
922}
923
be516ebe 924static void emit_set_gz32(u_int rs, u_int rt)
925{
926 //assem_debug("set_gz32\n");
927 emit_cmpimm(rs,1);
928 emit_movimm(1,rt);
929 emit_cmovl_imm(0,rt);
930}
931
932static void emit_set_nz32(u_int rs, u_int rt)
933{
934 //assem_debug("set_nz32\n");
d1e4ebd9 935 if(rs!=rt) emit_mov(rs,rt);
936 emit_test(rs,rs);
937 emit_cmovne_imm(1,rt);
be516ebe 938}
939
940static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
941{
942 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
943 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
944 emit_cmp(rs1,rs2);
945 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
946 emit_cmovl_imm(1,rt);
947}
948
949static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
950{
951 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
952 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
953 emit_cmp(rs1,rs2);
954 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
955 emit_cmovb_imm(1,rt);
956}
957
2a014d73 958static int can_jump_or_call(const void *a)
959{
960 intptr_t diff = (u_char *)a - out;
961 return (-134217728 <= diff && diff <= 134217727);
962}
963
d1e4ebd9 964static void emit_call(const void *a)
be516ebe 965{
d1e4ebd9 966 intptr_t diff = (u_char *)a - out;
967 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 968 assert(!(diff & 3));
969 if (-134217728 <= diff && diff <= 134217727)
970 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
971 else
3968e69e 972 abort();
be516ebe 973}
974
d1e4ebd9 975static void emit_jmp(const void *a)
be516ebe 976{
d1e4ebd9 977 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
978 u_int offset = genjmp(a);
979 output_w32(0x14000000 | offset);
be516ebe 980}
981
d1e4ebd9 982static void emit_jne(const void *a)
be516ebe 983{
d1e4ebd9 984 assem_debug("bne %p\n", a);
985 u_int offset = genjmpcc(a);
986 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 987}
988
7c3a5182 989static void emit_jeq(const void *a)
be516ebe 990{
d1e4ebd9 991 assem_debug("beq %p\n", a);
992 u_int offset = genjmpcc(a);
993 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 994}
995
7c3a5182 996static void emit_js(const void *a)
be516ebe 997{
d1e4ebd9 998 assem_debug("bmi %p\n", a);
999 u_int offset = genjmpcc(a);
1000 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 1001}
1002
7c3a5182 1003static void emit_jns(const void *a)
be516ebe 1004{
d1e4ebd9 1005 assem_debug("bpl %p\n", a);
1006 u_int offset = genjmpcc(a);
1007 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 1008}
1009
7c3a5182 1010static void emit_jl(const void *a)
be516ebe 1011{
d1e4ebd9 1012 assem_debug("blt %p\n", a);
1013 u_int offset = genjmpcc(a);
1014 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 1015}
1016
7c3a5182 1017static void emit_jge(const void *a)
be516ebe 1018{
d1e4ebd9 1019 assem_debug("bge %p\n", a);
1020 u_int offset = genjmpcc(a);
1021 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 1022}
1023
a5cd72d0 1024static void emit_jo(const void *a)
1025{
1026 assem_debug("bvs %p\n", a);
1027 u_int offset = genjmpcc(a);
1028 output_w32(0x54000000 | (offset << 5) | COND_VS);
1029}
1030
7c3a5182 1031static void emit_jno(const void *a)
be516ebe 1032{
d1e4ebd9 1033 assem_debug("bvc %p\n", a);
1034 u_int offset = genjmpcc(a);
1035 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 1036}
1037
7c3a5182 1038static void emit_jc(const void *a)
be516ebe 1039{
d1e4ebd9 1040 assem_debug("bcs %p\n", a);
1041 u_int offset = genjmpcc(a);
1042 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 1043}
1044
3968e69e 1045static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 1046{
3968e69e 1047 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 1048 u_int offset = genjmpcc(a);
3968e69e 1049 is64 = is64 ? 0x80000000 : 0;
1050 isnz = isnz ? 0x01000000 : 0;
1051 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1052}
1053
9b495f6e 1054static void *emit_cbz(u_int r, const void *a)
3968e69e 1055{
9b495f6e 1056 void *ret = out;
3968e69e 1057 emit_cb(0, 0, a, r);
9b495f6e 1058 return ret;
be516ebe 1059}
1060
1061static void emit_jmpreg(u_int r)
1062{
3968e69e 1063 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1064 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1065}
1066
1067static void emit_retreg(u_int r)
1068{
d1e4ebd9 1069 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1070 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1071}
1072
1073static void emit_ret(void)
1074{
1075 emit_retreg(LR);
1076}
1077
d1e4ebd9 1078static void emit_adr(void *addr, u_int rt)
1079{
1080 intptr_t offset = (u_char *)addr - out;
1081 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1082 assert(rt < 31);
d1e4ebd9 1083 assem_debug("adr x%d,#%#lx\n", rt, offset);
1084 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1085}
1086
3968e69e 1087static void emit_adrp(void *addr, u_int rt)
1088{
1089 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1090 assert(-4294967296l <= offset && offset < 4294967296l);
1091 assert(rt < 31);
1092 offset >>= 12;
1093 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1094 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1095}
1096
be516ebe 1097static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1098{
d1e4ebd9 1099 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1100 assert(-256 <= offset && offset < 256);
1101 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1102}
1103
1104static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1105{
1106 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1107 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1108}
1109
1110static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1111{
1112 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1113 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1114}
1115
1116static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1117{
1118 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1119 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1120}
1121
1122static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1123{
1124 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1125 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1126}
39b71d9a 1127#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1128
1129static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1130{
1131 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1132 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1133}
1134
1135static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1136{
1137 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1138 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1139}
1140
1141static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1142{
1143 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1144 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1145}
1146
1147static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1148{
1149 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1150 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1151}
1152
1153static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1154{
1155 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1156 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1157}
1158
be516ebe 1159static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1160{
d1e4ebd9 1161 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1162 assert(-256 <= offset && offset < 256);
1163 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1164}
1165
1166static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1167{
d1e4ebd9 1168 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1169 assert(-256 <= offset && offset < 256);
1170 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1171}
1172
1173static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1174{
d1e4ebd9 1175 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1176 assert(-256 <= offset && offset < 256);
1177 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1178}
1179
1180static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1181{
d1e4ebd9 1182 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1183 assert(-256 <= offset && offset < 256);
1184 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1185}
1186
be516ebe 1187static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1188{
3968e69e 1189 if (!(offset & 3) && (u_int)offset <= 16380) {
1190 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1191 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1192 }
1193 else if (-256 <= offset && offset < 256) {
1194 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1195 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1196 }
687b4580 1197 else
1198 assert(0);
be516ebe 1199}
1200
1201static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1202{
3968e69e 1203 if (!(offset & 1) && (u_int)offset <= 8190) {
1204 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1205 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1206 }
1207 else if (-256 <= offset && offset < 256) {
1208 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1209 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1210 }
687b4580 1211 else
1212 assert(0);
be516ebe 1213}
1214
1215static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1216{
3968e69e 1217 if ((u_int)offset < 4096) {
1218 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1219 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1220 }
1221 else if (-256 <= offset && offset < 256) {
1222 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1223 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1224 }
687b4580 1225 else
1226 assert(0);
be516ebe 1227}
1228
3968e69e 1229static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1230{
3968e69e 1231 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1232 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1233}
1234
3968e69e 1235static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1236{
3968e69e 1237 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1238 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1239}
1240
1241static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1242{
1243 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1244 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1245}
1246
1247static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1248{
1249 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1251}
1252
3968e69e 1253static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1254{
1255 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1257}
1258
1259static void emit_clz(u_int rs, u_int rt)
be516ebe 1260{
1261 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1262 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1263}
1264
be516ebe 1265// special case for checking invalid_code
9b495f6e 1266static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1267{
1268 emit_shrimm(r, 12, rt);
1269 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1270 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1271}
1272
3968e69e 1273// special for loadlr_assemble, rs2 is destroyed
1274static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1275{
3968e69e 1276 emit_shl(rs2, shift, rs2);
1277 emit_bic(rs1, rs2, rt);
be516ebe 1278}
1279
3968e69e 1280static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1281{
3968e69e 1282 emit_shr(rs2, shift, rs2);
1283 emit_bic(rs1, rs2, rt);
be516ebe 1284}
1285
687b4580 1286static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1287{
687b4580 1288 u_int op = 0xb9000000;
d1e4ebd9 1289 unused const char *ldst = is_st ? "st" : "ld";
1290 unused char rp = is64 ? 'x' : 'w';
687b4580 1291 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1292 is64 = is64 ? 1 : 0;
1293 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1294 ofs = (ofs >> (2+is64));
687b4580 1295 if (!is_st) op |= 0x00400000;
1296 if (is64) op |= 0x40000000;
d1e4ebd9 1297 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1298}
1299
687b4580 1300static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1301{
687b4580 1302 u_int op = 0x29000000;
d1e4ebd9 1303 unused const char *ldst = is_st ? "st" : "ld";
1304 unused char rp = is64 ? 'x' : 'w';
687b4580 1305 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1306 is64 = is64 ? 1 : 0;
1307 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1308 ofs = (ofs >> (2+is64));
1309 assert(-64 <= ofs && ofs <= 63);
1310 ofs &= 0x7f;
1311 if (!is_st) op |= 0x00400000;
1312 if (is64) op |= 0x80000000;
d1e4ebd9 1313 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1314}
1315
1316static void save_load_regs_all(int is_store, u_int reglist)
1317{
1318 int ofs = 0, c = 0;
1319 u_int r, pair[2];
1320 for (r = 0; reglist; r++, reglist >>= 1) {
1321 if (reglist & 1)
1322 pair[c++] = r;
1323 if (c == 2) {
1324 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1325 ofs += 8 * 2;
1326 c = 0;
1327 }
1328 }
1329 if (c) {
1330 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1331 ofs += 8;
1332 }
1333 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1334}
1335
1336// Save registers before function call
1337static void save_regs(u_int reglist)
1338{
1339 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1340 save_load_regs_all(1, reglist);
be516ebe 1341}
1342
1343// Restore registers after function call
1344static void restore_regs(u_int reglist)
1345{
1346 reglist &= CALLER_SAVE_REGS;
687b4580 1347 save_load_regs_all(0, reglist);
be516ebe 1348}
1349
1350/* Stubs/epilogue */
1351
1352static void literal_pool(int n)
1353{
1354 (void)literals;
1355}
1356
// Emits nothing on this target; n is ignored.
static void literal_pool_jumpover(int n)
{
}
1360
d1e4ebd9 1361// parsed by get_pointer, find_extjump_insn
104df9d3 1362static void emit_extjump(u_char *addr, u_int target)
be516ebe 1363{
d1e4ebd9 1364 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1365
d1e4ebd9 1366 emit_movz(target & 0xffff, 0);
1367 emit_movk_lsl16(target >> 16, 0);
1368
1369 // addr is in the current recompiled block (max 256k)
1370 // offset shouldn't exceed +/-1MB
1371 emit_adr(addr, 1);
104df9d3 1372 emit_far_jump(dyna_linker);
be516ebe 1373}
1374
d1e4ebd9 1375static void check_extjump2(void *src)
be516ebe 1376{
d1e4ebd9 1377 u_int *ptr = src;
1378 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1379 (void)ptr;
be516ebe 1380}
1381
1382// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1383static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1384{
d1e4ebd9 1385 int diff = rt_val - rs_val;
3968e69e 1386 if ((-4096 < diff && diff < 4096)
1387 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1388 emit_addimm(rs, diff, rt);
3968e69e 1389 else if (rt_val == ~rs_val)
1390 emit_not(rs, rt);
d1e4ebd9 1391 else if (is_rotated_mask(rs_val ^ rt_val))
1392 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1393 else
d1e4ebd9 1394 emit_movimm(rt_val, rt);
be516ebe 1395}
1396
d1e4ebd9 1397// return 1 if the above function can do it's job cheaply
687b4580 1398static int is_similar_value(u_int v1, u_int v2)
be516ebe 1399{
687b4580 1400 int diff = v1 - v2;
3968e69e 1401 return (-4096 < diff && diff < 4096)
1402 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1403 || v1 == ~v2
d1e4ebd9 1404 || is_rotated_mask(v1 ^ v2);
1405}
1406
37387d8b 1407static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1408{
1409 if (rt_val < 0x100000000ull) {
1410 emit_movimm_from(rs_val, rs, rt_val, rt);
1411 return;
1412 }
1413 // just move the whole thing. At least on Linux all addresses
1414 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1415 emit_movimm64(rt_val, rt);
37387d8b 1416}
1417
1418// trashes x2
d1e4ebd9 1419static void pass_args64(u_int a0, u_int a1)
1420{
1421 if(a0==1&&a1==0) {
1422 // must swap
1423 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1424 }
1425 else if(a0!=0&&a1==0) {
1426 emit_mov64(a1,1);
1427 if (a0>=0) emit_mov64(a0,0);
1428 }
1429 else {
1430 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1431 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1432 }
be516ebe 1433}
1434
d1e4ebd9 1435static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1436{
1437 switch(type) {
1438 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1439 case LOADBU_STUB:
1440 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1441 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1442 case LOADHU_STUB:
1443 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1444 case LOADW_STUB:
1445 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1446 default: assert(0);
d1e4ebd9 1447 }
1448}
1449
1450#include "pcsxmem.h"
be516ebe 1451//#include "pcsxmem_inline.c"
1452
1453static void do_readstub(int n)
1454{
1455 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1456 set_jump_target(stubs[n].addr, out);
1457 enum stub_type type = stubs[n].type;
1458 int i = stubs[n].a;
1459 int rs = stubs[n].b;
1460 const struct regstat *i_regs = (void *)stubs[n].c;
1461 u_int reglist = stubs[n].e;
1462 const signed char *i_regmap = i_regs->regmap;
1463 int rt;
a5cd72d0 1464 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1465 rt=get_reg(i_regmap,FTEMP);
1466 }else{
cf95b4f0 1467 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1468 }
1469 assert(rs>=0);
1470 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1471 void *restore_jump = NULL, *handler_jump = NULL;
1472 reglist|=(1<<rs);
1473 for (r = 0; r < HOST_CCREG; r++) {
1474 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1475 temp = r;
1476 break;
1477 }
1478 }
cf95b4f0 1479 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1480 reglist&=~(1<<rt);
1481 if(temp==-1) {
1482 save_regs(reglist);
1483 regs_saved=1;
1484 temp=(rs==0)?2:0;
1485 }
1486 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1487 temp2=1;
1488 emit_readdword(&mem_rtab,temp);
1489 emit_shrimm(rs,12,temp2);
1490 emit_readdword_dualindexedx8(temp,temp2,temp2);
1491 emit_adds64(temp2,temp2,temp2);
1492 handler_jump=out;
1493 emit_jc(0);
a5cd72d0 1494 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1495 switch(type) {
1496 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1497 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1498 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1499 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1500 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1501 default: assert(0);
d1e4ebd9 1502 }
1503 }
1504 if(regs_saved) {
1505 restore_jump=out;
1506 emit_jmp(0); // jump to reg restore
1507 }
1508 else
1509 emit_jmp(stubs[n].retaddr); // return address
1510 set_jump_target(handler_jump, out);
1511
1512 if(!regs_saved)
1513 save_regs(reglist);
1514 void *handler=NULL;
1515 if(type==LOADB_STUB||type==LOADBU_STUB)
1516 handler=jump_handler_read8;
1517 if(type==LOADH_STUB||type==LOADHU_STUB)
1518 handler=jump_handler_read16;
1519 if(type==LOADW_STUB)
1520 handler=jump_handler_read32;
1521 assert(handler);
1522 pass_args64(rs,temp2);
1523 int cc=get_reg(i_regmap,CCREG);
1524 if(cc<0)
1525 emit_loadreg(CCREG,2);
2330734f 1526 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1527 emit_far_call(handler);
d1e4ebd9 1528 // (no cycle reload after read)
a5cd72d0 1529 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1530 loadstore_extend(type,0,rt);
1531 }
1532 if(restore_jump)
1533 set_jump_target(restore_jump, out);
1534 restore_regs(reglist);
1535 emit_jmp(stubs[n].retaddr);
be516ebe 1536}
1537
81dbbf4c 1538static void inline_readstub(enum stub_type type, int i, u_int addr,
1539 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1540{
277718fa 1541 int ra = cinfo[i].addr;
1542 int rt = get_reg(regmap, target);
1543 assert(ra >= 0);
d1e4ebd9 1544 u_int is_dynamic=0;
1545 uintptr_t host_addr = 0;
1546 void *handler;
1547 int cc=get_reg(regmap,CCREG);
277718fa 1548 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
d1e4ebd9 1549 // return;
1550 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1551 if (handler == NULL) {
cf95b4f0 1552 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1553 return;
37387d8b 1554 if (addr != host_addr)
277718fa 1555 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1556 switch(type) {
277718fa 1557 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1558 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1559 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1560 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1561 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
d1e4ebd9 1562 default: assert(0);
1563 }
1564 return;
1565 }
37387d8b 1566 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1567 if (is_dynamic) {
d1e4ebd9 1568 if(type==LOADB_STUB||type==LOADBU_STUB)
1569 handler=jump_handler_read8;
1570 if(type==LOADH_STUB||type==LOADHU_STUB)
1571 handler=jump_handler_read16;
1572 if(type==LOADW_STUB)
1573 handler=jump_handler_read32;
1574 }
1575
1576 // call a memhandler
cf95b4f0 1577 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1578 reglist&=~(1<<rt);
1579 save_regs(reglist);
1580 if(target==0)
1581 emit_movimm(addr,0);
277718fa 1582 else if(ra!=0)
1583 emit_mov(ra,0);
d1e4ebd9 1584 if(cc<0)
1585 emit_loadreg(CCREG,2);
2330734f 1586 emit_addimm(cc<0?2:cc,adj,2);
3968e69e 1587 if(is_dynamic) {
1588 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
aaece508 1589 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1590 if (-4294967296l <= offset && offset < 4294967296l) {
1591 emit_adrp((void *)l1, 1);
1592 emit_addimm64(1, l1 & 0xfff, 1);
1593 }
1594 else
1595 emit_movimm64(l1, 1);
3968e69e 1596 }
d1e4ebd9 1597 else
2a014d73 1598 emit_far_call(do_memhandler_pre);
d1e4ebd9 1599
2a014d73 1600 emit_far_call(handler);
d1e4ebd9 1601
1602 // (no cycle reload after read)
cf95b4f0 1603 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1604 loadstore_extend(type, 0, rt);
1605 restore_regs(reglist);
be516ebe 1606}
1607
1608static void do_writestub(int n)
1609{
1610 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1611 set_jump_target(stubs[n].addr, out);
1612 enum stub_type type=stubs[n].type;
1613 int i=stubs[n].a;
1614 int rs=stubs[n].b;
1615 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1616 u_int reglist=stubs[n].e;
1617 signed char *i_regmap=i_regs->regmap;
1618 int rt,r;
a5cd72d0 1619 if(dops[i].itype==C2LS) {
d1e4ebd9 1620 rt=get_reg(i_regmap,r=FTEMP);
1621 }else{
cf95b4f0 1622 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1623 }
1624 assert(rs>=0);
1625 assert(rt>=0);
1626 int rtmp,temp=-1,temp2,regs_saved=0;
1627 void *restore_jump = NULL, *handler_jump = NULL;
1628 int reglist2=reglist|(1<<rs)|(1<<rt);
1629 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1630 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1631 temp = rtmp;
1632 break;
1633 }
1634 }
1635 if(temp==-1) {
1636 save_regs(reglist);
1637 regs_saved=1;
1638 for(rtmp=0;rtmp<=3;rtmp++)
1639 if(rtmp!=rs&&rtmp!=rt)
1640 {temp=rtmp;break;}
1641 }
1642 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1643 temp2=3;
1644 else {
1645 host_tempreg_acquire();
1646 temp2=HOST_TEMPREG;
1647 }
1648 emit_readdword(&mem_wtab,temp);
1649 emit_shrimm(rs,12,temp2);
1650 emit_readdword_dualindexedx8(temp,temp2,temp2);
1651 emit_adds64(temp2,temp2,temp2);
1652 handler_jump=out;
1653 emit_jc(0);
1654 switch(type) {
1655 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1656 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1657 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1658 default: assert(0);
1659 }
1660 if(regs_saved) {
1661 restore_jump=out;
1662 emit_jmp(0); // jump to reg restore
1663 }
1664 else
1665 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1666 set_jump_target(handler_jump, out);
1667
d1e4ebd9 1668 if(!regs_saved)
1669 save_regs(reglist);
1670 void *handler=NULL;
1671 switch(type) {
1672 case STOREB_STUB: handler=jump_handler_write8; break;
1673 case STOREH_STUB: handler=jump_handler_write16; break;
1674 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1675 default: assert(0);
d1e4ebd9 1676 }
1677 assert(handler);
1678 pass_args(rs,rt);
1679 if(temp2!=3) {
1680 emit_mov64(temp2,3);
1681 host_tempreg_release();
1682 }
1683 int cc=get_reg(i_regmap,CCREG);
1684 if(cc<0)
1685 emit_loadreg(CCREG,2);
2330734f 1686 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1687 // returns new cycle_count
2a014d73 1688 emit_far_call(handler);
2330734f 1689 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1690 if(cc<0)
1691 emit_storereg(CCREG,2);
1692 if(restore_jump)
1693 set_jump_target(restore_jump, out);
1694 restore_regs(reglist);
1695 emit_jmp(stubs[n].retaddr);
be516ebe 1696}
1697
81dbbf4c 1698static void inline_writestub(enum stub_type type, int i, u_int addr,
1699 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1700{
277718fa 1701 int ra = cinfo[i].addr;
687b4580 1702 int rt = get_reg(regmap,target);
277718fa 1703 assert(ra >= 0);
687b4580 1704 assert(rt >= 0);
1705 uintptr_t host_addr = 0;
1706 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1707 if (handler == NULL) {
37387d8b 1708 if (addr != host_addr)
277718fa 1709 emit_movimm_from64(addr, ra, host_addr, ra);
d1e4ebd9 1710 switch (type) {
277718fa 1711 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1712 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1713 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
687b4580 1714 default: assert(0);
1715 }
1716 return;
1717 }
1718
1719 // call a memhandler
1720 save_regs(reglist);
277718fa 1721 emit_writeword(ra, &address); // some handlers still need it
d1e4ebd9 1722 loadstore_extend(type, rt, 0);
1723 int cc, cc_use;
1724 cc = cc_use = get_reg(regmap, CCREG);
1725 if (cc < 0)
1726 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1727 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1728
2a014d73 1729 emit_far_call(do_memhandler_pre);
1730 emit_far_call(handler);
1731 emit_far_call(do_memhandler_post);
2330734f 1732 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1733 if (cc < 0)
1734 emit_storereg(CCREG, cc_use);
687b4580 1735 restore_regs(reglist);
be516ebe 1736}
1737
3968e69e 1738/* Special assem */
1739
81dbbf4c 1740static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1741{
1742 save_load_regs_all(1, reglist);
32631e6a 1743 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1744#ifdef PCNT
1745 emit_movimm(op, 0);
2a014d73 1746 emit_far_call(pcnt_gte_start);
3968e69e 1747#endif
1748 // pointer to cop2 regs
1749 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1750}
1751
1752static void c2op_epilogue(u_int op,u_int reglist)
1753{
1754#ifdef PCNT
1755 emit_movimm(op, 0);
2a014d73 1756 emit_far_call(pcnt_gte_end);
3968e69e 1757#endif
1758 save_load_regs_all(0, reglist);
be516ebe 1759}
1760
81dbbf4c 1761static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1762{
3968e69e 1763 u_int c2op=source[i]&0x3f;
1764 u_int hr,reglist_full=0,reglist;
1765 int need_flags,need_ir;
1766 for(hr=0;hr<HOST_REGS;hr++) {
1767 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1768 }
1769 reglist=reglist_full&CALLER_SAVE_REGS;
1770
1771 if (gte_handlers[c2op]!=NULL) {
1772 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1773 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1774 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1775 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1776 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1777 need_flags=0;
1778 //int shift = (source[i] >> 19) & 1;
1779 //int lm = (source[i] >> 10) & 1;
1780 switch(c2op) {
1781 default:
1782 (void)need_ir;
81dbbf4c 1783 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1784 emit_movimm(source[i],1); // opcode
1785 emit_writeword(1,&psxRegs.code);
2a014d73 1786 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1787 break;
1788 }
1789 c2op_epilogue(c2op,reglist);
1790 }
1791}
1792
1793static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1794{
1795 //value = value & 0x7ffff000;
1796 //if (value & 0x7f87e000) value |= 0x80000000;
1797 emit_andimm(sl, 0x7fffe000, temp);
1798 emit_testimm(temp, 0xff87ffff);
1799 emit_andimm(sl, 0x7ffff000, temp);
1800 host_tempreg_acquire();
1801 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1802 emit_cmovne_reg(HOST_TEMPREG, temp);
1803 host_tempreg_release();
1804 assert(0); // testing needed
1805}
1806
1807static void do_mfc2_31_one(u_int copr,signed char temp)
1808{
1809 emit_readshword(&reg_cop2d[copr],temp);
1810 emit_bicsar_imm(temp,31,temp);
1811 emit_cmpimm(temp,0xf80);
1812 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1813 emit_andimm(temp,0xf80,temp);
1814}
1815
1816static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1817{
1818 if (temp < 0) {
1819 host_tempreg_acquire();
1820 temp = HOST_TEMPREG;
1821 }
1822 do_mfc2_31_one(9,temp);
1823 emit_shrimm(temp,7,tl);
1824 do_mfc2_31_one(10,temp);
1825 emit_orrshr_imm(temp,2,tl);
1826 do_mfc2_31_one(11,temp);
1827 emit_orrshl_imm(temp,3,tl);
1828 emit_writeword(tl,&reg_cop2d[29]);
1829
1830 if (temp == HOST_TEMPREG)
1831 host_tempreg_release();
be516ebe 1832}
1833
2330734f 1834static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1835{
3968e69e 1836 // case 0x18: MULT
1837 // case 0x19: MULTU
1838 // case 0x1A: DIV
1839 // case 0x1B: DIVU
cf95b4f0 1840 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1841 {
cf95b4f0 1842 switch(dops[i].opcode2)
3968e69e 1843 {
1844 case 0x18: // MULT
1845 case 0x19: // MULTU
1846 {
cf95b4f0 1847 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1848 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1849 signed char hi=get_reg(i_regs->regmap,HIREG);
1850 signed char lo=get_reg(i_regs->regmap,LOREG);
1851 assert(m1>=0);
1852 assert(m2>=0);
1853 assert(hi>=0);
1854 assert(lo>=0);
1855
cf95b4f0 1856 if(dops[i].opcode2==0x18) // MULT
3968e69e 1857 emit_smull(m1,m2,hi);
1858 else // MULTU
1859 emit_umull(m1,m2,hi);
1860
1861 emit_mov(hi,lo);
1862 emit_shrimm64(hi,32,hi);
1863 break;
1864 }
1865 case 0x1A: // DIV
1866 case 0x1B: // DIVU
1867 {
cf95b4f0 1868 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1869 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1870 signed char quotient=get_reg(i_regs->regmap,LOREG);
1871 signed char remainder=get_reg(i_regs->regmap,HIREG);
1872 assert(numerator>=0);
1873 assert(denominator>=0);
1874 assert(quotient>=0);
1875 assert(remainder>=0);
1876
cf95b4f0 1877 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1878 emit_sdiv(numerator,denominator,quotient);
1879 else // DIVU
1880 emit_udiv(numerator,denominator,quotient);
1881 emit_msub(quotient,denominator,numerator,remainder);
1882
1883 // div 0 quotient (remainder is already correct)
1884 host_tempreg_acquire();
a5cd72d0 1885 if (dops[i].opcode2 == 0x1A) { // DIV
1886 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1887 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1888 }
3968e69e 1889 else
1890 emit_movimm(~0,HOST_TEMPREG);
1891 emit_test(denominator,denominator);
1892 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1893 host_tempreg_release();
1894 break;
1895 }
1896 default:
1897 assert(0);
1898 }
1899 }
1900 else
1901 {
1902 signed char hr=get_reg(i_regs->regmap,HIREG);
1903 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1904 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1905 {
cf95b4f0 1906 if (dops[i].rs1) {
1907 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1908 assert(numerator >= 0);
1909 if (hr >= 0)
1910 emit_mov(numerator,hr);
1911 if (lr >= 0) {
a5cd72d0 1912 if (dops[i].opcode2 == 0x1A) { // DIV
1913 emit_add_lsrimm(WZR,numerator,31,lr);
1914 emit_orn_asrimm(lr,numerator,31,lr);
1915 }
3968e69e 1916 else
1917 emit_movimm(~0,lr);
1918 }
1919 }
1920 else {
1921 if (hr >= 0) emit_zeroreg(hr);
1922 if (lr >= 0) emit_movimm(~0,lr);
1923 }
1924 }
a5cd72d0 1925 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1926 {
1927 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1928 assert(denominator >= 0);
1929 if (hr >= 0) emit_zeroreg(hr);
1930 if (lr >= 0) {
1931 emit_zeroreg(lr);
1932 emit_test(denominator, denominator);
1933 emit_csinvne_reg(lr, lr, lr);
1934 }
1935 }
3968e69e 1936 else
1937 {
1938 // Multiply by zero is zero.
1939 if (hr >= 0) emit_zeroreg(hr);
1940 if (lr >= 0) emit_zeroreg(lr);
1941 }
1942 }
be516ebe 1943}
1944#define multdiv_assemble multdiv_assemble_arm64
1945
d1e4ebd9 1946static void do_jump_vaddr(u_int rs)
1947{
1948 if (rs != 0)
1949 emit_mov(rs, 0);
104df9d3 1950 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1951 emit_jmpreg(0);
1952}
1953
be516ebe 1954static void do_preload_rhash(u_int r) {
1955 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1956 // register. On ARM the hash can be done with a single instruction (below)
1957}
1958
1959static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1960 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1961}
1962
1963static void do_rhash(u_int rs,u_int rh) {
1964 emit_andimm(rs, 0xf8, rh);
1965}
1966
d1e4ebd9 1967static void do_miniht_load(int ht, u_int rh) {
1968 emit_add64(ht, rh, ht);
1969 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1970}
1971
d1e4ebd9 1972static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1973 emit_cmp(rh, rs);
1974 void *jaddr = out;
1975 emit_jeq(0);
1976 do_jump_vaddr(rs);
1977
1978 set_jump_target(jaddr, out);
1979 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1980 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1981 emit_jmpreg(ht);
be516ebe 1982}
1983
d1e4ebd9 1984// parsed by set_jump_target?
be516ebe 1985static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1986 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1987 emit_movk(return_address&0xffff,rt);
1988 add_to_linker(out,return_address,1);
1989 emit_adr(out,temp);
1990 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1991 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1992}
1993
d9e2b173 1994static unused void clear_cache_arm64(char *start, char *end)
be516ebe 1995{
919981d0 1996 // Don't rely on GCC's __clear_cache implementation, as it caches
1997 // icache/dcache cache line sizes, that can vary between cores on
1998 // big.LITTLE architectures.
1999 uint64_t addr, ctr_el0;
2000 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2001 size_t isize, dsize;
2002
2003 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2004 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2005 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2006
2007 // use the global minimum cache line size
2008 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2009 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2010
2011 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2012 not required for instruction to data coherence. */
2013 if ((ctr_el0 & (1 << 28)) == 0x0) {
2014 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2015 for (; addr < (uint64_t)end; addr += dsize)
2016 // use "civac" instead of "cvau", as this is the suggested workaround for
2017 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2018 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 2019 }
919981d0 2020 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2021
919981d0 2022 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2023 Unification is not required for instruction to data coherence. */
2024 if ((ctr_el0 & (1 << 29)) == 0x0) {
2025 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2026 for (; addr < (uint64_t)end; addr += isize)
2027 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2028
2029 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 2030 }
919981d0 2031
2032 __asm__ volatile("isb" : : : "memory");
be516ebe 2033}
2034
2035// CPU-architecture-specific initialization
2a014d73 2036static void arch_init(void)
2037{
2038 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 2039 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 2040 size_t i;
2041 assert(!(diff & 3));
d9e2b173 2042 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 2043 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 2044 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2045 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 2046 }
2047 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2048}
2049
2050// vim:shiftwidth=2:expandtab