try to fix win32 build
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
// Forward declarations of the memory-handler pre/post hooks.
// Declared with (void) so they are real prototypes: an empty parameter
// list would leave the argument list unchecked.
void do_memhandler_pre(void);
void do_memhandler_post(void);
be516ebe 28
29/* Linker */
d1e4ebd9 30static void set_jump_target(void *addr, void *target)
be516ebe 31{
d9e2b173 32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 33 intptr_t offset = (u_char *)target - (u_char *)addr;
34
3968e69e 35 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
38 }
3968e69e 39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 43 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 44 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 45 assert(-1048576 <= offset && offset < 1048576);
4a2e3735 46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
d1e4ebd9 47 }
3968e69e 48 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
52 }
53 else
3968e69e 54 abort(); // should not happen
be516ebe 55}
56
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an "adr"; its pc-relative immediate
// is decoded and applied to the address of that adr instruction.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode on an unsigned copy: left-shifting a signed int is undefined
  const uint32_t insn = *(uint32_t *)ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi (bits 23..5) sign-extended, immlo (bits 30..29)
  const int immhi = (int32_t)(insn << 8) >> 13;
  const int offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
66
#if 0
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn through find_extjump_insn(), decode its
// pc-relative immediate and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);

  if ((*branch&0xfc000000) == 0x14000000) { // b
    return branch + ((signed int)(*branch<<6)>>6);
  }
  if ((*branch&0xff000000) == 0x54000000     // b.cond
      || (*branch&0x7e000000) == 0x34000000) // cbz/cbnz
  {
    return branch + ((signed int)(*branch<<8)>>13);
  }
  assert(0);
  return NULL;
}
#endif
be516ebe 84
be516ebe 85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
d1e4ebd9 118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition codes; the numeric value is what gets placed into the
// condition field of the emitted instructions and indexes condname[].
enum {
  COND_EQ = 0,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 139};
140
be516ebe 141static void output_w32(u_int word)
142{
d9e2b173 143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 144 out += 4;
145}
146
// Pack rn into bits 9..5 and rd into bits 4..0.
// Both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  return rd | (rn << 5);
}
153
// Pack rm into bits 20..16, rn into bits 9..5 and rd into bits 4..0.
// Register number 31 is accepted for every field.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (rm << 16);
}
161
3968e69e 162static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
163{
164 assert(ra < 32);
165 return rm_rn_rd(rm, rn, rd) | (ra << 10);
166}
167
// Pack a 7-bit immediate (bits 21..15), rt2 (bits 14..10), rn (bits 9..5)
// and rt (bits 4..0).  rt and rt2 must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  return rt | (rn << 5) | (rt2 << 10) | (imm7 << 15);
}
176
687b4580 177static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
178{
179 assert(imm6 <= 63);
180 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
181}
182
// Pack a 16-bit immediate into bits 20..5 and rd (below 31) into bits 4..0.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  return rd | (imm16 << 5);
}
189
// Pack a 12-bit immediate into bits 21..10, rn into bits 9..5 and rd into
// bits 4..0.  Register number 31 is accepted for rn and rd.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (imm12 << 10);
}
197
// Pack a 9-bit immediate into bits 20..12, rn into bits 9..5 and the
// transfer register (parameter rd) into bits 4..0.  Both registers must be
// below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  return rd | (rn << 5) | (imm9 << 12);
}
205
// Pack a 19-bit immediate into bits 23..5 and rt (below 31) into bits 4..0.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  return rt | (imm19 << 5);
}
212
// Pack the bitfield/logical-immediate operand fields: n (bit 22),
// immr (bits 21..16), imms (bits 15..10), rn (bits 9..5), rd (bits 4..0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  return rd | (rn << 5) | (imms << 10) | (immr << 16) | (n << 22);
}
222
223static u_int genjmp(const u_char *addr)
be516ebe 224{
225 intptr_t offset = addr - out;
d1e4ebd9 226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 227 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 abort();
be516ebe 230 return 0;
231 }
d1e4ebd9 232 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 233}
234
d1e4ebd9 235static u_int genjmpcc(const u_char *addr)
be516ebe 236{
237 intptr_t offset = addr - out;
d1e4ebd9 238 if ((uintptr_t)addr < 3) return 0;
be516ebe 239 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 abort();
242 return 0;
243 }
244 return ((u_int)offset >> 2) & 0x7ffff;
245}
246
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ...), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
251
252// This function returns true if the argument contains a
253// non-empty sequence of ones (possibly rotated) with the remainder zero.
254static uint32_t is_rotated_mask(u_int value)
255{
3968e69e 256 if (value == 0 || value == ~0)
be516ebe 257 return 0;
d1e4ebd9 258 if (is_mask((value - 1) | value))
259 return 1;
260 return is_mask((~value - 1) | ~value);
261}
262
263static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
264{
265 int lzeros, tzeros, ones;
266 assert(value != 0);
267 if (is_mask((value - 1) | value)) {
268 lzeros = __builtin_clz(value);
269 tzeros = __builtin_ctz(value);
270 ones = 32 - lzeros - tzeros;
271 *immr = (32 - tzeros) & 31;
272 *imms = ones - 1;
273 return;
be516ebe 274 }
d1e4ebd9 275 value = ~value;
276 if (is_mask((value - 1) | value)) {
277 lzeros = __builtin_clz(value);
278 tzeros = __builtin_ctz(value);
279 ones = 32 - lzeros - tzeros;
3968e69e 280 *immr = lzeros;
d1e4ebd9 281 *imms = 31 - ones;
282 return;
283 }
3968e69e 284 abort();
be516ebe 285}
286
287static void emit_mov(u_int rs, u_int rt)
288{
687b4580 289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
291}
292
293static void emit_mov64(u_int rs, u_int rt)
294{
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 297}
298
687b4580 299static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 300{
d1e4ebd9 301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 303}
304
d1e4ebd9 305static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 306{
d1e4ebd9 307 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
308 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 309}
310
d1e4ebd9 311static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 312{
3968e69e 313 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 314 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
315}
39b71d9a 316#define emit_adds_ptr emit_adds64
d1e4ebd9 317
318static void emit_neg(u_int rs, u_int rt)
319{
320 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
321 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 322}
323
687b4580 324static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 325{
d1e4ebd9 326 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 327 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 328}
329
3968e69e 330static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
331{
332 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
333 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
334}
335
d1e4ebd9 336static void emit_movz(u_int imm, u_int rt)
be516ebe 337{
d1e4ebd9 338 assem_debug("movz %s,#%#x\n", regname[rt], imm);
339 output_w32(0x52800000 | imm16_rd(imm, rt));
340}
341
342static void emit_movz_lsl16(u_int imm, u_int rt)
343{
344 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
345 output_w32(0x52a00000 | imm16_rd(imm, rt));
346}
347
348static void emit_movn(u_int imm, u_int rt)
349{
350 assem_debug("movn %s,#%#x\n", regname[rt], imm);
351 output_w32(0x12800000 | imm16_rd(imm, rt));
352}
353
354static void emit_movn_lsl16(u_int imm,u_int rt)
355{
356 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
357 output_w32(0x12a00000 | imm16_rd(imm, rt));
358}
359
360static void emit_movk(u_int imm,u_int rt)
361{
362 assem_debug("movk %s,#%#x\n", regname[rt], imm);
363 output_w32(0x72800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movk_lsl16(u_int imm,u_int rt)
367{
368 assert(imm<65536);
3968e69e 369 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 370 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 371}
372
373static void emit_zeroreg(u_int rt)
374{
d1e4ebd9 375 emit_movz(0, rt);
be516ebe 376}
377
be516ebe 378static void emit_movimm(u_int imm, u_int rt)
379{
d1e4ebd9 380 if (imm < 65536)
381 emit_movz(imm, rt);
382 else if ((~imm) < 65536)
383 emit_movn(~imm, rt);
384 else if ((imm&0xffff) == 0)
385 emit_movz_lsl16(imm >> 16, rt);
386 else if (((~imm)&0xffff) == 0)
387 emit_movn_lsl16(~imm >> 16, rt);
388 else if (is_rotated_mask(imm)) {
389 u_int immr, imms;
390 gen_logical_imm(imm, &immr, &imms);
391 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
392 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
393 }
be516ebe 394 else {
d1e4ebd9 395 emit_movz(imm & 0xffff, rt);
396 emit_movk_lsl16(imm >> 16, rt);
be516ebe 397 }
398}
399
aaece508 400static void emit_movimm64(uint64_t imm, u_int rt)
401{
402 u_int shift, op, imm16, insns = 0;
403 for (shift = 0; shift < 4; shift++) {
404 imm16 = (imm >> shift * 16) & 0xffff;
405 if (!imm16)
406 continue;
407 op = insns ? 0xf2800000 : 0xd2800000;
408 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
409 if (shift)
410 assem_debug(",lsl #%u", shift * 16);
411 assem_debug("\n");
412 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
413 insns++;
414 }
415 if (!insns) {
416 assem_debug("movz %s,#0\n", regname64[rt]);
417 output_w32(0xd2800000 | imm16_rd(0, rt));
418 }
419}
420
687b4580 421static void emit_readword(void *addr, u_int rt)
422{
423 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
424 if (!(offset & 3) && offset <= 16380) {
425 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
426 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
427 }
428 else
3968e69e 429 abort();
687b4580 430}
431
d1e4ebd9 432static void emit_readdword(void *addr, u_int rt)
433{
434 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
435 if (!(offset & 7) && offset <= 32760) {
436 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
437 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
438 }
3968e69e 439 else
440 abort();
441}
39b71d9a 442#define emit_readptr emit_readdword
3968e69e 443
444static void emit_readshword(void *addr, u_int rt)
445{
446 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
447 if (!(offset & 1) && offset <= 8190) {
448 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
449 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
450 }
d1e4ebd9 451 else
452 assert(0);
453}
454
be516ebe 455static void emit_loadreg(u_int r, u_int hr)
456{
d1e4ebd9 457 int is64 = 0;
be516ebe 458 if (r == 0)
459 emit_zeroreg(hr);
460 else {
33788798 461 void *addr;
be516ebe 462 switch (r) {
7c3a5182 463 //case HIREG: addr = &hi; break;
464 //case LOREG: addr = &lo; break;
be516ebe 465 case CCREG: addr = &cycle_count; break;
bc7c5acb 466 case CSREG: addr = &psxRegs.CP0.n.SR; break;
d1e4ebd9 467 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 468 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 469 default:
470 assert(r < 34);
471 addr = &psxRegs.GPR.r[r];
472 break;
be516ebe 473 }
d1e4ebd9 474 if (is64)
475 emit_readdword(addr, hr);
476 else
477 emit_readword(addr, hr);
be516ebe 478 }
479}
480
687b4580 481static void emit_writeword(u_int rt, void *addr)
482{
483 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
484 if (!(offset & 3) && offset <= 16380) {
485 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
486 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
487 }
488 else
489 assert(0);
490}
491
d1e4ebd9 492static void emit_writedword(u_int rt, void *addr)
493{
494 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
495 if (!(offset & 7) && offset <= 32760) {
496 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 497 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 498 }
499 else
3968e69e 500 abort();
d1e4ebd9 501}
502
687b4580 503static void emit_storereg(u_int r, u_int hr)
be516ebe 504{
505 assert(r < 64);
7c3a5182 506 void *addr = &psxRegs.GPR.r[r];
be516ebe 507 switch (r) {
7c3a5182 508 //case HIREG: addr = &hi; break;
509 //case LOREG: addr = &lo; break;
be516ebe 510 case CCREG: addr = &cycle_count; break;
7c3a5182 511 default: assert(r < 34); break;
be516ebe 512 }
687b4580 513 emit_writeword(hr, addr);
be516ebe 514}
515
516static void emit_test(u_int rs, u_int rt)
517{
d1e4ebd9 518 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
519 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 520}
521
d1e4ebd9 522static void emit_testimm(u_int rs, u_int imm)
be516ebe 523{
d1e4ebd9 524 u_int immr, imms;
687b4580 525 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 526 assert(is_rotated_mask(imm)); // good enough for PCSX
527 gen_logical_imm(imm, &immr, &imms);
3968e69e 528 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 529}
530
531static void emit_not(u_int rs,u_int rt)
532{
533 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 534 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 535}
536
be516ebe 537static void emit_and(u_int rs1,u_int rs2,u_int rt)
538{
539 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 540 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 541}
542
543static void emit_or(u_int rs1,u_int rs2,u_int rt)
544{
545 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 546 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 547}
548
3968e69e 549static void emit_bic(u_int rs1,u_int rs2,u_int rt)
550{
551 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
553}
554
be516ebe 555static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
556{
be516ebe 557 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 558 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 559}
560
561static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
562{
be516ebe 563 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 564 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 565}
566
3968e69e 567static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
568{
569 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
570 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
571}
572
be516ebe 573static void emit_xor(u_int rs1,u_int rs2,u_int rt)
574{
575 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 576 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 577}
578
3968e69e 579static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
580{
581 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
582 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
583}
584
d1e4ebd9 585static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 586{
d1e4ebd9 587 unused const char *st = s ? "s" : "";
588 s = s ? 0x20000000 : 0;
589 is64 = is64 ? 0x80000000 : 0;
687b4580 590 if (imm < 4096) {
d1e4ebd9 591 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
592 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 593 }
594 else if (-imm < 4096) {
3968e69e 595 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 596 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
597 }
598 else if (imm < 16777216) {
599 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
600 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 603 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 604 }
605 }
606 else if (-imm < 16777216) {
607 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
608 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
609 if ((imm & 0xfff) || s) {
610 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
611 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
612 }
687b4580 613 }
614 else
3968e69e 615 abort();
be516ebe 616}
617
d1e4ebd9 618static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
619{
9b495f6e 620 if (imm == 0) {
621 emit_mov(rs, rt);
622 return;
623 }
d1e4ebd9 624 emit_addimm_s(0, 0, rs, imm, rt);
625}
626
627static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
628{
629 emit_addimm_s(0, 1, rs, imm, rt);
630}
631
bc7c5acb 632static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
633{
634 emit_addimm64(rs, imm, rt);
635}
636
be516ebe 637static void emit_addimm_and_set_flags(int imm, u_int rt)
638{
d1e4ebd9 639 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 640}
641
d1e4ebd9 642static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 643{
d1e4ebd9 644 const char *names[] = { "and", "orr", "eor", "ands" };
645 const char *name = names[op];
646 u_int immr, imms;
647 op = op << 29;
648 if (is_rotated_mask(imm)) {
649 gen_logical_imm(imm, &immr, &imms);
650 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
651 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
652 }
653 else {
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_acquire();
656 emit_movimm(imm, HOST_TEMPREG);
657 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
658 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
659 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
660 host_tempreg_release();
661 }
662 (void)name;
be516ebe 663}
664
d1e4ebd9 665static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 666{
d1e4ebd9 667 if (imm == 0)
668 emit_zeroreg(rt);
669 else
670 emit_logicop_imm(0, rs, imm, rt);
be516ebe 671}
672
d1e4ebd9 673static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 674{
d1e4ebd9 675 if (imm == 0) {
676 if (rs != rt)
677 emit_mov(rs, rt);
678 }
679 else
680 emit_logicop_imm(1, rs, imm, rt);
be516ebe 681}
682
d1e4ebd9 683static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 684{
d1e4ebd9 685 if (imm == 0) {
686 if (rs != rt)
687 emit_mov(rs, rt);
688 }
689 else
690 emit_logicop_imm(2, rs, imm, rt);
be516ebe 691}
692
d1e4ebd9 693static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 694{
d1e4ebd9 695 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
696 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 697}
698
d1e4ebd9 699static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 700{
d1e4ebd9 701 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
702 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 703}
704
705static void emit_shlimm(u_int rs,u_int imm,u_int rt)
706{
be516ebe 707 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 708 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 709}
710
3968e69e 711static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 712{
3968e69e 713 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
714 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 715}
716
3968e69e 717static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 718{
be516ebe 719 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 720 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 721}
722
723static void emit_sarimm(u_int rs,u_int imm,u_int rt)
724{
be516ebe 725 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 726 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 727}
728
729static void emit_rorimm(u_int rs,u_int imm,u_int rt)
730{
3968e69e 731 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 732 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 733}
734
735static void emit_signextend16(u_int rs, u_int rt)
736{
737 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 738 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 739}
740
d1e4ebd9 741static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 742{
3968e69e 743 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 744 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 745}
746
d1e4ebd9 747static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 748{
d1e4ebd9 749 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
750 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 751}
752
d1e4ebd9 753static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 754{
d1e4ebd9 755 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
756 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 757}
758
d1e4ebd9 759static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 760{
d1e4ebd9 761 if (imm < 4096) {
762 assem_debug("cmp %s,%#x\n", regname[rs], imm);
763 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
764 }
765 else if (-imm < 4096) {
766 assem_debug("cmn %s,%#x\n", regname[rs], imm);
767 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
768 }
769 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 770 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 771 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
772 }
773 else {
774 host_tempreg_acquire();
775 emit_movimm(imm, HOST_TEMPREG);
776 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
777 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
778 host_tempreg_release();
779 }
be516ebe 780}
781
d1e4ebd9 782static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 783{
d1e4ebd9 784 assert(imm == 0 || imm == 1);
785 assert(cond0 < 0x10);
786 assert(cond1 < 0x10);
787 if (imm) {
788 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
789 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
790 } else {
791 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
792 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
793 }
be516ebe 794}
795
d1e4ebd9 796static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 797{
d1e4ebd9 798 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 799}
800
d1e4ebd9 801static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 802{
d1e4ebd9 803 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 804}
805
806static void emit_cmovb_imm(int imm,u_int rt)
807{
d1e4ebd9 808 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 809}
810
3968e69e 811static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 812{
3968e69e 813 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
814 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 815}
816
817static void emit_cmovne_reg(u_int rs,u_int rt)
818{
d1e4ebd9 819 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
820 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 821}
822
823static void emit_cmovl_reg(u_int rs,u_int rt)
824{
d1e4ebd9 825 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
826 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 827}
828
e3c6bdb5 829static void emit_cmovb_reg(u_int rs,u_int rt)
830{
831 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
832 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
833}
834
be516ebe 835static void emit_cmovs_reg(u_int rs,u_int rt)
836{
d1e4ebd9 837 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
838 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 839}
840
3968e69e 841static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
842{
843 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
844 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
845}
846
be516ebe 847static void emit_slti32(u_int rs,int imm,u_int rt)
848{
849 if(rs!=rt) emit_zeroreg(rt);
850 emit_cmpimm(rs,imm);
851 if(rs==rt) emit_movimm(0,rt);
852 emit_cmovl_imm(1,rt);
853}
854
855static void emit_sltiu32(u_int rs,int imm,u_int rt)
856{
857 if(rs!=rt) emit_zeroreg(rt);
858 emit_cmpimm(rs,imm);
859 if(rs==rt) emit_movimm(0,rt);
860 emit_cmovb_imm(1,rt);
861}
862
863static void emit_cmp(u_int rs,u_int rt)
864{
865 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 866 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 867}
868
882a08fc 869static void emit_cmpcs(u_int rs,u_int rt)
870{
871 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
872 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
873}
874
be516ebe 875static void emit_set_gz32(u_int rs, u_int rt)
876{
877 //assem_debug("set_gz32\n");
878 emit_cmpimm(rs,1);
879 emit_movimm(1,rt);
880 emit_cmovl_imm(0,rt);
881}
882
883static void emit_set_nz32(u_int rs, u_int rt)
884{
885 //assem_debug("set_nz32\n");
d1e4ebd9 886 if(rs!=rt) emit_mov(rs,rt);
887 emit_test(rs,rs);
888 emit_cmovne_imm(1,rt);
be516ebe 889}
890
891static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
892{
893 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
894 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
895 emit_cmp(rs1,rs2);
896 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
897 emit_cmovl_imm(1,rt);
898}
899
900static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
901{
902 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
903 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
904 emit_cmp(rs1,rs2);
905 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
906 emit_cmovb_imm(1,rt);
907}
908
2a014d73 909static int can_jump_or_call(const void *a)
910{
911 intptr_t diff = (u_char *)a - out;
912 return (-134217728 <= diff && diff <= 134217727);
913}
914
d1e4ebd9 915static void emit_call(const void *a)
be516ebe 916{
d1e4ebd9 917 intptr_t diff = (u_char *)a - out;
918 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 919 assert(!(diff & 3));
920 if (-134217728 <= diff && diff <= 134217727)
921 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
922 else
3968e69e 923 abort();
be516ebe 924}
925
d1e4ebd9 926static void emit_jmp(const void *a)
be516ebe 927{
d1e4ebd9 928 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
929 u_int offset = genjmp(a);
930 output_w32(0x14000000 | offset);
be516ebe 931}
932
d1e4ebd9 933static void emit_jne(const void *a)
be516ebe 934{
d1e4ebd9 935 assem_debug("bne %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 938}
939
7c3a5182 940static void emit_jeq(const void *a)
be516ebe 941{
d1e4ebd9 942 assem_debug("beq %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 945}
946
7c3a5182 947static void emit_js(const void *a)
be516ebe 948{
d1e4ebd9 949 assem_debug("bmi %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 952}
953
7c3a5182 954static void emit_jns(const void *a)
be516ebe 955{
d1e4ebd9 956 assem_debug("bpl %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 959}
960
7c3a5182 961static void emit_jl(const void *a)
be516ebe 962{
d1e4ebd9 963 assem_debug("blt %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 966}
967
7c3a5182 968static void emit_jge(const void *a)
be516ebe 969{
d1e4ebd9 970 assem_debug("bge %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 973}
974
7c3a5182 975static void emit_jno(const void *a)
be516ebe 976{
d1e4ebd9 977 assem_debug("bvc %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 980}
981
7c3a5182 982static void emit_jc(const void *a)
be516ebe 983{
d1e4ebd9 984 assem_debug("bcs %p\n", a);
985 u_int offset = genjmpcc(a);
986 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 987}
988
3968e69e 989static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 990{
3968e69e 991 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 992 u_int offset = genjmpcc(a);
3968e69e 993 is64 = is64 ? 0x80000000 : 0;
994 isnz = isnz ? 0x01000000 : 0;
995 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
996}
997
9b495f6e 998static void *emit_cbz(u_int r, const void *a)
3968e69e 999{
9b495f6e 1000 void *ret = out;
3968e69e 1001 emit_cb(0, 0, a, r);
9b495f6e 1002 return ret;
be516ebe 1003}
1004
1005static void emit_jmpreg(u_int r)
1006{
3968e69e 1007 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 1008 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1009}
1010
1011static void emit_retreg(u_int r)
1012{
d1e4ebd9 1013 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1014 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1015}
1016
1017static void emit_ret(void)
1018{
1019 emit_retreg(LR);
1020}
1021
d1e4ebd9 1022static void emit_adr(void *addr, u_int rt)
1023{
1024 intptr_t offset = (u_char *)addr - out;
1025 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1026 assert(rt < 31);
d1e4ebd9 1027 assem_debug("adr x%d,#%#lx\n", rt, offset);
1028 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1029}
1030
3968e69e 1031static void emit_adrp(void *addr, u_int rt)
1032{
1033 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1034 assert(-4294967296l <= offset && offset < 4294967296l);
1035 assert(rt < 31);
1036 offset >>= 12;
1037 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1038 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1039}
1040
be516ebe 1041static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1042{
d1e4ebd9 1043 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1044 assert(-256 <= offset && offset < 256);
1045 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1046}
1047
1048static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1049{
1050 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1051 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1052}
1053
1054static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055{
1056 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1058}
1059
1060static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061{
1062 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1064}
1065
1066static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1067{
1068 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1070}
39b71d9a 1071#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1072
1073static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1074{
1075 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1076 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1077}
1078
1079static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1080{
1081 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1082 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1083}
1084
1085static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1086{
1087 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1088 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1089}
1090
1091static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1092{
1093 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1094 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1095}
1096
1097static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1098{
1099 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1100 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1101}
1102
be516ebe 1103static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1104{
d1e4ebd9 1105 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1106 assert(-256 <= offset && offset < 256);
1107 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1108}
1109
1110static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1111{
d1e4ebd9 1112 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1113 assert(-256 <= offset && offset < 256);
1114 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1115}
1116
1117static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1118{
d1e4ebd9 1119 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1120 assert(-256 <= offset && offset < 256);
1121 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1122}
1123
1124static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1125{
d1e4ebd9 1126 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1127 assert(-256 <= offset && offset < 256);
1128 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1129}
1130
be516ebe 1131static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1132{
3968e69e 1133 if (!(offset & 3) && (u_int)offset <= 16380) {
1134 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1135 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1139 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
687b4580 1141 else
1142 assert(0);
be516ebe 1143}
1144
1145static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1146{
3968e69e 1147 if (!(offset & 1) && (u_int)offset <= 8190) {
1148 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1149 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1150 }
1151 else if (-256 <= offset && offset < 256) {
1152 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1153 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1154 }
687b4580 1155 else
1156 assert(0);
be516ebe 1157}
1158
1159static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1160{
3968e69e 1161 if ((u_int)offset < 4096) {
1162 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1163 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1164 }
1165 else if (-256 <= offset && offset < 256) {
1166 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1167 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1168 }
687b4580 1169 else
1170 assert(0);
be516ebe 1171}
1172
3968e69e 1173static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1174{
3968e69e 1175 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1176 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1177}
1178
3968e69e 1179static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1180{
3968e69e 1181 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1183}
1184
1185static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1186{
1187 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1188 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1189}
1190
1191static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1192{
1193 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1194 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1195}
1196
3968e69e 1197static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1198{
1199 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1200 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1201}
1202
1203static void emit_clz(u_int rs, u_int rt)
be516ebe 1204{
1205 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1206 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1207}
1208
be516ebe 1209// special case for checking invalid_code
9b495f6e 1210static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1211{
1212 emit_shrimm(r, 12, rt);
1213 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1214 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
be516ebe 1215}
1216
3968e69e 1217// special for loadlr_assemble, rs2 is destroyed
1218static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1219{
3968e69e 1220 emit_shl(rs2, shift, rs2);
1221 emit_bic(rs1, rs2, rt);
be516ebe 1222}
1223
3968e69e 1224static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1225{
3968e69e 1226 emit_shr(rs2, shift, rs2);
1227 emit_bic(rs1, rs2, rt);
be516ebe 1228}
1229
687b4580 1230static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1231{
687b4580 1232 u_int op = 0xb9000000;
d1e4ebd9 1233 unused const char *ldst = is_st ? "st" : "ld";
1234 unused char rp = is64 ? 'x' : 'w';
687b4580 1235 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1236 is64 = is64 ? 1 : 0;
1237 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1238 ofs = (ofs >> (2+is64));
687b4580 1239 if (!is_st) op |= 0x00400000;
1240 if (is64) op |= 0x40000000;
d1e4ebd9 1241 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1242}
1243
687b4580 1244static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1245{
687b4580 1246 u_int op = 0x29000000;
d1e4ebd9 1247 unused const char *ldst = is_st ? "st" : "ld";
1248 unused char rp = is64 ? 'x' : 'w';
687b4580 1249 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1250 is64 = is64 ? 1 : 0;
1251 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1252 ofs = (ofs >> (2+is64));
1253 assert(-64 <= ofs && ofs <= 63);
1254 ofs &= 0x7f;
1255 if (!is_st) op |= 0x00400000;
1256 if (is64) op |= 0x80000000;
d1e4ebd9 1257 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1258}
1259
1260static void save_load_regs_all(int is_store, u_int reglist)
1261{
1262 int ofs = 0, c = 0;
1263 u_int r, pair[2];
1264 for (r = 0; reglist; r++, reglist >>= 1) {
1265 if (reglist & 1)
1266 pair[c++] = r;
1267 if (c == 2) {
1268 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1269 ofs += 8 * 2;
1270 c = 0;
1271 }
1272 }
1273 if (c) {
1274 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1275 ofs += 8;
1276 }
1277 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1278}
1279
1280// Save registers before function call
1281static void save_regs(u_int reglist)
1282{
1283 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1284 save_load_regs_all(1, reglist);
be516ebe 1285}
1286
1287// Restore registers after function call
1288static void restore_regs(u_int reglist)
1289{
1290 reglist &= CALLER_SAVE_REGS;
687b4580 1291 save_load_regs_all(0, reglist);
be516ebe 1292}
1293
1294/* Stubs/epilogue */
1295
1296static void literal_pool(int n)
1297{
1298 (void)literals;
1299}
1300
/* Intentionally a no-op in this backend. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1304
d1e4ebd9 1305// parsed by get_pointer, find_extjump_insn
104df9d3 1306static void emit_extjump(u_char *addr, u_int target)
be516ebe 1307{
d1e4ebd9 1308 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1309
d1e4ebd9 1310 emit_movz(target & 0xffff, 0);
1311 emit_movk_lsl16(target >> 16, 0);
1312
1313 // addr is in the current recompiled block (max 256k)
1314 // offset shouldn't exceed +/-1MB
1315 emit_adr(addr, 1);
104df9d3 1316 emit_far_jump(dyna_linker);
be516ebe 1317}
1318
d1e4ebd9 1319static void check_extjump2(void *src)
be516ebe 1320{
d1e4ebd9 1321 u_int *ptr = src;
1322 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1323 (void)ptr;
be516ebe 1324}
1325
1326// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1327static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1328{
d1e4ebd9 1329 int diff = rt_val - rs_val;
3968e69e 1330 if ((-4096 < diff && diff < 4096)
1331 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1332 emit_addimm(rs, diff, rt);
3968e69e 1333 else if (rt_val == ~rs_val)
1334 emit_not(rs, rt);
d1e4ebd9 1335 else if (is_rotated_mask(rs_val ^ rt_val))
1336 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1337 else
d1e4ebd9 1338 emit_movimm(rt_val, rt);
be516ebe 1339}
1340
d1e4ebd9 1341// return 1 if the above function can do it's job cheaply
687b4580 1342static int is_similar_value(u_int v1, u_int v2)
be516ebe 1343{
687b4580 1344 int diff = v1 - v2;
3968e69e 1345 return (-4096 < diff && diff < 4096)
1346 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1347 || v1 == ~v2
d1e4ebd9 1348 || is_rotated_mask(v1 ^ v2);
1349}
1350
37387d8b 1351static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1352{
1353 if (rt_val < 0x100000000ull) {
1354 emit_movimm_from(rs_val, rs, rt_val, rt);
1355 return;
1356 }
1357 // just move the whole thing. At least on Linux all addresses
1358 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1359 emit_movimm64(rt_val, rt);
37387d8b 1360}
1361
1362// trashes x2
d1e4ebd9 1363static void pass_args64(u_int a0, u_int a1)
1364{
1365 if(a0==1&&a1==0) {
1366 // must swap
1367 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1368 }
1369 else if(a0!=0&&a1==0) {
1370 emit_mov64(a1,1);
1371 if (a0>=0) emit_mov64(a0,0);
1372 }
1373 else {
1374 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1375 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1376 }
be516ebe 1377}
1378
d1e4ebd9 1379static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1380{
1381 switch(type) {
1382 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1383 case LOADBU_STUB:
1384 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1385 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1386 case LOADHU_STUB:
1387 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1388 case LOADW_STUB:
1389 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1390 default: assert(0);
d1e4ebd9 1391 }
1392}
1393
1394#include "pcsxmem.h"
be516ebe 1395//#include "pcsxmem_inline.c"
1396
/*
 * Emit the out-of-line slow path for the load recorded in stubs[n] and
 * point the branch at stubs[n].addr to it.
 *
 * The emitted code indexes mem_rtab with (rs >> 12), doubles the entry
 * with emit_adds64 and branches on carry:
 *  - no carry: the load is done directly with the doubled entry as base
 *    and rs as index, then control returns to stubs[n].retaddr;
 *  - carry: caller-saved registers are saved, the matching
 *    jump_handler_read{8,16,32} is called, the result in register 0 is
 *    extended into rt, registers are restored and control returns.
 */
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type = stubs[n].type;
  int i = stubs[n].a;
  int rs = stubs[n].b;
  const struct regstat *i_regs = (void *)stubs[n].c;
  u_int reglist = stubs[n].e;
  const signed char *i_regmap = i_regs->regmap;
  int rt;
  // coprocessor loads and LOADLR land in FTEMP, everything else in rt1
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,dops[i].rt1);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL, *handler_jump = NULL;
  // the address register must not be picked as scratch
  reglist|=(1<<rs);
  // look for a register below HOST_CCREG that is not in reglist
  for (r = 0; r < HOST_CCREG; r++) {
    if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
      temp = r;
      break;
    }
  }
  // the destination is about to be overwritten, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: save registers up front and use 0 (or 2 if rs is 0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // prefer register 1 over HOST_TEMPREG as second scratch when it is available
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  emit_readdword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readdword_dualindexedx8(temp,temp2,temp2);
  // doubling the table entry moves its top bit into the carry flag
  emit_adds64(temp2,temp2,temp2);
  handler_jump=out;
  emit_jc(0); // target patched below to the handler path
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
      default: assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jmp(0); // jump to reg restore
  }
  else
    emit_jmp(stubs[n].retaddr); // return address
  set_jump_target(handler_jump, out);

  // handler path starts here
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // x0 = address register, x1 = doubled table entry
  pass_args64(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // register 2 = cycle count plus the adjustment stored in stubs[n].d
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  emit_far_call(handler);
  // (no cycle reload after read)
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    loadstore_extend(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1481
/*
 * Emit an inline read of the constant guest address addr.
 *
 * get_direct_memhandler() is consulted first: when it returns NULL the
 * value is loaded straight from host_addr (nothing is emitted if there is
 * no destination register or rt1 is 0).  Otherwise registers are saved and
 * a handler is called, with the result extended from register 0 into the
 * destination.
 *
 *   type    - LOAD*_STUB selecting width and signedness
 *   i       - index into dops[] of the instruction being assembled
 *   regmap  - host register map used to look up target and CCREG
 *   target  - guest register whose host register serves as address/result
 *   adj     - value added to the cycle count that is passed in register 2
 *   reglist - host registers to preserve around the handler call
 */
static void inline_readstub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  if(rs<0) rs=get_reg_temp(regmap);
  assert(rs>=0);
  u_int is_dynamic=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
  //  return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // direct access: no handler involved
    if(rt<0||dops[i].rt1==0)
      return;
    // turn the guest address held in rs into the host address
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  is_dynamic = pcsxmem_is_handler_dynamic(addr);
  if (is_dynamic) {
    // dynamic handlers go through the generic jump_handler_read* entry points
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // register 0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  if(cc<0)
    emit_loadreg(CCREG,2);
  // register 2 = cycle count + adj
  emit_addimm(cc<0?2:cc,adj,2);
  if(is_dynamic) {
    // register 1 = the mem_rtab entry for this page, shifted left by one;
    // use adrp+add when it is in adrp range of the output pointer
    uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
    intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
    if (-4294967296l <= offset && offset < 4294967296l) {
      emit_adrp((void *)l1, 1);
      emit_addimm64(1, l1 & 0xfff, 1);
    }
    else
      emit_movimm64(l1, 1);
  }
  else
    emit_far_call(do_memhandler_pre);

  emit_far_call(handler);

  // (no cycle reload after read)
  if(rt>=0&&dops[i].rt1!=0)
    loadstore_extend(type, 0, rt);
  restore_regs(reglist);
}
1552
1553static void do_writestub(int n)
1554{
1555 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1556 set_jump_target(stubs[n].addr, out);
1557 enum stub_type type=stubs[n].type;
1558 int i=stubs[n].a;
1559 int rs=stubs[n].b;
1560 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1561 u_int reglist=stubs[n].e;
1562 signed char *i_regmap=i_regs->regmap;
1563 int rt,r;
cf95b4f0 1564 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1565 rt=get_reg(i_regmap,r=FTEMP);
1566 }else{
cf95b4f0 1567 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1568 }
1569 assert(rs>=0);
1570 assert(rt>=0);
1571 int rtmp,temp=-1,temp2,regs_saved=0;
1572 void *restore_jump = NULL, *handler_jump = NULL;
1573 int reglist2=reglist|(1<<rs)|(1<<rt);
1574 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1575 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1576 temp = rtmp;
1577 break;
1578 }
1579 }
1580 if(temp==-1) {
1581 save_regs(reglist);
1582 regs_saved=1;
1583 for(rtmp=0;rtmp<=3;rtmp++)
1584 if(rtmp!=rs&&rtmp!=rt)
1585 {temp=rtmp;break;}
1586 }
1587 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1588 temp2=3;
1589 else {
1590 host_tempreg_acquire();
1591 temp2=HOST_TEMPREG;
1592 }
1593 emit_readdword(&mem_wtab,temp);
1594 emit_shrimm(rs,12,temp2);
1595 emit_readdword_dualindexedx8(temp,temp2,temp2);
1596 emit_adds64(temp2,temp2,temp2);
1597 handler_jump=out;
1598 emit_jc(0);
1599 switch(type) {
1600 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1601 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1602 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1603 default: assert(0);
1604 }
1605 if(regs_saved) {
1606 restore_jump=out;
1607 emit_jmp(0); // jump to reg restore
1608 }
1609 else
1610 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1611 set_jump_target(handler_jump, out);
1612
d1e4ebd9 1613 if(!regs_saved)
1614 save_regs(reglist);
1615 void *handler=NULL;
1616 switch(type) {
1617 case STOREB_STUB: handler=jump_handler_write8; break;
1618 case STOREH_STUB: handler=jump_handler_write16; break;
1619 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1620 default: assert(0);
d1e4ebd9 1621 }
1622 assert(handler);
1623 pass_args(rs,rt);
1624 if(temp2!=3) {
1625 emit_mov64(temp2,3);
1626 host_tempreg_release();
1627 }
1628 int cc=get_reg(i_regmap,CCREG);
1629 if(cc<0)
1630 emit_loadreg(CCREG,2);
2330734f 1631 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1632 // returns new cycle_count
2a014d73 1633 emit_far_call(handler);
2330734f 1634 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1635 if(cc<0)
1636 emit_storereg(CCREG,2);
1637 if(restore_jump)
1638 set_jump_target(restore_jump, out);
1639 restore_regs(reglist);
1640 emit_jmp(stubs[n].retaddr);
be516ebe 1641}
1642
81dbbf4c 1643static void inline_writestub(enum stub_type type, int i, u_int addr,
1644 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1645{
9de8a0c3 1646 int rs = get_reg_temp(regmap);
687b4580 1647 int rt = get_reg(regmap,target);
1648 assert(rs >= 0);
1649 assert(rt >= 0);
1650 uintptr_t host_addr = 0;
1651 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1652 if (handler == NULL) {
37387d8b 1653 if (addr != host_addr)
1654 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1655 switch (type) {
687b4580 1656 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1657 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1658 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1659 default: assert(0);
1660 }
1661 return;
1662 }
1663
1664 // call a memhandler
1665 save_regs(reglist);
687b4580 1666 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1667 loadstore_extend(type, rt, 0);
1668 int cc, cc_use;
1669 cc = cc_use = get_reg(regmap, CCREG);
1670 if (cc < 0)
1671 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1672 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1673
2a014d73 1674 emit_far_call(do_memhandler_pre);
1675 emit_far_call(handler);
1676 emit_far_call(do_memhandler_post);
2330734f 1677 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1678 if (cc < 0)
1679 emit_storereg(CCREG, cc_use);
687b4580 1680 restore_regs(reglist);
be516ebe 1681}
1682
3968e69e 1683/* Special assem */
1684
81dbbf4c 1685static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1686{
1687 save_load_regs_all(1, reglist);
32631e6a 1688 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1689#ifdef PCNT
1690 emit_movimm(op, 0);
2a014d73 1691 emit_far_call(pcnt_gte_start);
3968e69e 1692#endif
1693 // pointer to cop2 regs
1694 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1695}
1696
1697static void c2op_epilogue(u_int op,u_int reglist)
1698{
1699#ifdef PCNT
1700 emit_movimm(op, 0);
2a014d73 1701 emit_far_call(pcnt_gte_end);
3968e69e 1702#endif
1703 save_load_regs_all(0, reglist);
be516ebe 1704}
1705
81dbbf4c 1706static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1707{
3968e69e 1708 u_int c2op=source[i]&0x3f;
1709 u_int hr,reglist_full=0,reglist;
1710 int need_flags,need_ir;
1711 for(hr=0;hr<HOST_REGS;hr++) {
1712 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1713 }
1714 reglist=reglist_full&CALLER_SAVE_REGS;
1715
1716 if (gte_handlers[c2op]!=NULL) {
1717 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1718 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1719 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1720 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1721 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1722 need_flags=0;
1723 //int shift = (source[i] >> 19) & 1;
1724 //int lm = (source[i] >> 10) & 1;
1725 switch(c2op) {
1726 default:
1727 (void)need_ir;
81dbbf4c 1728 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1729 emit_movimm(source[i],1); // opcode
1730 emit_writeword(1,&psxRegs.code);
2a014d73 1731 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1732 break;
1733 }
1734 c2op_epilogue(c2op,reglist);
1735 }
1736}
1737
/*
 * Emit the value fix-up for a write to cop2 control register 31; the
 * intended computation is given in the commented C lines below.
 * sl is the source register, temp receives the result.
 *
 * NOTE: the function ends in assert(0) ("testing needed"), so reaching it
 * aborts in builds with assertions enabled.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  //value = value & 0x7ffff000;
  //if (value & 0x7f87e000) value |= 0x80000000;
  emit_andimm(sl, 0x7fffe000, temp);
  emit_testimm(temp, 0xff87ffff);
  emit_andimm(sl, 0x7ffff000, temp);
  host_tempreg_acquire();
  emit_orimm(temp, 0x80000000, HOST_TEMPREG);
  // relies on the flags from emit_testimm above still being intact
  emit_cmovne_reg(HOST_TEMPREG, temp);
  host_tempreg_release();
  assert(0); // testing needed
}
1751
/*
 * Emit code that reads reg_cop2d[copr] as a signed halfword into temp and
 * reduces it to its 0xf80 bits, with values above 0xf80 treated as all
 * ones first.  Used by c2op_mfc2_29_assemble() for each of its three
 * source registers.
 */
static void do_mfc2_31_one(u_int copr,signed char temp)
{
  emit_readshword(&reg_cop2d[copr],temp);
  // presumably clears temp when it is negative (temp & ~(temp >> 31)) - confirm in emit_bicsar_imm
  emit_bicsar_imm(temp,31,temp);
  emit_cmpimm(temp,0xf80);
  emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
  emit_andimm(temp,0xf80,temp);
}
1760
/*
 * Emit the read of cop2 data register 29: the values produced by
 * do_mfc2_31_one() for registers 9, 10 and 11 are combined into tl
 * (>>7, >>2 and <<3 respectively) and tl is stored to reg_cop2d[29].
 *
 *   tl   - destination host register
 *   temp - scratch host register; when negative, HOST_TEMPREG is acquired
 *          and used instead
 */
static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
{
  if (temp < 0) {
    host_tempreg_acquire();
    temp = HOST_TEMPREG;
  }
  do_mfc2_31_one(9,temp);
  emit_shrimm(temp,7,tl);
  do_mfc2_31_one(10,temp);
  emit_orrshr_imm(temp,2,tl);
  do_mfc2_31_one(11,temp);
  emit_orrshl_imm(temp,3,tl);
  emit_writeword(tl,&reg_cop2d[29]);

  // note: this also releases when the caller passed HOST_TEMPREG itself
  if (temp == HOST_TEMPREG)
    host_tempreg_release();
}
1778
/*
 * Assemble MULT/MULTU/DIV/DIVU (opcode2 0x18..0x1B) for instruction i.
 *
 * With both source registers non-zero-register, the full operation is
 * emitted with results in the host registers mapped to HIREG/LOREG.
 * When either source is guest register 0 the result is known up front:
 * a division by register 0 gets its fixed results, everything else
 * yields zero in both HI and LO.
 */
static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
{
  // case 0x18: MULT
  // case 0x19: MULTU
  // case 0x1A: DIV
  // case 0x1B: DIVU
  if(dops[i].rs1&&dops[i].rs2)
  {
    switch(dops[i].opcode2)
    {
    case 0x18: // MULT
    case 0x19: // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
        signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);

        // the 64-bit product goes to hi first
        if(dops[i].opcode2==0x18) // MULT
          emit_smull(m1,m2,hi);
        else // MULTU
          emit_umull(m1,m2,hi);

        // lo = low 32 bits, hi = high 32 bits of the product
        emit_mov(hi,lo);
        emit_shrimm64(hi,32,hi);
        break;
      }
    case 0x1A: // DIV
    case 0x1B: // DIVU
      {
        signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
        signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(numerator>=0);
        assert(denominator>=0);
        assert(quotient>=0);
        assert(remainder>=0);

        if (dops[i].opcode2 == 0x1A) // DIV
          emit_sdiv(numerator,denominator,quotient);
        else // DIVU
          emit_udiv(numerator,denominator,quotient);
        // remainder = numerator - quotient * denominator
        emit_msub(quotient,denominator,numerator,remainder);

        // div 0 quotient (remainder is already correct)
        host_tempreg_acquire();
        if (dops[i].opcode2 == 0x1A) // DIV
          emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
        else
          emit_movimm(~0,HOST_TEMPREG);
        // replace the quotient only when the denominator is zero
        emit_test(denominator,denominator);
        emit_cmoveq_reg(HOST_TEMPREG,quotient);
        host_tempreg_release();
        break;
      }
    default:
      assert(0);
    }
  }
  else
  {
    // at least one operand is guest register 0; HI/LO may be unmapped here
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
    {
      if (dops[i].rs1) {
        signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
        assert(numerator >= 0);
        // HI (remainder) = numerator
        if (hr >= 0)
          emit_mov(numerator,hr);
        if (lr >= 0) {
          if (dops[i].opcode2 == 0x1A) // DIV
            emit_sub_asrimm(0,numerator,31,lr);
          else
            emit_movimm(~0,lr);
        }
      }
      else {
        // 0 / 0: HI = 0, LO = ~0
        if (hr >= 0) emit_zeroreg(hr);
        if (lr >= 0) emit_movimm(~0,lr);
      }
    }
    else
    {
      // Multiply by zero is zero.
      if (hr >= 0) emit_zeroreg(hr);
      if (lr >= 0) emit_zeroreg(lr);
    }
  }
}
1874#define multdiv_assemble multdiv_assemble_arm64
1875
d1e4ebd9 1876static void do_jump_vaddr(u_int rs)
1877{
1878 if (rs != 0)
1879 emit_mov(rs, 0);
104df9d3 1880 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1881 emit_jmpreg(0);
1882}
1883
be516ebe 1884static void do_preload_rhash(u_int r) {
1885 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1886 // register. On ARM the hash can be done with a single instruction (below)
1887}
1888
1889static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1890 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1891}
1892
1893static void do_rhash(u_int rs,u_int rh) {
1894 emit_andimm(rs, 0xf8, rh);
1895}
1896
d1e4ebd9 1897static void do_miniht_load(int ht, u_int rh) {
1898 emit_add64(ht, rh, ht);
1899 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1900}
1901
d1e4ebd9 1902static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1903 emit_cmp(rh, rs);
1904 void *jaddr = out;
1905 emit_jeq(0);
1906 do_jump_vaddr(rs);
1907
1908 set_jump_target(jaddr, out);
1909 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1910 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1911 emit_jmpreg(ht);
be516ebe 1912}
1913
d1e4ebd9 1914// parsed by set_jump_target?
be516ebe 1915static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1916 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1917 emit_movk(return_address&0xffff,rt);
1918 add_to_linker(out,return_address,1);
1919 emit_adr(out,temp);
1920 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1921 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1922}
1923
/*
 * Make newly written code in [start, end) visible to instruction fetch:
 * clean+invalidate the data cache lines covering the range, invalidate the
 * matching instruction cache lines, with the barriers in between.  Either
 * step is skipped when CTR_EL0 reports it is unnecessary (IDC / DIC bits).
 *
 * The line sizes are kept in function-local statics that only ever shrink,
 * so the smallest size seen across calls is the one used.
 */
static unused void clear_cache_arm64(char *start, char *end)
{
  // Don't rely on GCC's __clear_cache implementation, as it caches
  // icache/dcache cache line sizes, that can vary between cores on
  // big.LITTLE architectures.
  uint64_t addr, ctr_el0;
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  // bits 3:0 and 19:16 hold log2 of the line size in 4-byte words
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
  dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    // start from the line containing 'start'
    addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
    for (; addr < (uint64_t)end; addr += dsize)
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    addr = (uint64_t)start & ~(uint64_t)(isize - 1);
    for (; addr < (uint64_t)end; addr += isize)
      __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1964
1965// CPU-architecture-specific initialization
2a014d73 1966static void arch_init(void)
1967{
1968 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 1969 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 1970 size_t i;
1971 assert(!(diff & 3));
d9e2b173 1972 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 1973 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 1974 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1975 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 1976 }
1977 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 1978}
1979
1980// vim:shiftwidth=2:expandtab