cspace: add forgotten length decrement
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define unused __attribute__((unused))
27
d1e4ebd9 28void do_memhandler_pre();
29void do_memhandler_post();
be516ebe 30
31/* Linker */
d1e4ebd9 32static void set_jump_target(void *addr, void *target)
be516ebe 33{
d9e2b173 34 u_int *ptr = NDRC_WRITE_OFFSET(addr);
d1e4ebd9 35 intptr_t offset = (u_char *)target - (u_char *)addr;
36
3968e69e 37 if ((*ptr&0xFC000000) == 0x14000000) { // b
d1e4ebd9 38 assert(offset>=-134217728LL&&offset<134217728LL);
39 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
40 }
3968e69e 41 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
42 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
d1e4ebd9 43 // Conditional branch are limited to +/- 1MB
44 // block max size is 256k so branching beyond the +/- 1MB limit
3d680478 45 // should only happen when jumping to an already compiled block (see add_jump_out)
d1e4ebd9 46 // a workaround would be to do a trampoline jump via a stub at the end of the block
3968e69e 47 assert(-1048576 <= offset && offset < 1048576);
d1e4ebd9 48 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
49 }
3968e69e 50 else if((*ptr&0x9f000000)==0x10000000) { // adr
d1e4ebd9 51 // generated by do_miniht_insert
52 assert(offset>=-1048576LL&&offset<1048576LL);
53 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
54 }
55 else
3968e69e 56 abort(); // should not happen
be516ebe 57}
58
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is.
// The third word of the stub must be an adr instruction; its pc-relative
// immediate is decoded and applied to the address of that adr.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  int32_t immhi;
  int offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi lives in bits 23:5: move it to the top with an unsigned shift,
  // then sign-extend it back down (avoids left-shifting signed values)
  immhi = (int32_t)(insn << 8) >> 13;
  // immlo lives in bits 30:29 and supplies the low two bits of the offset
  offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
68
#if 0
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to the branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  const int is_b = (*i_ptr&0xfc000000) == 0x14000000;       // b
  const int is_bcond = (*i_ptr&0xff000000) == 0x54000000;   // b.cond
  const int is_cbz = (*i_ptr&0x7e000000) == 0x34000000;     // cbz/cbnz

  if (is_b)
    return i_ptr + ((signed int)(*i_ptr<<6)>>6);
  if (is_bcond || is_cbz)
    return i_ptr + ((signed int)(*i_ptr<<8)>>13);
  assert(0);
  return NULL;
}
#endif
be516ebe 86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes; the numeric value is what gets placed into the
// condition field of the emitted instructions (see condname for the names).
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
d9e2b173 145 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
be516ebe 146 out += 4;
147}
148
// Pack the Rn (bits 9:5) and Rd (bits 4:0) fields.
// Both register numbers must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rn << 5;
  fields |= rd;
  return fields;
}
155
// Pack the Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) fields.
// Register number 31 is accepted for every field.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rm << 16;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
163
// Pack the Rm (bits 20:16), Ra (bits 14:10), Rn (bits 9:5) and Rd (bits 4:0)
// fields. All register numbers must be below 32.
static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
{
  assert(ra < 32);
  // same checks and packing as rm_rn_rd(), written out in place
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return (rm << 16) | (rn << 5) | rd | (ra << 10);
}
169
// Pack a 7-bit immediate (bits 21:15) with the Rt2 (bits 14:10),
// Rn (bits 9:5) and Rt (bits 4:0) fields.
// rt and rt2 must be below 31; rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = imm7 << 15;
  fields |= rt2 << 10;
  fields |= rn << 5;
  fields |= rt;
  return fields;
}
178
// Pack the Rm (bits 20:16), a 6-bit immediate (bits 15:10), Rn (bits 9:5)
// and Rd (bits 4:0) fields.
static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
{
  assert(imm6 <= 63);
  // same checks and packing as rm_rn_rd(), written out in place
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return (rm << 16) | (rn << 5) | rd | (imm6 << 10);
}
184
// Pack a 16-bit immediate (bits 20:5) with the Rd field (bits 4:0).
// rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = imm16 << 5;
  fields |= rd;
  return fields;
}
191
// Pack a 12-bit immediate (bits 21:10) with the Rn (bits 9:5) and
// Rd (bits 4:0) fields. Register number 31 is accepted for both.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = imm12 << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
199
// Pack a 9-bit immediate (bits 20:12) with the Rn (bits 9:5) and
// Rt (bits 4:0) fields. Both register numbers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = imm9 << 12;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
207
// Pack a 19-bit immediate (bits 23:5) with the Rt field (bits 4:0).
// rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = imm19 << 5;
  fields |= rt;
  return fields;
}
214
// Pack the N (bit 22), immr (bits 21:16), imms (bits 15:10), Rn (bits 9:5)
// and Rd (bits 4:0) fields used by the bitfield/logical-immediate encodings.
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = n << 22;
  fields |= immr << 16;
  fields |= imms << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
224
225static u_int genjmp(const u_char *addr)
be516ebe 226{
227 intptr_t offset = addr - out;
d1e4ebd9 228 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 229 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 230 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
231 abort();
be516ebe 232 return 0;
233 }
d1e4ebd9 234 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 235}
236
d1e4ebd9 237static u_int genjmpcc(const u_char *addr)
be516ebe 238{
239 intptr_t offset = addr - out;
d1e4ebd9 240 if ((uintptr_t)addr < 3) return 0;
be516ebe 241 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 242 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
243 abort();
244 return 0;
245 }
246 return ((u_int)offset >> 2) & 0x7ffff;
247}
248
// Returns 1 when 'value' is a non-empty run of ones starting at bit 0
// (i.e. of the form 2^k - 1, including all ones), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low-bit run clears it and carries out of the run
  return (value & (value + 1)) == 0;
}
253
254// This function returns true if the argument contains a
255// non-empty sequence of ones (possibly rotated) with the remainder zero.
256static uint32_t is_rotated_mask(u_int value)
257{
3968e69e 258 if (value == 0 || value == ~0)
be516ebe 259 return 0;
d1e4ebd9 260 if (is_mask((value - 1) | value))
261 return 1;
262 return is_mask((~value - 1) | ~value);
263}
264
265static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
266{
267 int lzeros, tzeros, ones;
268 assert(value != 0);
269 if (is_mask((value - 1) | value)) {
270 lzeros = __builtin_clz(value);
271 tzeros = __builtin_ctz(value);
272 ones = 32 - lzeros - tzeros;
273 *immr = (32 - tzeros) & 31;
274 *imms = ones - 1;
275 return;
be516ebe 276 }
d1e4ebd9 277 value = ~value;
278 if (is_mask((value - 1) | value)) {
279 lzeros = __builtin_clz(value);
280 tzeros = __builtin_ctz(value);
281 ones = 32 - lzeros - tzeros;
3968e69e 282 *immr = lzeros;
d1e4ebd9 283 *imms = 31 - ones;
284 return;
285 }
3968e69e 286 abort();
be516ebe 287}
288
289static void emit_mov(u_int rs, u_int rt)
290{
687b4580 291 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 292 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
293}
294
295static void emit_mov64(u_int rs, u_int rt)
296{
297 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
298 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 299}
300
687b4580 301static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 302{
d1e4ebd9 303 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
304 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 305}
306
d1e4ebd9 307static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 308{
d1e4ebd9 309 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
310 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 311}
312
d1e4ebd9 313static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 314{
3968e69e 315 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 316 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
317}
39b71d9a 318#define emit_adds_ptr emit_adds64
d1e4ebd9 319
320static void emit_neg(u_int rs, u_int rt)
321{
322 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
323 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 324}
325
687b4580 326static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 327{
d1e4ebd9 328 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 329 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 330}
331
3968e69e 332static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
333{
334 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
335 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
336}
337
d1e4ebd9 338static void emit_movz(u_int imm, u_int rt)
be516ebe 339{
d1e4ebd9 340 assem_debug("movz %s,#%#x\n", regname[rt], imm);
341 output_w32(0x52800000 | imm16_rd(imm, rt));
342}
343
344static void emit_movz_lsl16(u_int imm, u_int rt)
345{
346 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
347 output_w32(0x52a00000 | imm16_rd(imm, rt));
348}
349
350static void emit_movn(u_int imm, u_int rt)
351{
352 assem_debug("movn %s,#%#x\n", regname[rt], imm);
353 output_w32(0x12800000 | imm16_rd(imm, rt));
354}
355
356static void emit_movn_lsl16(u_int imm,u_int rt)
357{
358 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
359 output_w32(0x12a00000 | imm16_rd(imm, rt));
360}
361
362static void emit_movk(u_int imm,u_int rt)
363{
364 assem_debug("movk %s,#%#x\n", regname[rt], imm);
365 output_w32(0x72800000 | imm16_rd(imm, rt));
366}
367
368static void emit_movk_lsl16(u_int imm,u_int rt)
369{
370 assert(imm<65536);
3968e69e 371 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 372 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 373}
374
375static void emit_zeroreg(u_int rt)
376{
d1e4ebd9 377 emit_movz(0, rt);
be516ebe 378}
379
be516ebe 380static void emit_movimm(u_int imm, u_int rt)
381{
d1e4ebd9 382 if (imm < 65536)
383 emit_movz(imm, rt);
384 else if ((~imm) < 65536)
385 emit_movn(~imm, rt);
386 else if ((imm&0xffff) == 0)
387 emit_movz_lsl16(imm >> 16, rt);
388 else if (((~imm)&0xffff) == 0)
389 emit_movn_lsl16(~imm >> 16, rt);
390 else if (is_rotated_mask(imm)) {
391 u_int immr, imms;
392 gen_logical_imm(imm, &immr, &imms);
393 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
394 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
395 }
be516ebe 396 else {
d1e4ebd9 397 emit_movz(imm & 0xffff, rt);
398 emit_movk_lsl16(imm >> 16, rt);
be516ebe 399 }
400}
401
aaece508 402static void emit_movimm64(uint64_t imm, u_int rt)
403{
404 u_int shift, op, imm16, insns = 0;
405 for (shift = 0; shift < 4; shift++) {
406 imm16 = (imm >> shift * 16) & 0xffff;
407 if (!imm16)
408 continue;
409 op = insns ? 0xf2800000 : 0xd2800000;
410 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
411 if (shift)
412 assem_debug(",lsl #%u", shift * 16);
413 assem_debug("\n");
414 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
415 insns++;
416 }
417 if (!insns) {
418 assem_debug("movz %s,#0\n", regname64[rt]);
419 output_w32(0xd2800000 | imm16_rd(0, rt));
420 }
421}
422
687b4580 423static void emit_readword(void *addr, u_int rt)
424{
425 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
426 if (!(offset & 3) && offset <= 16380) {
427 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
428 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
429 }
430 else
3968e69e 431 abort();
687b4580 432}
433
d1e4ebd9 434static void emit_readdword(void *addr, u_int rt)
435{
436 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
437 if (!(offset & 7) && offset <= 32760) {
438 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
439 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
440 }
3968e69e 441 else
442 abort();
443}
39b71d9a 444#define emit_readptr emit_readdword
3968e69e 445
446static void emit_readshword(void *addr, u_int rt)
447{
448 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
449 if (!(offset & 1) && offset <= 8190) {
450 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
451 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
452 }
d1e4ebd9 453 else
454 assert(0);
455}
456
be516ebe 457static void emit_loadreg(u_int r, u_int hr)
458{
d1e4ebd9 459 int is64 = 0;
be516ebe 460 if (r == 0)
461 emit_zeroreg(hr);
462 else {
33788798 463 void *addr;
be516ebe 464 switch (r) {
7c3a5182 465 //case HIREG: addr = &hi; break;
466 //case LOREG: addr = &lo; break;
be516ebe 467 case CCREG: addr = &cycle_count; break;
468 case CSREG: addr = &Status; break;
d1e4ebd9 469 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 470 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 471 default:
472 assert(r < 34);
473 addr = &psxRegs.GPR.r[r];
474 break;
be516ebe 475 }
d1e4ebd9 476 if (is64)
477 emit_readdword(addr, hr);
478 else
479 emit_readword(addr, hr);
be516ebe 480 }
481}
482
687b4580 483static void emit_writeword(u_int rt, void *addr)
484{
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 3) && offset <= 16380) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
488 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
489 }
490 else
491 assert(0);
492}
493
d1e4ebd9 494static void emit_writedword(u_int rt, void *addr)
495{
496 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
497 if (!(offset & 7) && offset <= 32760) {
498 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 499 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 500 }
501 else
3968e69e 502 abort();
d1e4ebd9 503}
504
687b4580 505static void emit_storereg(u_int r, u_int hr)
be516ebe 506{
507 assert(r < 64);
7c3a5182 508 void *addr = &psxRegs.GPR.r[r];
be516ebe 509 switch (r) {
7c3a5182 510 //case HIREG: addr = &hi; break;
511 //case LOREG: addr = &lo; break;
be516ebe 512 case CCREG: addr = &cycle_count; break;
7c3a5182 513 default: assert(r < 34); break;
be516ebe 514 }
687b4580 515 emit_writeword(hr, addr);
be516ebe 516}
517
518static void emit_test(u_int rs, u_int rt)
519{
d1e4ebd9 520 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
521 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 522}
523
d1e4ebd9 524static void emit_testimm(u_int rs, u_int imm)
be516ebe 525{
d1e4ebd9 526 u_int immr, imms;
687b4580 527 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 528 assert(is_rotated_mask(imm)); // good enough for PCSX
529 gen_logical_imm(imm, &immr, &imms);
3968e69e 530 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 531}
532
533static void emit_not(u_int rs,u_int rt)
534{
535 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 536 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 537}
538
be516ebe 539static void emit_and(u_int rs1,u_int rs2,u_int rt)
540{
541 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 542 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 543}
544
545static void emit_or(u_int rs1,u_int rs2,u_int rt)
546{
547 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 548 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 549}
550
3968e69e 551static void emit_bic(u_int rs1,u_int rs2,u_int rt)
552{
553 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
554 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
555}
556
be516ebe 557static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
558{
be516ebe 559 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 560 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 561}
562
563static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
564{
be516ebe 565 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 566 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 567}
568
3968e69e 569static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
570{
571 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
572 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
573}
574
be516ebe 575static void emit_xor(u_int rs1,u_int rs2,u_int rt)
576{
577 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 578 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 579}
580
3968e69e 581static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
582{
583 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
584 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
585}
586
d1e4ebd9 587static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 588{
d1e4ebd9 589 unused const char *st = s ? "s" : "";
590 s = s ? 0x20000000 : 0;
591 is64 = is64 ? 0x80000000 : 0;
687b4580 592 if (imm < 4096) {
d1e4ebd9 593 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 595 }
596 else if (-imm < 4096) {
3968e69e 597 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 598 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
599 }
600 else if (imm < 16777216) {
601 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
602 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
603 if ((imm & 0xfff) || s) {
604 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 605 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 606 }
607 }
608 else if (-imm < 16777216) {
609 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
610 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
611 if ((imm & 0xfff) || s) {
612 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
613 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
614 }
687b4580 615 }
616 else
3968e69e 617 abort();
be516ebe 618}
619
d1e4ebd9 620static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
621{
622 emit_addimm_s(0, 0, rs, imm, rt);
623}
624
625static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
626{
627 emit_addimm_s(0, 1, rs, imm, rt);
628}
629
be516ebe 630static void emit_addimm_and_set_flags(int imm, u_int rt)
631{
d1e4ebd9 632 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 633}
634
d1e4ebd9 635static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 636{
d1e4ebd9 637 const char *names[] = { "and", "orr", "eor", "ands" };
638 const char *name = names[op];
639 u_int immr, imms;
640 op = op << 29;
641 if (is_rotated_mask(imm)) {
642 gen_logical_imm(imm, &immr, &imms);
643 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
644 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
645 }
646 else {
647 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
648 host_tempreg_acquire();
649 emit_movimm(imm, HOST_TEMPREG);
650 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
651 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
652 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
653 host_tempreg_release();
654 }
655 (void)name;
be516ebe 656}
657
d1e4ebd9 658static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 659{
d1e4ebd9 660 if (imm == 0)
661 emit_zeroreg(rt);
662 else
663 emit_logicop_imm(0, rs, imm, rt);
be516ebe 664}
665
d1e4ebd9 666static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 667{
d1e4ebd9 668 if (imm == 0) {
669 if (rs != rt)
670 emit_mov(rs, rt);
671 }
672 else
673 emit_logicop_imm(1, rs, imm, rt);
be516ebe 674}
675
d1e4ebd9 676static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 677{
d1e4ebd9 678 if (imm == 0) {
679 if (rs != rt)
680 emit_mov(rs, rt);
681 }
682 else
683 emit_logicop_imm(2, rs, imm, rt);
be516ebe 684}
685
d1e4ebd9 686static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 687{
d1e4ebd9 688 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 690}
691
d1e4ebd9 692static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 693{
d1e4ebd9 694 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 696}
697
698static void emit_shlimm(u_int rs,u_int imm,u_int rt)
699{
be516ebe 700 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 702}
703
3968e69e 704static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 705{
3968e69e 706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 708}
709
3968e69e 710static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 711{
be516ebe 712 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 713 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 714}
715
716static void emit_sarimm(u_int rs,u_int imm,u_int rt)
717{
be516ebe 718 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 719 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 720}
721
722static void emit_rorimm(u_int rs,u_int imm,u_int rt)
723{
3968e69e 724 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 725 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 726}
727
728static void emit_signextend16(u_int rs, u_int rt)
729{
730 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 731 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 732}
733
d1e4ebd9 734static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 735{
3968e69e 736 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 737 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 738}
739
d1e4ebd9 740static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 741{
d1e4ebd9 742 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 744}
745
d1e4ebd9 746static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 747{
d1e4ebd9 748 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
749 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 750}
751
d1e4ebd9 752static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 753{
d1e4ebd9 754 if (imm < 4096) {
755 assem_debug("cmp %s,%#x\n", regname[rs], imm);
756 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
757 }
758 else if (-imm < 4096) {
759 assem_debug("cmn %s,%#x\n", regname[rs], imm);
760 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
761 }
762 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 763 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 764 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
765 }
766 else {
767 host_tempreg_acquire();
768 emit_movimm(imm, HOST_TEMPREG);
769 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
770 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
771 host_tempreg_release();
772 }
be516ebe 773}
774
d1e4ebd9 775static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 776{
d1e4ebd9 777 assert(imm == 0 || imm == 1);
778 assert(cond0 < 0x10);
779 assert(cond1 < 0x10);
780 if (imm) {
781 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
782 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
783 } else {
784 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
785 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
786 }
be516ebe 787}
788
d1e4ebd9 789static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 790{
d1e4ebd9 791 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 792}
793
d1e4ebd9 794static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 795{
d1e4ebd9 796 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 797}
798
799static void emit_cmovb_imm(int imm,u_int rt)
800{
d1e4ebd9 801 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 802}
803
3968e69e 804static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 805{
3968e69e 806 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 808}
809
810static void emit_cmovne_reg(u_int rs,u_int rt)
811{
d1e4ebd9 812 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 814}
815
816static void emit_cmovl_reg(u_int rs,u_int rt)
817{
d1e4ebd9 818 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 820}
821
e3c6bdb5 822static void emit_cmovb_reg(u_int rs,u_int rt)
823{
824 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
825 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
826}
827
be516ebe 828static void emit_cmovs_reg(u_int rs,u_int rt)
829{
d1e4ebd9 830 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
831 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 832}
833
3968e69e 834static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
835{
836 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
837 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
838}
839
be516ebe 840static void emit_slti32(u_int rs,int imm,u_int rt)
841{
842 if(rs!=rt) emit_zeroreg(rt);
843 emit_cmpimm(rs,imm);
844 if(rs==rt) emit_movimm(0,rt);
845 emit_cmovl_imm(1,rt);
846}
847
848static void emit_sltiu32(u_int rs,int imm,u_int rt)
849{
850 if(rs!=rt) emit_zeroreg(rt);
851 emit_cmpimm(rs,imm);
852 if(rs==rt) emit_movimm(0,rt);
853 emit_cmovb_imm(1,rt);
854}
855
856static void emit_cmp(u_int rs,u_int rt)
857{
858 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 859 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 860}
861
882a08fc 862static void emit_cmpcs(u_int rs,u_int rt)
863{
864 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
865 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
866}
867
be516ebe 868static void emit_set_gz32(u_int rs, u_int rt)
869{
870 //assem_debug("set_gz32\n");
871 emit_cmpimm(rs,1);
872 emit_movimm(1,rt);
873 emit_cmovl_imm(0,rt);
874}
875
876static void emit_set_nz32(u_int rs, u_int rt)
877{
878 //assem_debug("set_nz32\n");
d1e4ebd9 879 if(rs!=rt) emit_mov(rs,rt);
880 emit_test(rs,rs);
881 emit_cmovne_imm(1,rt);
be516ebe 882}
883
884static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
885{
886 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
887 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
888 emit_cmp(rs1,rs2);
889 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
890 emit_cmovl_imm(1,rt);
891}
892
893static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
894{
895 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
896 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
897 emit_cmp(rs1,rs2);
898 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
899 emit_cmovb_imm(1,rt);
900}
901
2a014d73 902static int can_jump_or_call(const void *a)
903{
904 intptr_t diff = (u_char *)a - out;
905 return (-134217728 <= diff && diff <= 134217727);
906}
907
d1e4ebd9 908static void emit_call(const void *a)
be516ebe 909{
d1e4ebd9 910 intptr_t diff = (u_char *)a - out;
911 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 912 assert(!(diff & 3));
913 if (-134217728 <= diff && diff <= 134217727)
914 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
915 else
3968e69e 916 abort();
be516ebe 917}
918
d1e4ebd9 919static void emit_jmp(const void *a)
be516ebe 920{
d1e4ebd9 921 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
922 u_int offset = genjmp(a);
923 output_w32(0x14000000 | offset);
be516ebe 924}
925
d1e4ebd9 926static void emit_jne(const void *a)
be516ebe 927{
d1e4ebd9 928 assem_debug("bne %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 931}
932
7c3a5182 933static void emit_jeq(const void *a)
be516ebe 934{
d1e4ebd9 935 assem_debug("beq %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 938}
939
7c3a5182 940static void emit_js(const void *a)
be516ebe 941{
d1e4ebd9 942 assem_debug("bmi %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 945}
946
7c3a5182 947static void emit_jns(const void *a)
be516ebe 948{
d1e4ebd9 949 assem_debug("bpl %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 952}
953
7c3a5182 954static void emit_jl(const void *a)
be516ebe 955{
d1e4ebd9 956 assem_debug("blt %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 959}
960
7c3a5182 961static void emit_jge(const void *a)
be516ebe 962{
d1e4ebd9 963 assem_debug("bge %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 966}
967
7c3a5182 968static void emit_jno(const void *a)
be516ebe 969{
d1e4ebd9 970 assem_debug("bvc %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 973}
974
7c3a5182 975static void emit_jc(const void *a)
be516ebe 976{
d1e4ebd9 977 assem_debug("bcs %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 980}
981
3968e69e 982static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 983{
3968e69e 984 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 985 u_int offset = genjmpcc(a);
3968e69e 986 is64 = is64 ? 0x80000000 : 0;
987 isnz = isnz ? 0x01000000 : 0;
988 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
989}
990
104df9d3 991static unused void emit_cbz(const void *a, u_int r)
3968e69e 992{
993 emit_cb(0, 0, a, r);
be516ebe 994}
995
996static void emit_jmpreg(u_int r)
997{
3968e69e 998 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 999 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 1000}
1001
1002static void emit_retreg(u_int r)
1003{
d1e4ebd9 1004 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 1005 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1006}
1007
1008static void emit_ret(void)
1009{
1010 emit_retreg(LR);
1011}
1012
d1e4ebd9 1013static void emit_adr(void *addr, u_int rt)
1014{
1015 intptr_t offset = (u_char *)addr - out;
1016 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1017 assert(rt < 31);
d1e4ebd9 1018 assem_debug("adr x%d,#%#lx\n", rt, offset);
1019 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1020}
1021
3968e69e 1022static void emit_adrp(void *addr, u_int rt)
1023{
1024 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1025 assert(-4294967296l <= offset && offset < 4294967296l);
1026 assert(rt < 31);
1027 offset >>= 12;
1028 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1029 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1030}
1031
be516ebe 1032static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1033{
d1e4ebd9 1034 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1035 assert(-256 <= offset && offset < 256);
1036 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1037}
1038
1039static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1040{
1041 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1043}
1044
1045static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1061}
39b71d9a 1062#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1063
1064static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065{
1066 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1067 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1068}
1069
1070static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071{
1072 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1073 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1074}
1075
1076static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1077{
1078 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1079 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1080}
1081
1082static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1083{
1084 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1085 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1086}
1087
1088static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1089{
1090 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1091 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1092}
1093
be516ebe 1094static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1095{
d1e4ebd9 1096 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1099}
1100
1101static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1102{
d1e4ebd9 1103 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1104 assert(-256 <= offset && offset < 256);
1105 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1106}
1107
1108static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1109{
d1e4ebd9 1110 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1111 assert(-256 <= offset && offset < 256);
1112 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1113}
1114
1115static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1116{
d1e4ebd9 1117 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1118 assert(-256 <= offset && offset < 256);
1119 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1120}
1121
be516ebe 1122static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1123{
3968e69e 1124 if (!(offset & 3) && (u_int)offset <= 16380) {
1125 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1126 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1127 }
1128 else if (-256 <= offset && offset < 256) {
1129 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1130 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1131 }
687b4580 1132 else
1133 assert(0);
be516ebe 1134}
1135
1136static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1137{
3968e69e 1138 if (!(offset & 1) && (u_int)offset <= 8190) {
1139 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1140 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1141 }
1142 else if (-256 <= offset && offset < 256) {
1143 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1145 }
687b4580 1146 else
1147 assert(0);
be516ebe 1148}
1149
1150static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1151{
3968e69e 1152 if ((u_int)offset < 4096) {
1153 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1154 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1155 }
1156 else if (-256 <= offset && offset < 256) {
1157 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1158 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1159 }
687b4580 1160 else
1161 assert(0);
be516ebe 1162}
1163
3968e69e 1164static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1165{
3968e69e 1166 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1168}
1169
3968e69e 1170static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1171{
3968e69e 1172 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1174}
1175
1176static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1177{
1178 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1179 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1180}
1181
1182static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1183{
1184 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1186}
1187
3968e69e 1188static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1189{
1190 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1192}
1193
1194static void emit_clz(u_int rs, u_int rt)
be516ebe 1195{
1196 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1197 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1198}
1199
be516ebe 1200// special case for checking invalid_code
d1e4ebd9 1201static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1202{
d1e4ebd9 1203 host_tempreg_acquire();
1204 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1205 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1206 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1207 emit_cmpimm(HOST_TEMPREG, imm);
1208 host_tempreg_release();
be516ebe 1209}
1210
3968e69e 1211// special for loadlr_assemble, rs2 is destroyed
1212static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1213{
3968e69e 1214 emit_shl(rs2, shift, rs2);
1215 emit_bic(rs1, rs2, rt);
be516ebe 1216}
1217
3968e69e 1218static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1219{
3968e69e 1220 emit_shr(rs2, shift, rs2);
1221 emit_bic(rs1, rs2, rt);
be516ebe 1222}
1223
687b4580 1224static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1225{
687b4580 1226 u_int op = 0xb9000000;
d1e4ebd9 1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
687b4580 1229 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1232 ofs = (ofs >> (2+is64));
687b4580 1233 if (!is_st) op |= 0x00400000;
1234 if (is64) op |= 0x40000000;
d1e4ebd9 1235 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1236}
1237
687b4580 1238static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1239{
687b4580 1240 u_int op = 0x29000000;
d1e4ebd9 1241 unused const char *ldst = is_st ? "st" : "ld";
1242 unused char rp = is64 ? 'x' : 'w';
687b4580 1243 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1244 is64 = is64 ? 1 : 0;
1245 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1246 ofs = (ofs >> (2+is64));
1247 assert(-64 <= ofs && ofs <= 63);
1248 ofs &= 0x7f;
1249 if (!is_st) op |= 0x00400000;
1250 if (is64) op |= 0x80000000;
d1e4ebd9 1251 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1252}
1253
1254static void save_load_regs_all(int is_store, u_int reglist)
1255{
1256 int ofs = 0, c = 0;
1257 u_int r, pair[2];
1258 for (r = 0; reglist; r++, reglist >>= 1) {
1259 if (reglist & 1)
1260 pair[c++] = r;
1261 if (c == 2) {
1262 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1263 ofs += 8 * 2;
1264 c = 0;
1265 }
1266 }
1267 if (c) {
1268 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1269 ofs += 8;
1270 }
1271 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1272}
1273
1274// Save registers before function call
1275static void save_regs(u_int reglist)
1276{
1277 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1278 save_load_regs_all(1, reglist);
be516ebe 1279}
1280
1281// Restore registers after function call
1282static void restore_regs(u_int reglist)
1283{
1284 reglist &= CALLER_SAVE_REGS;
687b4580 1285 save_load_regs_all(0, reglist);
be516ebe 1286}
1287
1288/* Stubs/epilogue */
1289
1290static void literal_pool(int n)
1291{
1292 (void)literals;
1293}
1294
// Intentionally empty in this file.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1298
d1e4ebd9 1299// parsed by get_pointer, find_extjump_insn
104df9d3 1300static void emit_extjump(u_char *addr, u_int target)
be516ebe 1301{
d1e4ebd9 1302 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1303
d1e4ebd9 1304 emit_movz(target & 0xffff, 0);
1305 emit_movk_lsl16(target >> 16, 0);
1306
1307 // addr is in the current recompiled block (max 256k)
1308 // offset shouldn't exceed +/-1MB
1309 emit_adr(addr, 1);
104df9d3 1310 emit_far_jump(dyna_linker);
be516ebe 1311}
1312
d1e4ebd9 1313static void check_extjump2(void *src)
be516ebe 1314{
d1e4ebd9 1315 u_int *ptr = src;
1316 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1317 (void)ptr;
be516ebe 1318}
1319
1320// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1321static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1322{
d1e4ebd9 1323 int diff = rt_val - rs_val;
3968e69e 1324 if ((-4096 < diff && diff < 4096)
1325 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1326 emit_addimm(rs, diff, rt);
3968e69e 1327 else if (rt_val == ~rs_val)
1328 emit_not(rs, rt);
d1e4ebd9 1329 else if (is_rotated_mask(rs_val ^ rt_val))
1330 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1331 else
d1e4ebd9 1332 emit_movimm(rt_val, rt);
be516ebe 1333}
1334
d1e4ebd9 1335// return 1 if the above function can do it's job cheaply
687b4580 1336static int is_similar_value(u_int v1, u_int v2)
be516ebe 1337{
687b4580 1338 int diff = v1 - v2;
3968e69e 1339 return (-4096 < diff && diff < 4096)
1340 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1341 || v1 == ~v2
d1e4ebd9 1342 || is_rotated_mask(v1 ^ v2);
1343}
1344
37387d8b 1345static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1346{
1347 if (rt_val < 0x100000000ull) {
1348 emit_movimm_from(rs_val, rs, rt_val, rt);
1349 return;
1350 }
1351 // just move the whole thing. At least on Linux all addresses
1352 // seem to be 48bit, so 3 insns - not great not terrible
aaece508 1353 emit_movimm64(rt_val, rt);
37387d8b 1354}
1355
1356// trashes x2
d1e4ebd9 1357static void pass_args64(u_int a0, u_int a1)
1358{
1359 if(a0==1&&a1==0) {
1360 // must swap
1361 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1362 }
1363 else if(a0!=0&&a1==0) {
1364 emit_mov64(a1,1);
1365 if (a0>=0) emit_mov64(a0,0);
1366 }
1367 else {
1368 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1369 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1370 }
be516ebe 1371}
1372
d1e4ebd9 1373static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1374{
1375 switch(type) {
1376 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1377 case LOADBU_STUB:
1378 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1379 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1380 case LOADHU_STUB:
1381 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1382 case LOADW_STUB:
1383 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1384 default: assert(0);
d1e4ebd9 1385 }
1386}
1387
1388#include "pcsxmem.h"
be516ebe 1389//#include "pcsxmem_inline.c"
1390
// Emit the out-of-line slow path for read stub n.
// The generated code loads the mem_rtab pointer, indexes it with (rs >> 12)
// and doubles the entry with a flag-setting add. If the add carries out
// (the entry's top bit was set) it branches to a call of
// jump_handler_read8/16/32; otherwise the doubled entry is used as the base
// of a direct load indexed by rs. Both paths end with a jump to
// stubs[n].retaddr.
// rs is stubs[n].b - presumably the host register holding the guest
// address (TODO confirm against the code that records the stub).
1391static void do_readstub(int n)
1392{
1393 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// patch the branch recorded in stubs[n].addr so that it lands here
d1e4ebd9 1394 set_jump_target(stubs[n].addr, out);
1395 enum stub_type type = stubs[n].type;
1396 int i = stubs[n].a;
1397 int rs = stubs[n].b;
1398 const struct regstat *i_regs = (void *)stubs[n].c;
1399 u_int reglist = stubs[n].e;
1400 const signed char *i_regmap = i_regs->regmap;
1401 int rt;
// C1LS/C2LS/LOADLR load into the FTEMP mapping, everything else into rt1
cf95b4f0 1402 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1403 rt=get_reg(i_regmap,FTEMP);
1404 }else{
cf95b4f0 1405 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1406 }
1407 assert(rs>=0);
1408 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1409 void *restore_jump = NULL, *handler_jump = NULL;
// rs must survive, so it may not be picked as a scratch register
1410 reglist|=(1<<rs);
// look for a host register below HOST_CCREG that is not in reglist
1411 for (r = 0; r < HOST_CCREG; r++) {
1412 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1413 temp = r;
1414 break;
1415 }
1416 }
// the destination is overwritten anyway, no need to save/restore it
cf95b4f0 1417 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1418 reglist&=~(1<<rt);
// no free register found: save registers now and use 0 (or 2 if rs is 0)
1419 if(temp==-1) {
1420 save_regs(reglist);
1421 regs_saved=1;
1422 temp=(rs==0)?2:0;
1423 }
// prefer register 1 over HOST_TEMPREG as second scratch when it is free
1424 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1425 temp2=1;
1426 emit_readdword(&mem_rtab,temp);
1427 emit_shrimm(rs,12,temp2);
1428 emit_readdword_dualindexedx8(temp,temp2,temp2);
// doubling sets the carry flag from the entry's top bit
1429 emit_adds64(temp2,temp2,temp2);
1430 handler_jump=out;
// placeholder target, patched below via set_jump_target(handler_jump, ...)
1431 emit_jc(0);
// direct access path
cf95b4f0 1432 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1433 switch(type) {
1434 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1435 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1436 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1437 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1438 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1439 default: assert(0);
d1e4ebd9 1440 }
1441 }
1442 if(regs_saved) {
1443 restore_jump=out;
1444 emit_jmp(0); // jump to reg restore
1445 }
1446 else
1447 emit_jmp(stubs[n].retaddr); // return address
// handler path starts here
1448 set_jump_target(handler_jump, out);
1449
1450 if(!regs_saved)
1451 save_regs(reglist);
1452 void *handler=NULL;
1453 if(type==LOADB_STUB||type==LOADBU_STUB)
1454 handler=jump_handler_read8;
1455 if(type==LOADH_STUB||type==LOADHU_STUB)
1456 handler=jump_handler_read16;
1457 if(type==LOADW_STUB)
1458 handler=jump_handler_read32;
1459 assert(handler);
// arguments: register 0 = rs, register 1 = doubled table entry (temp2)
1460 pass_args64(rs,temp2);
1461 int cc=get_reg(i_regmap,CCREG);
1462 if(cc<0)
1463 emit_loadreg(CCREG,2);
// register 2 = cycle count plus the stub's adjustment (stubs[n].d)
2330734f 1464 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1465 emit_far_call(handler);
d1e4ebd9 1466 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended per type
cf95b4f0 1467 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1468 loadstore_extend(type,0,rt);
1469 }
// the direct path (when registers were saved early) rejoins here
1470 if(restore_jump)
1471 set_jump_target(restore_jump, out);
1472 restore_regs(reglist);
1473 emit_jmp(stubs[n].retaddr);
be516ebe 1474}
1475
// Emit inline code for a load from the compile-time-known guest address
// addr. If get_direct_memhandler() returns no handler, a plain load from
// host_addr is emitted; otherwise the returned handler (or the generic
// jump_handler_readN when pcsxmem_is_handler_dynamic(addr) is set) is
// called with caller-saved registers preserved around the call.
//   target  - guest register looked up in regmap for both rs and rt
//   adj     - value added to the cycle count passed in register 2
//   reglist - host registers to save/restore around the call
81dbbf4c 1476static void inline_readstub(enum stub_type type, int i, u_int addr,
1477 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1478{
d1e4ebd9 1479 int rs=get_reg(regmap,target);
1480 int rt=get_reg(regmap,target);
// fall back to a temp register when target is not mapped
9de8a0c3 1481 if(rs<0) rs=get_reg_temp(regmap);
d1e4ebd9 1482 assert(rs>=0);
1483 u_int is_dynamic=0;
1484 uintptr_t host_addr = 0;
1485 void *handler;
1486 int cc=get_reg(regmap,CCREG);
2330734f 1487 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
d1e4ebd9 1488 // return;
1489 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// no handler: read directly from host memory
1490 if (handler == NULL) {
// nothing to emit when the result is not wanted
cf95b4f0 1491 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1492 return;
// rs is known to hold addr; turn it into host_addr
37387d8b 1493 if (addr != host_addr)
1494 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1495 switch(type) {
1496 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1497 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1498 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1499 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1500 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1501 default: assert(0);
1502 }
1503 return;
1504 }
37387d8b 1505 is_dynamic = pcsxmem_is_handler_dynamic(addr);
// dynamic handlers go through the generic jump_handler_readN entry points
1506 if (is_dynamic) {
d1e4ebd9 1507 if(type==LOADB_STUB||type==LOADBU_STUB)
1508 handler=jump_handler_read8;
1509 if(type==LOADH_STUB||type==LOADHU_STUB)
1510 handler=jump_handler_read16;
1511 if(type==LOADW_STUB)
1512 handler=jump_handler_read32;
1513 }
1514
1515 // call a memhandler
// the destination is overwritten anyway, no need to save/restore it
cf95b4f0 1516 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1517 reglist&=~(1<<rt);
1518 save_regs(reglist);
// register 0 = address
1519 if(target==0)
1520 emit_movimm(addr,0);
1521 else if(rs!=0)
1522 emit_mov(rs,0);
1523 if(cc<0)
1524 emit_loadreg(CCREG,2);
// register 2 = cycle count + adj
2330734f 1525 emit_addimm(cc<0?2:cc,adj,2);
3968e69e 1526 if(is_dynamic) {
// register 1 = mem_rtab entry for this page, shifted left by one;
// built with adrp+add when within adrp's +/-4GB reach, else a full move
1527 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
aaece508 1528 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1529 if (-4294967296l <= offset && offset < 4294967296l) {
1530 emit_adrp((void *)l1, 1);
1531 emit_addimm64(1, l1 & 0xfff, 1);
1532 }
1533 else
1534 emit_movimm64(l1, 1);
3968e69e 1535 }
d1e4ebd9 1536 else
2a014d73 1537 emit_far_call(do_memhandler_pre);
d1e4ebd9 1538
2a014d73 1539 emit_far_call(handler);
d1e4ebd9 1540
1541 // (no cycle reload after read)
// the handler's result is taken from register 0 and extended per type
cf95b4f0 1542 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1543 loadstore_extend(type, 0, rt);
1544 restore_regs(reglist);
be516ebe 1545}
1546
1547static void do_writestub(int n)
1548{
1549 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1550 set_jump_target(stubs[n].addr, out);
1551 enum stub_type type=stubs[n].type;
1552 int i=stubs[n].a;
1553 int rs=stubs[n].b;
1554 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1555 u_int reglist=stubs[n].e;
1556 signed char *i_regmap=i_regs->regmap;
1557 int rt,r;
cf95b4f0 1558 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1559 rt=get_reg(i_regmap,r=FTEMP);
1560 }else{
cf95b4f0 1561 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1562 }
1563 assert(rs>=0);
1564 assert(rt>=0);
1565 int rtmp,temp=-1,temp2,regs_saved=0;
1566 void *restore_jump = NULL, *handler_jump = NULL;
1567 int reglist2=reglist|(1<<rs)|(1<<rt);
1568 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1569 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1570 temp = rtmp;
1571 break;
1572 }
1573 }
1574 if(temp==-1) {
1575 save_regs(reglist);
1576 regs_saved=1;
1577 for(rtmp=0;rtmp<=3;rtmp++)
1578 if(rtmp!=rs&&rtmp!=rt)
1579 {temp=rtmp;break;}
1580 }
1581 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1582 temp2=3;
1583 else {
1584 host_tempreg_acquire();
1585 temp2=HOST_TEMPREG;
1586 }
1587 emit_readdword(&mem_wtab,temp);
1588 emit_shrimm(rs,12,temp2);
1589 emit_readdword_dualindexedx8(temp,temp2,temp2);
1590 emit_adds64(temp2,temp2,temp2);
1591 handler_jump=out;
1592 emit_jc(0);
1593 switch(type) {
1594 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1595 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1596 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1597 default: assert(0);
1598 }
1599 if(regs_saved) {
1600 restore_jump=out;
1601 emit_jmp(0); // jump to reg restore
1602 }
1603 else
1604 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1605 set_jump_target(handler_jump, out);
1606
d1e4ebd9 1607 if(!regs_saved)
1608 save_regs(reglist);
1609 void *handler=NULL;
1610 switch(type) {
1611 case STOREB_STUB: handler=jump_handler_write8; break;
1612 case STOREH_STUB: handler=jump_handler_write16; break;
1613 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1614 default: assert(0);
d1e4ebd9 1615 }
1616 assert(handler);
1617 pass_args(rs,rt);
1618 if(temp2!=3) {
1619 emit_mov64(temp2,3);
1620 host_tempreg_release();
1621 }
1622 int cc=get_reg(i_regmap,CCREG);
1623 if(cc<0)
1624 emit_loadreg(CCREG,2);
2330734f 1625 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1626 // returns new cycle_count
2a014d73 1627 emit_far_call(handler);
2330734f 1628 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1629 if(cc<0)
1630 emit_storereg(CCREG,2);
1631 if(restore_jump)
1632 set_jump_target(restore_jump, out);
1633 restore_regs(reglist);
1634 emit_jmp(stubs[n].retaddr);
be516ebe 1635}
1636
// Emit inline code for a store to the compile-time-known guest address
// addr. If get_direct_memhandler() returns no handler, a plain store to
// host_addr is emitted; otherwise the handler is called, bracketed by
// do_memhandler_pre/do_memhandler_post, with registers saved around it.
//   target  - guest register holding the value to store
//   adj     - value added to the cycle count passed in register 2 and
//             subtracted again from the count returned in register 0
//   reglist - host registers to save/restore around the call
81dbbf4c 1637static void inline_writestub(enum stub_type type, int i, u_int addr,
1638 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1639{
// rs: temp register, treated below as holding addr; rt: value to store
9de8a0c3 1640 int rs = get_reg_temp(regmap);
687b4580 1641 int rt = get_reg(regmap,target);
1642 assert(rs >= 0);
1643 assert(rt >= 0);
1644 uintptr_t host_addr = 0;
1645 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
// no handler: write directly to host memory
1646 if (handler == NULL) {
37387d8b 1647 if (addr != host_addr)
1648 emit_movimm_from64(addr, rs, host_addr, rs);
d1e4ebd9 1649 switch (type) {
687b4580 1650 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1651 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1652 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1653 default: assert(0);
1654 }
1655 return;
1656 }
1657
1658 // call a memhandler
1659 save_regs(reglist);
687b4580 1660 emit_writeword(rs, &address); // some handlers still need it
// register 0 = value to store, extended according to the access size
d1e4ebd9 1661 loadstore_extend(type, rt, 0);
1662 int cc, cc_use;
1663 cc = cc_use = get_reg(regmap, CCREG);
// cycle count not in a host register: load it into register 2
1664 if (cc < 0)
1665 emit_loadreg(CCREG, (cc_use = 2));
2330734f 1666 emit_addimm(cc_use, adj, 2);
d1e4ebd9 1667
2a014d73 1668 emit_far_call(do_memhandler_pre);
1669 emit_far_call(handler);
1670 emit_far_call(do_memhandler_post);
// register 0 now holds the cycle count to carry on with; undo adj
2330734f 1671 emit_addimm(0, -adj, cc_use);
d1e4ebd9 1672 if (cc < 0)
1673 emit_storereg(CCREG, cc_use);
687b4580 1674 restore_regs(reglist);
be516ebe 1675}
1676
3968e69e 1677/* Special assem */
1678
81dbbf4c 1679static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1680{
1681 save_load_regs_all(1, reglist);
32631e6a 1682 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1683#ifdef PCNT
1684 emit_movimm(op, 0);
2a014d73 1685 emit_far_call(pcnt_gte_start);
3968e69e 1686#endif
1687 // pointer to cop2 regs
1688 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1689}
1690
1691static void c2op_epilogue(u_int op,u_int reglist)
1692{
1693#ifdef PCNT
1694 emit_movimm(op, 0);
2a014d73 1695 emit_far_call(pcnt_gte_end);
3968e69e 1696#endif
1697 save_load_regs_all(0, reglist);
be516ebe 1698}
1699
// Assemble the GTE (cop2) operation at instruction index i.
// When a handler exists for the op, emits: prologue, a store of the raw
// opcode to psxRegs.code, a call to the flag-computing or flag-free
// (gte_handlers_nf) handler, and the epilogue. Nothing is emitted when
// gte_handlers[c2op] is NULL.
81dbbf4c 1700static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1701{
// the low 6 bits of the instruction word select the handler
3968e69e 1702 u_int c2op=source[i]&0x3f;
1703 u_int hr,reglist_full=0,reglist;
1704 int need_flags,need_ir;
// collect every host register that currently has something mapped
1705 for(hr=0;hr<HOST_REGS;hr++) {
1706 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1707 }
1708 reglist=reglist_full&CALLER_SAVE_REGS;
1709
1710 if (gte_handlers[c2op]!=NULL) {
1711 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1712 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1713 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1714 source[i],gte_unneeded[i+1],need_flags,need_ir);
// the hack forces the flag-free handlers regardless of liveness
d62c125a 1715 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1716 need_flags=0;
1717 //int shift = (source[i] >> 19) & 1;
1718 //int lm = (source[i] >> 10) & 1;
1719 switch(c2op) {
1720 default:
// need_ir is computed but not used in this file's only case
1721 (void)need_ir;
81dbbf4c 1722 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1723 emit_movimm(source[i],1); // opcode
1724 emit_writeword(1,&psxRegs.code);
2a014d73 1725 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1726 break;
1727 }
1728 c2op_epilogue(c2op,reglist);
1729 }
1730}
1731
1732static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1733{
1734 //value = value & 0x7ffff000;
1735 //if (value & 0x7f87e000) value |= 0x80000000;
1736 emit_andimm(sl, 0x7fffe000, temp);
1737 emit_testimm(temp, 0xff87ffff);
1738 emit_andimm(sl, 0x7ffff000, temp);
1739 host_tempreg_acquire();
1740 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1741 emit_cmovne_reg(HOST_TEMPREG, temp);
1742 host_tempreg_release();
1743 assert(0); // testing needed
1744}
1745
1746static void do_mfc2_31_one(u_int copr,signed char temp)
1747{
1748 emit_readshword(&reg_cop2d[copr],temp);
1749 emit_bicsar_imm(temp,31,temp);
1750 emit_cmpimm(temp,0xf80);
1751 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1752 emit_andimm(temp,0xf80,temp);
1753}
1754
1755static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1756{
1757 if (temp < 0) {
1758 host_tempreg_acquire();
1759 temp = HOST_TEMPREG;
1760 }
1761 do_mfc2_31_one(9,temp);
1762 emit_shrimm(temp,7,tl);
1763 do_mfc2_31_one(10,temp);
1764 emit_orrshr_imm(temp,2,tl);
1765 do_mfc2_31_one(11,temp);
1766 emit_orrshl_imm(temp,3,tl);
1767 emit_writeword(tl,&reg_cop2d[29]);
1768
1769 if (temp == HOST_TEMPREG)
1770 host_tempreg_release();
be516ebe 1771}
1772
// Assemble MULT/MULTU/DIV/DIVU (opcode2 0x18..0x1B) for instruction i.
// When both guest source registers are non-zero register numbers, real
// multiply/divide code is emitted into the HIREG/LOREG mappings.
// Otherwise one operand is guest register 0 and the results are emitted
// as constants (or derived from the numerator for a division by zero).
2330734f 1773static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
be516ebe 1774{
3968e69e 1775 // case 0x18: MULT
1776 // case 0x19: MULTU
1777 // case 0x1A: DIV
1778 // case 0x1B: DIVU
cf95b4f0 1779 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1780 {
cf95b4f0 1781 switch(dops[i].opcode2)
3968e69e 1782 {
1783 case 0x18: // MULT
1784 case 0x19: // MULTU
1785 {
cf95b4f0 1786 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1787 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1788 signed char hi=get_reg(i_regs->regmap,HIREG);
1789 signed char lo=get_reg(i_regs->regmap,LOREG);
1790 assert(m1>=0);
1791 assert(m2>=0);
1792 assert(hi>=0);
1793 assert(lo>=0);
1794
// the 64-bit product lands in the hi register first
cf95b4f0 1795 if(dops[i].opcode2==0x18) // MULT
3968e69e 1796 emit_smull(m1,m2,hi);
1797 else // MULTU
1798 emit_umull(m1,m2,hi);
1799
// lo = low 32 bits of the product, hi = product >> 32
1800 emit_mov(hi,lo);
1801 emit_shrimm64(hi,32,hi);
1802 break;
1803 }
1804 case 0x1A: // DIV
1805 case 0x1B: // DIVU
1806 {
cf95b4f0 1807 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1808 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1809 signed char quotient=get_reg(i_regs->regmap,LOREG);
1810 signed char remainder=get_reg(i_regs->regmap,HIREG);
1811 assert(numerator>=0);
1812 assert(denominator>=0);
1813 assert(quotient>=0);
1814 assert(remainder>=0);
1815
cf95b4f0 1816 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1817 emit_sdiv(numerator,denominator,quotient);
1818 else // DIVU
1819 emit_udiv(numerator,denominator,quotient);
// remainder = numerator - quotient * denominator
1820 emit_msub(quotient,denominator,numerator,remainder);
1821
1822 // div 0 quotient (remainder is already correct)
1823 host_tempreg_acquire();
// DIV: replacement quotient is derived from the numerator's sign via
// emit_sub_asrimm (exact value depends on that helper - not shown here);
// DIVU: replacement quotient is ~0
cf95b4f0 1824 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1825 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1826 else
1827 emit_movimm(~0,HOST_TEMPREG);
// substitute the replacement only when the denominator is zero
1828 emit_test(denominator,denominator);
1829 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1830 host_tempreg_release();
1831 break;
1832 }
1833 default:
1834 assert(0);
1835 }
1836 }
1837 else
1838 {
// HIREG/LOREG may be unmapped here, hence the >= 0 checks below
1839 signed char hr=get_reg(i_regs->regmap,HIREG);
1840 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1841 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1842 {
cf95b4f0 1843 if (dops[i].rs1) {
1844 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1845 assert(numerator >= 0);
// remainder (HI) of a division by zero is the numerator itself
1846 if (hr >= 0)
1847 emit_mov(numerator,hr);
1848 if (lr >= 0) {
cf95b4f0 1849 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1850 emit_sub_asrimm(0,numerator,31,lr);
1851 else
1852 emit_movimm(~0,lr);
1853 }
1854 }
1855 else {
// 0 / 0: HI = 0, LO = ~0
1856 if (hr >= 0) emit_zeroreg(hr);
1857 if (lr >= 0) emit_movimm(~0,lr);
1858 }
1859 }
1860 else
1861 {
1862 // Multiply by zero is zero.
1863 if (hr >= 0) emit_zeroreg(hr);
1864 if (lr >= 0) emit_zeroreg(lr);
1865 }
1866 }
be516ebe 1867}
1868#define multdiv_assemble multdiv_assemble_arm64
1869
d1e4ebd9 1870static void do_jump_vaddr(u_int rs)
1871{
1872 if (rs != 0)
1873 emit_mov(rs, 0);
104df9d3 1874 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1875 emit_jmpreg(0);
1876}
1877
be516ebe 1878static void do_preload_rhash(u_int r) {
1879 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1880 // register. On ARM the hash can be done with a single instruction (below)
1881}
1882
1883static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1884 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1885}
1886
1887static void do_rhash(u_int rs,u_int rh) {
1888 emit_andimm(rs, 0xf8, rh);
1889}
1890
d1e4ebd9 1891static void do_miniht_load(int ht, u_int rh) {
1892 emit_add64(ht, rh, ht);
1893 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1894}
1895
d1e4ebd9 1896static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1897 emit_cmp(rh, rs);
1898 void *jaddr = out;
1899 emit_jeq(0);
1900 do_jump_vaddr(rs);
1901
1902 set_jump_target(jaddr, out);
1903 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1904 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1905 emit_jmpreg(ht);
be516ebe 1906}
1907
d1e4ebd9 1908// parsed by set_jump_target?
be516ebe 1909static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1910 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1911 emit_movk(return_address&0xffff,rt);
1912 add_to_linker(out,return_address,1);
1913 emit_adr(out,temp);
1914 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1915 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1916}
1917
// Make the code in [start, end) visible to instruction fetch.
// Reads CTR_EL0 on every call for the cache line sizes and the IDC/DIC
// bits, cleans the data cache and invalidates the instruction cache by
// line where those bits say it is required, and finishes with an isb.
d9e2b173 1918static unused void clear_cache_arm64(char *start, char *end)
be516ebe 1919{
919981d0 1920 // Don't rely on GCC's __clear_cache implementation, as it caches
1921 // icache/dcache cache line sizes, that can vary between cores on
1922 // big.LITTLE architectures.
1923 uint64_t addr, ctr_el0;
// running minimum of the line sizes seen so far, kept across calls
1924 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1925 size_t isize, dsize;
1926
1927 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
// line sizes in bytes: 4 << the 4-bit fields at bits 0 (icache) and 16 (dcache)
1928 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1929 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1930
1931 // use the global minimum cache line size
1932 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1933 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1934
1935 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1936 not required for instruction to data coherence. */
// bit 28 is tested here as the IDC bit
1937 if ((ctr_el0 & (1 << 28)) == 0x0) {
// start from the line containing `start`
1938 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1939 for (; addr < (uint64_t)end; addr += dsize)
1940 // use "civac" instead of "cvau", as this is the suggested workaround for
1941 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1942 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
be516ebe 1943 }
// this barrier is issued unconditionally, even when the loop above is skipped
919981d0 1944 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 1945
919981d0 1946 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1947 Unification is not required for instruction to data coherence. */
// bit 29 is tested here as the DIC bit
1948 if ((ctr_el0 & (1 << 29)) == 0x0) {
1949 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1950 for (; addr < (uint64_t)end; addr += isize)
1951 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1952
1953 __asm__ volatile("dsb ish" : : : "memory");
be516ebe 1954 }
919981d0 1955
1956 __asm__ volatile("isb" : : : "memory");
be516ebe 1957}
1958
1959// CPU-architecture-specific initialization
2a014d73 1960static void arch_init(void)
1961{
1962 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
d9e2b173 1963 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2a014d73 1964 size_t i;
1965 assert(!(diff & 3));
d9e2b173 1966 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2a014d73 1967 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
d9e2b173 1968 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1969 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2a014d73 1970 }
1971 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 1972}
1973
1974// vim:shiftwidth=2:expandtab