drc: more precise invalidation checking for the invstub case
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define unused __attribute__((unused))
27
d1e4ebd9 28void do_memhandler_pre();
29void do_memhandler_post();
be516ebe 30
31/* Linker */
// Patch the pc-relative instruction at addr so that it refers to target.
// Supported instructions: b, b.cond, cbz/cbnz and adr; anything else aborts.
// The byte distance must fit the instruction's immediate field (asserted).
static void set_jump_target(void *addr, void *target)
{
  uint32_t *ptr = addr;
  intptr_t offset = (unsigned char *)target - (unsigned char *)addr;
  uint32_t insn = *ptr;

  if ((insn & 0xFC000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *ptr = (insn & 0xFC000000) | ((uint32_t)(offset >> 2) & 0x3ffffff);
  }
  else if ((insn & 0xff000000) == 0x54000000 // b.cond
        || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out).
    // A workaround would be to do a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep bits 4:0 intact: cbz/cbnz store a 5-bit register number there,
    // so a 4-bit mask would corrupt registers 16 and above.
    *ptr = (insn & 0xFF00001F) | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else if ((insn & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *ptr = (insn & 0x9F00001F)
         | ((uint32_t)(offset & 0x3) << 29)
         | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
58
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is.
// The third instruction word of the stub must be an adr (asserted); the
// address it encodes, relative to that adr, is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  uint32_t imm21;
  int offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi lives in bits 23:5, immlo in bits 30:29; decode with unsigned
  // arithmetic so no signed value is ever left-shifted
  imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend the 21-bit byte offset
  offset = (int)(imm21 ^ 0x100000) - 0x100000;
  return ptr + offset / 4;
}
68
104df9d3 69#if 0
// find where external branch is linked to using addr of its stub:
// get address that the stub loads (dyna_linker arg1),
// treat it as a pointer to branch insn,
// return addr where that branch jumps to
// (currently compiled out by the surrounding "#if 0")
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  if ((*i_ptr&0xfc000000) == 0x14000000) // b
    return i_ptr + ((signed int)(*i_ptr<<6)>>6);
  if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
      || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
    return i_ptr + ((signed int)(*i_ptr<<8)>>13);
  // not one of the branch forms handled above
  assert(0);
  return NULL;
}
104df9d3 85#endif
be516ebe 86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
// Printable names of the 32-bit register views, indexed by register number.
// Used as arguments to the assem_debug traces in this file.
static unused const char *regname[32] = {
  "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
  "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
  "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
  "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
};

// Printable names of the 64-bit register views, same indexing as regname.
static unused const char *regname64[32] = {
  "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
  "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
};

// Condition codes; the enumerator values (0..15) are OR-ed or shifted
// directly into instruction words by the emitters below.
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
};

// Printable condition suffixes, in the same order as the COND_* enum.
static unused const char *condname[16] = {
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
3968e69e 149static u_int rn_rd(u_int rn, u_int rd)
150{
151 assert(rn < 31);
152 assert(rd < 31);
153 return (rn << 5) | rd;
154}
155
be516ebe 156static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
157{
d1e4ebd9 158 assert(rm < 32);
159 assert(rn < 32);
160 assert(rd < 32);
be516ebe 161 return (rm << 16) | (rn << 5) | rd;
162}
163
3968e69e 164static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
165{
166 assert(ra < 32);
167 return rm_rn_rd(rm, rn, rd) | (ra << 10);
168}
169
d1e4ebd9 170static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
171{
172 assert(imm7 < 0x80);
173 assert(rt2 < 31);
174 assert(rn < 32);
175 assert(rt < 31);
176 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
177}
178
687b4580 179static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
180{
181 assert(imm6 <= 63);
182 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
183}
184
be516ebe 185static u_int imm16_rd(u_int imm16, u_int rd)
186{
187 assert(imm16 < 0x10000);
188 assert(rd < 31);
189 return (imm16 << 5) | rd;
190}
191
687b4580 192static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
193{
194 assert(imm12 < 0x1000);
d1e4ebd9 195 assert(rn < 32);
196 assert(rd < 32);
197 return (imm12 << 10) | (rn << 5) | rd;
198}
199
200static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
201{
202 assert(imm9 < 0x200);
687b4580 203 assert(rn < 31);
204 assert(rd < 31);
d1e4ebd9 205 return (imm9 << 12) | (rn << 5) | rd;
687b4580 206}
207
d1e4ebd9 208static u_int imm19_rt(u_int imm19, u_int rt)
209{
210 assert(imm19 < 0x80000);
211 assert(rt < 31);
212 return (imm19 << 5) | rt;
213}
214
215static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
216{
217 assert(n < 2);
218 assert(immr < 0x40);
219 assert(imms < 0x40);
220 assert(rn < 32);
221 assert(rd < 32);
222 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
223}
224
225static u_int genjmp(const u_char *addr)
be516ebe 226{
227 intptr_t offset = addr - out;
d1e4ebd9 228 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 229 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 230 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
231 abort();
be516ebe 232 return 0;
233 }
d1e4ebd9 234 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 235}
236
d1e4ebd9 237static u_int genjmpcc(const u_char *addr)
be516ebe 238{
239 intptr_t offset = addr - out;
d1e4ebd9 240 if ((uintptr_t)addr < 3) return 0;
be516ebe 241 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 242 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
243 abort();
244 return 0;
245 }
246 return ((u_int)offset >> 2) & 0x7ffff;
247}
248
249static uint32_t is_mask(u_int value)
250{
251 return value && ((value + 1) & value) == 0;
252}
253
254// This function returns true if the argument contains a
255// non-empty sequence of ones (possibly rotated) with the remainder zero.
256static uint32_t is_rotated_mask(u_int value)
257{
3968e69e 258 if (value == 0 || value == ~0)
be516ebe 259 return 0;
d1e4ebd9 260 if (is_mask((value - 1) | value))
261 return 1;
262 return is_mask((~value - 1) | ~value);
263}
264
265static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
266{
267 int lzeros, tzeros, ones;
268 assert(value != 0);
269 if (is_mask((value - 1) | value)) {
270 lzeros = __builtin_clz(value);
271 tzeros = __builtin_ctz(value);
272 ones = 32 - lzeros - tzeros;
273 *immr = (32 - tzeros) & 31;
274 *imms = ones - 1;
275 return;
be516ebe 276 }
d1e4ebd9 277 value = ~value;
278 if (is_mask((value - 1) | value)) {
279 lzeros = __builtin_clz(value);
280 tzeros = __builtin_ctz(value);
281 ones = 32 - lzeros - tzeros;
3968e69e 282 *immr = lzeros;
d1e4ebd9 283 *imms = 31 - ones;
284 return;
285 }
3968e69e 286 abort();
be516ebe 287}
288
289static void emit_mov(u_int rs, u_int rt)
290{
687b4580 291 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 292 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
293}
294
295static void emit_mov64(u_int rs, u_int rt)
296{
297 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
298 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 299}
300
687b4580 301static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 302{
d1e4ebd9 303 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
304 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 305}
306
d1e4ebd9 307static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 308{
d1e4ebd9 309 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
310 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 311}
312
d1e4ebd9 313static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 314{
3968e69e 315 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 316 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
317}
39b71d9a 318#define emit_adds_ptr emit_adds64
d1e4ebd9 319
320static void emit_neg(u_int rs, u_int rt)
321{
322 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
323 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 324}
325
687b4580 326static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 327{
d1e4ebd9 328 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 329 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 330}
331
3968e69e 332static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
333{
334 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
335 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
336}
337
d1e4ebd9 338static void emit_movz(u_int imm, u_int rt)
be516ebe 339{
d1e4ebd9 340 assem_debug("movz %s,#%#x\n", regname[rt], imm);
341 output_w32(0x52800000 | imm16_rd(imm, rt));
342}
343
344static void emit_movz_lsl16(u_int imm, u_int rt)
345{
346 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
347 output_w32(0x52a00000 | imm16_rd(imm, rt));
348}
349
350static void emit_movn(u_int imm, u_int rt)
351{
352 assem_debug("movn %s,#%#x\n", regname[rt], imm);
353 output_w32(0x12800000 | imm16_rd(imm, rt));
354}
355
356static void emit_movn_lsl16(u_int imm,u_int rt)
357{
358 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
359 output_w32(0x12a00000 | imm16_rd(imm, rt));
360}
361
362static void emit_movk(u_int imm,u_int rt)
363{
364 assem_debug("movk %s,#%#x\n", regname[rt], imm);
365 output_w32(0x72800000 | imm16_rd(imm, rt));
366}
367
368static void emit_movk_lsl16(u_int imm,u_int rt)
369{
370 assert(imm<65536);
3968e69e 371 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 372 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 373}
374
375static void emit_zeroreg(u_int rt)
376{
d1e4ebd9 377 emit_movz(0, rt);
be516ebe 378}
379
be516ebe 380static void emit_movimm(u_int imm, u_int rt)
381{
d1e4ebd9 382 if (imm < 65536)
383 emit_movz(imm, rt);
384 else if ((~imm) < 65536)
385 emit_movn(~imm, rt);
386 else if ((imm&0xffff) == 0)
387 emit_movz_lsl16(imm >> 16, rt);
388 else if (((~imm)&0xffff) == 0)
389 emit_movn_lsl16(~imm >> 16, rt);
390 else if (is_rotated_mask(imm)) {
391 u_int immr, imms;
392 gen_logical_imm(imm, &immr, &imms);
393 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
394 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
395 }
be516ebe 396 else {
d1e4ebd9 397 emit_movz(imm & 0xffff, rt);
398 emit_movk_lsl16(imm >> 16, rt);
be516ebe 399 }
400}
401
687b4580 402static void emit_readword(void *addr, u_int rt)
403{
404 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
405 if (!(offset & 3) && offset <= 16380) {
406 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
407 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
408 }
409 else
3968e69e 410 abort();
687b4580 411}
412
d1e4ebd9 413static void emit_readdword(void *addr, u_int rt)
414{
415 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
416 if (!(offset & 7) && offset <= 32760) {
417 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
418 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
419 }
3968e69e 420 else
421 abort();
422}
39b71d9a 423#define emit_readptr emit_readdword
3968e69e 424
425static void emit_readshword(void *addr, u_int rt)
426{
427 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
428 if (!(offset & 1) && offset <= 8190) {
429 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
430 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
431 }
d1e4ebd9 432 else
433 assert(0);
434}
435
be516ebe 436static void emit_loadreg(u_int r, u_int hr)
437{
d1e4ebd9 438 int is64 = 0;
be516ebe 439 if (r == 0)
440 emit_zeroreg(hr);
441 else {
33788798 442 void *addr;
be516ebe 443 switch (r) {
7c3a5182 444 //case HIREG: addr = &hi; break;
445 //case LOREG: addr = &lo; break;
be516ebe 446 case CCREG: addr = &cycle_count; break;
447 case CSREG: addr = &Status; break;
d1e4ebd9 448 case INVCP: addr = &invc_ptr; is64 = 1; break;
37387d8b 449 case ROREG: addr = &ram_offset; is64 = 1; break;
33788798 450 default:
451 assert(r < 34);
452 addr = &psxRegs.GPR.r[r];
453 break;
be516ebe 454 }
d1e4ebd9 455 if (is64)
456 emit_readdword(addr, hr);
457 else
458 emit_readword(addr, hr);
be516ebe 459 }
460}
461
687b4580 462static void emit_writeword(u_int rt, void *addr)
463{
464 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
465 if (!(offset & 3) && offset <= 16380) {
466 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
467 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
468 }
469 else
470 assert(0);
471}
472
d1e4ebd9 473static void emit_writedword(u_int rt, void *addr)
474{
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 7) && offset <= 32760) {
477 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 478 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 479 }
480 else
3968e69e 481 abort();
d1e4ebd9 482}
483
687b4580 484static void emit_storereg(u_int r, u_int hr)
be516ebe 485{
486 assert(r < 64);
7c3a5182 487 void *addr = &psxRegs.GPR.r[r];
be516ebe 488 switch (r) {
7c3a5182 489 //case HIREG: addr = &hi; break;
490 //case LOREG: addr = &lo; break;
be516ebe 491 case CCREG: addr = &cycle_count; break;
7c3a5182 492 default: assert(r < 34); break;
be516ebe 493 }
687b4580 494 emit_writeword(hr, addr);
be516ebe 495}
496
497static void emit_test(u_int rs, u_int rt)
498{
d1e4ebd9 499 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
500 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 501}
502
d1e4ebd9 503static void emit_testimm(u_int rs, u_int imm)
be516ebe 504{
d1e4ebd9 505 u_int immr, imms;
687b4580 506 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 507 assert(is_rotated_mask(imm)); // good enough for PCSX
508 gen_logical_imm(imm, &immr, &imms);
3968e69e 509 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 510}
511
512static void emit_not(u_int rs,u_int rt)
513{
514 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 515 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 516}
517
be516ebe 518static void emit_and(u_int rs1,u_int rs2,u_int rt)
519{
520 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 521 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 522}
523
524static void emit_or(u_int rs1,u_int rs2,u_int rt)
525{
526 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 527 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 528}
529
3968e69e 530static void emit_bic(u_int rs1,u_int rs2,u_int rt)
531{
532 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
533 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
534}
535
be516ebe 536static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
537{
be516ebe 538 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 539 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 540}
541
542static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
543{
be516ebe 544 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 545 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 546}
547
3968e69e 548static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
549{
550 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
551 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
552}
553
be516ebe 554static void emit_xor(u_int rs1,u_int rs2,u_int rt)
555{
556 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 557 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 558}
559
3968e69e 560static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
561{
562 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
563 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
564}
565
d1e4ebd9 566static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 567{
d1e4ebd9 568 unused const char *st = s ? "s" : "";
569 s = s ? 0x20000000 : 0;
570 is64 = is64 ? 0x80000000 : 0;
687b4580 571 if (imm < 4096) {
d1e4ebd9 572 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
573 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 574 }
575 else if (-imm < 4096) {
3968e69e 576 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 577 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
578 }
579 else if (imm < 16777216) {
580 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
581 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
582 if ((imm & 0xfff) || s) {
583 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 584 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 585 }
586 }
587 else if (-imm < 16777216) {
588 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
589 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
590 if ((imm & 0xfff) || s) {
591 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
592 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
593 }
687b4580 594 }
595 else
3968e69e 596 abort();
be516ebe 597}
598
d1e4ebd9 599static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
600{
601 emit_addimm_s(0, 0, rs, imm, rt);
602}
603
604static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
605{
606 emit_addimm_s(0, 1, rs, imm, rt);
607}
608
be516ebe 609static void emit_addimm_and_set_flags(int imm, u_int rt)
610{
d1e4ebd9 611 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 612}
613
d1e4ebd9 614static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 615{
d1e4ebd9 616 const char *names[] = { "and", "orr", "eor", "ands" };
617 const char *name = names[op];
618 u_int immr, imms;
619 op = op << 29;
620 if (is_rotated_mask(imm)) {
621 gen_logical_imm(imm, &immr, &imms);
622 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
623 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
624 }
625 else {
626 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
627 host_tempreg_acquire();
628 emit_movimm(imm, HOST_TEMPREG);
629 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
630 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
631 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
632 host_tempreg_release();
633 }
634 (void)name;
be516ebe 635}
636
d1e4ebd9 637static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 638{
d1e4ebd9 639 if (imm == 0)
640 emit_zeroreg(rt);
641 else
642 emit_logicop_imm(0, rs, imm, rt);
be516ebe 643}
644
d1e4ebd9 645static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 646{
d1e4ebd9 647 if (imm == 0) {
648 if (rs != rt)
649 emit_mov(rs, rt);
650 }
651 else
652 emit_logicop_imm(1, rs, imm, rt);
be516ebe 653}
654
d1e4ebd9 655static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 656{
d1e4ebd9 657 if (imm == 0) {
658 if (rs != rt)
659 emit_mov(rs, rt);
660 }
661 else
662 emit_logicop_imm(2, rs, imm, rt);
be516ebe 663}
664
d1e4ebd9 665static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 666{
d1e4ebd9 667 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
668 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 669}
670
d1e4ebd9 671static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 672{
d1e4ebd9 673 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
674 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 675}
676
677static void emit_shlimm(u_int rs,u_int imm,u_int rt)
678{
be516ebe 679 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 680 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 681}
682
3968e69e 683static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 684{
3968e69e 685 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
686 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 687}
688
3968e69e 689static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 690{
be516ebe 691 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 692 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 693}
694
695static void emit_sarimm(u_int rs,u_int imm,u_int rt)
696{
be516ebe 697 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 698 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 699}
700
701static void emit_rorimm(u_int rs,u_int imm,u_int rt)
702{
3968e69e 703 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 704 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 705}
706
707static void emit_signextend16(u_int rs, u_int rt)
708{
709 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 710 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 711}
712
d1e4ebd9 713static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 714{
3968e69e 715 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 716 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 717}
718
d1e4ebd9 719static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 720{
d1e4ebd9 721 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
722 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 723}
724
d1e4ebd9 725static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 726{
d1e4ebd9 727 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
728 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 729}
730
d1e4ebd9 731static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 732{
d1e4ebd9 733 if (imm < 4096) {
734 assem_debug("cmp %s,%#x\n", regname[rs], imm);
735 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
736 }
737 else if (-imm < 4096) {
738 assem_debug("cmn %s,%#x\n", regname[rs], imm);
739 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
740 }
741 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 742 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 743 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
744 }
745 else {
746 host_tempreg_acquire();
747 emit_movimm(imm, HOST_TEMPREG);
748 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
749 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
750 host_tempreg_release();
751 }
be516ebe 752}
753
d1e4ebd9 754static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 755{
d1e4ebd9 756 assert(imm == 0 || imm == 1);
757 assert(cond0 < 0x10);
758 assert(cond1 < 0x10);
759 if (imm) {
760 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
761 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
762 } else {
763 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
764 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
765 }
be516ebe 766}
767
d1e4ebd9 768static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 769{
d1e4ebd9 770 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 771}
772
d1e4ebd9 773static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 774{
d1e4ebd9 775 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 776}
777
778static void emit_cmovb_imm(int imm,u_int rt)
779{
d1e4ebd9 780 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 781}
782
3968e69e 783static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 784{
3968e69e 785 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
786 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 787}
788
789static void emit_cmovne_reg(u_int rs,u_int rt)
790{
d1e4ebd9 791 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
792 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 793}
794
795static void emit_cmovl_reg(u_int rs,u_int rt)
796{
d1e4ebd9 797 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
798 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 799}
800
e3c6bdb5 801static void emit_cmovb_reg(u_int rs,u_int rt)
802{
803 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
804 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
805}
806
be516ebe 807static void emit_cmovs_reg(u_int rs,u_int rt)
808{
d1e4ebd9 809 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
810 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 811}
812
3968e69e 813static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
814{
815 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
816 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
817}
818
be516ebe 819static void emit_slti32(u_int rs,int imm,u_int rt)
820{
821 if(rs!=rt) emit_zeroreg(rt);
822 emit_cmpimm(rs,imm);
823 if(rs==rt) emit_movimm(0,rt);
824 emit_cmovl_imm(1,rt);
825}
826
827static void emit_sltiu32(u_int rs,int imm,u_int rt)
828{
829 if(rs!=rt) emit_zeroreg(rt);
830 emit_cmpimm(rs,imm);
831 if(rs==rt) emit_movimm(0,rt);
832 emit_cmovb_imm(1,rt);
833}
834
835static void emit_cmp(u_int rs,u_int rt)
836{
837 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 838 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 839}
840
882a08fc 841static void emit_cmpcs(u_int rs,u_int rt)
842{
843 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
844 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
845}
846
be516ebe 847static void emit_set_gz32(u_int rs, u_int rt)
848{
849 //assem_debug("set_gz32\n");
850 emit_cmpimm(rs,1);
851 emit_movimm(1,rt);
852 emit_cmovl_imm(0,rt);
853}
854
855static void emit_set_nz32(u_int rs, u_int rt)
856{
857 //assem_debug("set_nz32\n");
d1e4ebd9 858 if(rs!=rt) emit_mov(rs,rt);
859 emit_test(rs,rs);
860 emit_cmovne_imm(1,rt);
be516ebe 861}
862
863static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
864{
865 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
866 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
867 emit_cmp(rs1,rs2);
868 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
869 emit_cmovl_imm(1,rt);
870}
871
872static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
873{
874 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
875 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
876 emit_cmp(rs1,rs2);
877 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
878 emit_cmovb_imm(1,rt);
879}
880
2a014d73 881static int can_jump_or_call(const void *a)
882{
883 intptr_t diff = (u_char *)a - out;
884 return (-134217728 <= diff && diff <= 134217727);
885}
886
d1e4ebd9 887static void emit_call(const void *a)
be516ebe 888{
d1e4ebd9 889 intptr_t diff = (u_char *)a - out;
890 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 891 assert(!(diff & 3));
892 if (-134217728 <= diff && diff <= 134217727)
893 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
894 else
3968e69e 895 abort();
be516ebe 896}
897
d1e4ebd9 898static void emit_jmp(const void *a)
be516ebe 899{
d1e4ebd9 900 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
901 u_int offset = genjmp(a);
902 output_w32(0x14000000 | offset);
be516ebe 903}
904
d1e4ebd9 905static void emit_jne(const void *a)
be516ebe 906{
d1e4ebd9 907 assem_debug("bne %p\n", a);
908 u_int offset = genjmpcc(a);
909 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 910}
911
7c3a5182 912static void emit_jeq(const void *a)
be516ebe 913{
d1e4ebd9 914 assem_debug("beq %p\n", a);
915 u_int offset = genjmpcc(a);
916 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 917}
918
7c3a5182 919static void emit_js(const void *a)
be516ebe 920{
d1e4ebd9 921 assem_debug("bmi %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 924}
925
7c3a5182 926static void emit_jns(const void *a)
be516ebe 927{
d1e4ebd9 928 assem_debug("bpl %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 931}
932
7c3a5182 933static void emit_jl(const void *a)
be516ebe 934{
d1e4ebd9 935 assem_debug("blt %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 938}
939
7c3a5182 940static void emit_jge(const void *a)
be516ebe 941{
d1e4ebd9 942 assem_debug("bge %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 945}
946
7c3a5182 947static void emit_jno(const void *a)
be516ebe 948{
d1e4ebd9 949 assem_debug("bvc %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 952}
953
7c3a5182 954static void emit_jc(const void *a)
be516ebe 955{
d1e4ebd9 956 assem_debug("bcs %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 959}
960
3968e69e 961static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 962{
3968e69e 963 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 964 u_int offset = genjmpcc(a);
3968e69e 965 is64 = is64 ? 0x80000000 : 0;
966 isnz = isnz ? 0x01000000 : 0;
967 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
968}
969
104df9d3 970static unused void emit_cbz(const void *a, u_int r)
3968e69e 971{
972 emit_cb(0, 0, a, r);
be516ebe 973}
974
975static void emit_jmpreg(u_int r)
976{
3968e69e 977 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 978 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 979}
980
981static void emit_retreg(u_int r)
982{
d1e4ebd9 983 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 984 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
985}
986
987static void emit_ret(void)
988{
989 emit_retreg(LR);
990}
991
d1e4ebd9 992static void emit_adr(void *addr, u_int rt)
993{
994 intptr_t offset = (u_char *)addr - out;
995 assert(-1048576 <= offset && offset < 1048576);
3968e69e 996 assert(rt < 31);
d1e4ebd9 997 assem_debug("adr x%d,#%#lx\n", rt, offset);
998 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
999}
1000
3968e69e 1001static void emit_adrp(void *addr, u_int rt)
1002{
1003 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1004 assert(-4294967296l <= offset && offset < 4294967296l);
1005 assert(rt < 31);
1006 offset >>= 12;
1007 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1008 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1009}
1010
be516ebe 1011static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1012{
d1e4ebd9 1013 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1014 assert(-256 <= offset && offset < 256);
1015 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1016}
1017
1018static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1019{
1020 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1021 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1022}
1023
1024static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1025{
1026 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1027 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1028}
1029
1030static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1031{
1032 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1033 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1034}
1035
1036static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1037{
1038 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1039 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1040}
39b71d9a 1041#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1042
1043static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1044{
1045 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1047}
1048
1049static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1050{
1051 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1052 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1053}
1054
1055static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056{
1057 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1058 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1059}
1060
1061static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1062{
1063 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1064 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1065}
1066
1067static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1068{
1069 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1070 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1071}
1072
be516ebe 1073static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1074{
d1e4ebd9 1075 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1076 assert(-256 <= offset && offset < 256);
1077 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1078}
1079
1080static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1081{
d1e4ebd9 1082 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1083 assert(-256 <= offset && offset < 256);
1084 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1085}
1086
1087static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1088{
d1e4ebd9 1089 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1090 assert(-256 <= offset && offset < 256);
1091 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1092}
1093
1094static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1095{
d1e4ebd9 1096 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1099}
1100
be516ebe 1101static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1102{
3968e69e 1103 if (!(offset & 3) && (u_int)offset <= 16380) {
1104 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1105 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1106 }
1107 else if (-256 <= offset && offset < 256) {
1108 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1109 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1110 }
687b4580 1111 else
1112 assert(0);
be516ebe 1113}
1114
1115static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1116{
3968e69e 1117 if (!(offset & 1) && (u_int)offset <= 8190) {
1118 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1119 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1120 }
1121 else if (-256 <= offset && offset < 256) {
1122 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1123 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1124 }
687b4580 1125 else
1126 assert(0);
be516ebe 1127}
1128
1129static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1130{
3968e69e 1131 if ((u_int)offset < 4096) {
1132 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1133 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1134 }
1135 else if (-256 <= offset && offset < 256) {
1136 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1137 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1138 }
687b4580 1139 else
1140 assert(0);
be516ebe 1141}
1142
3968e69e 1143static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1144{
3968e69e 1145 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1146 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1147}
1148
3968e69e 1149static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1150{
3968e69e 1151 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1152 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1153}
1154
1155static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1156{
1157 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1158 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1159}
1160
1161static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1162{
1163 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1165}
1166
3968e69e 1167static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1168{
1169 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1170 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1171}
1172
1173static void emit_clz(u_int rs, u_int rt)
be516ebe 1174{
1175 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1176 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1177}
1178
be516ebe 1179// special case for checking invalid_code
d1e4ebd9 1180static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1181{
d1e4ebd9 1182 host_tempreg_acquire();
1183 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1184 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1185 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1186 emit_cmpimm(HOST_TEMPREG, imm);
1187 host_tempreg_release();
be516ebe 1188}
1189
3968e69e 1190// special for loadlr_assemble, rs2 is destroyed
1191static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1192{
3968e69e 1193 emit_shl(rs2, shift, rs2);
1194 emit_bic(rs1, rs2, rt);
be516ebe 1195}
1196
3968e69e 1197static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1198{
3968e69e 1199 emit_shr(rs2, shift, rs2);
1200 emit_bic(rs1, rs2, rt);
be516ebe 1201}
1202
687b4580 1203static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1204{
687b4580 1205 u_int op = 0xb9000000;
d1e4ebd9 1206 unused const char *ldst = is_st ? "st" : "ld";
1207 unused char rp = is64 ? 'x' : 'w';
687b4580 1208 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1209 is64 = is64 ? 1 : 0;
1210 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1211 ofs = (ofs >> (2+is64));
687b4580 1212 if (!is_st) op |= 0x00400000;
1213 if (is64) op |= 0x40000000;
d1e4ebd9 1214 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1215}
1216
687b4580 1217static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1218{
687b4580 1219 u_int op = 0x29000000;
d1e4ebd9 1220 unused const char *ldst = is_st ? "st" : "ld";
1221 unused char rp = is64 ? 'x' : 'w';
687b4580 1222 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1223 is64 = is64 ? 1 : 0;
1224 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1225 ofs = (ofs >> (2+is64));
1226 assert(-64 <= ofs && ofs <= 63);
1227 ofs &= 0x7f;
1228 if (!is_st) op |= 0x00400000;
1229 if (is64) op |= 0x80000000;
d1e4ebd9 1230 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1231}
1232
1233static void save_load_regs_all(int is_store, u_int reglist)
1234{
1235 int ofs = 0, c = 0;
1236 u_int r, pair[2];
1237 for (r = 0; reglist; r++, reglist >>= 1) {
1238 if (reglist & 1)
1239 pair[c++] = r;
1240 if (c == 2) {
1241 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1242 ofs += 8 * 2;
1243 c = 0;
1244 }
1245 }
1246 if (c) {
1247 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1248 ofs += 8;
1249 }
1250 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1251}
1252
1253// Save registers before function call
1254static void save_regs(u_int reglist)
1255{
1256 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1257 save_load_regs_all(1, reglist);
be516ebe 1258}
1259
1260// Restore registers after function call
1261static void restore_regs(u_int reglist)
1262{
1263 reglist &= CALLER_SAVE_REGS;
687b4580 1264 save_load_regs_all(0, reglist);
be516ebe 1265}
1266
1267/* Stubs/epilogue */
1268
// Literal pool hook. This implementation emits nothing and ignores n.
static void literal_pool(int n)
{
  // reference `literals` so it does not trigger an unused warning
  (void)literals;
}
1273
// Literal pool jump-over hook. Intentionally empty in this implementation;
// n is ignored.
static void literal_pool_jumpover(int n)
{
}
1277
d1e4ebd9 1278// parsed by get_pointer, find_extjump_insn
104df9d3 1279static void emit_extjump(u_char *addr, u_int target)
be516ebe 1280{
d1e4ebd9 1281 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1282
d1e4ebd9 1283 emit_movz(target & 0xffff, 0);
1284 emit_movk_lsl16(target >> 16, 0);
1285
1286 // addr is in the current recompiled block (max 256k)
1287 // offset shouldn't exceed +/-1MB
1288 emit_adr(addr, 1);
104df9d3 1289 emit_far_jump(dyna_linker);
be516ebe 1290}
1291
d1e4ebd9 1292static void check_extjump2(void *src)
be516ebe 1293{
d1e4ebd9 1294 u_int *ptr = src;
1295 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1296 (void)ptr;
be516ebe 1297}
1298
1299// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1300static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1301{
d1e4ebd9 1302 int diff = rt_val - rs_val;
3968e69e 1303 if ((-4096 < diff && diff < 4096)
1304 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1305 emit_addimm(rs, diff, rt);
3968e69e 1306 else if (rt_val == ~rs_val)
1307 emit_not(rs, rt);
d1e4ebd9 1308 else if (is_rotated_mask(rs_val ^ rt_val))
1309 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1310 else
d1e4ebd9 1311 emit_movimm(rt_val, rt);
be516ebe 1312}
1313
d1e4ebd9 1314// return 1 if the above function can do it's job cheaply
687b4580 1315static int is_similar_value(u_int v1, u_int v2)
be516ebe 1316{
687b4580 1317 int diff = v1 - v2;
3968e69e 1318 return (-4096 < diff && diff < 4096)
1319 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1320 || v1 == ~v2
d1e4ebd9 1321 || is_rotated_mask(v1 ^ v2);
1322}
1323
37387d8b 1324static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1325{
1326 if (rt_val < 0x100000000ull) {
1327 emit_movimm_from(rs_val, rs, rt_val, rt);
1328 return;
1329 }
1330 // just move the whole thing. At least on Linux all addresses
1331 // seem to be 48bit, so 3 insns - not great not terrible
1332 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1333 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1334 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1335 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1336 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1337 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1338 if (rt_val >> 48) {
1339 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1340 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1341 }
1342}
1343
1344// trashes x2
d1e4ebd9 1345static void pass_args64(u_int a0, u_int a1)
1346{
1347 if(a0==1&&a1==0) {
1348 // must swap
1349 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1350 }
1351 else if(a0!=0&&a1==0) {
1352 emit_mov64(a1,1);
1353 if (a0>=0) emit_mov64(a0,0);
1354 }
1355 else {
1356 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1357 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1358 }
be516ebe 1359}
1360
d1e4ebd9 1361static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1362{
1363 switch(type) {
1364 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1365 case LOADBU_STUB:
1366 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1367 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1368 case LOADHU_STUB:
1369 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1370 case LOADW_STUB:
1371 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1372 default: assert(0);
d1e4ebd9 1373 }
1374}
1375
1376#include "pcsxmem.h"
be516ebe 1377//#include "pcsxmem_inline.c"
1378
// Emit the out-of-line slow path for memory read stub n.
// The stub looks up the mem_rtab entry for the page of the address in rs
// (rs >> 12). The entry is doubled with a flag-setting add: if that carries
// out, the access goes through a jump_handler_read* call; otherwise the
// doubled entry is used as the base for a direct load at [base + rs].
// Both paths end by jumping to stubs[n].retaddr.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  // patch the branch in the main code so it lands here
  set_jump_target(stubs[n].addr, out);
  enum stub_type type = stubs[n].type;
  int i = stubs[n].a;
  int rs = stubs[n].b;
  const struct regstat *i_regs = (void *)stubs[n].c;
  u_int reglist = stubs[n].e;
  const signed char *i_regmap = i_regs->regmap;
  int rt;
  // destination host register: FTEMP for C1LS/C2LS/LOADLR, else the
  // register mapped to the instruction's rt1
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,dops[i].rt1);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL, *handler_jump = NULL;
  reglist|=(1<<rs);
  // look for a host register not in reglist to use as scratch
  for (r = 0; r < HOST_CCREG; r++) {
    if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
      temp = r;
      break;
    }
  }
  // the destination is about to be overwritten, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: save registers up front and take x0 (or x2 if rs is x0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // prefer x1 as the second scratch when it is available
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  emit_readdword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readdword_dualindexedx8(temp,temp2,temp2);
  // double the table entry; carry out selects the handler path
  emit_adds64(temp2,temp2,temp2);
  handler_jump=out;
  emit_jc(0);
  // direct path: load from [temp2 + rs]
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    switch(type) {
      case LOADB_STUB:  emit_ldrsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB:  emit_ldrsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB:  emit_ldr_dualindexed(temp2,rs,rt); break;
      default:          assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jmp(0); // jump to reg restore
  }
  else
    emit_jmp(stubs[n].retaddr); // return address
  // handler path starts here
  set_jump_target(handler_jump, out);

  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // x0 = address, x1 = doubled table entry
  pass_args64(rs,temp2);
  // x2 = cycle count + stubs[n].d
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  emit_far_call(handler);
  // (no cycle reload after read)
  // the result comes back in register 0; extend it into rt
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    loadstore_extend(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1463
// Emit a memory read for an address that is known at assembly time (addr).
// If get_direct_memhandler() reports no handler, the value is loaded
// straight from host_addr. Otherwise registers are saved and a handler is
// called, with the result taken from register 0.
//   target:  guest register receiving the value
//   adj:     added to the cycle count passed to the handler in register 2
//   reglist: host registers to preserve around the call
static void inline_readstub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  // target is not mapped: use a temporary as the address register
  if(rs<0) rs=get_reg_temp(regmap);
  assert(rs>=0);
  u_int is_dynamic=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
  //  return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // directly readable; nothing to do if the result is discarded
    if(rt<0||dops[i].rt1==0)
      return;
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch(type) {
      case LOADB_STUB:  emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB:  emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB:  emit_readword_indexed(0,rs,rt); break;
      default:          assert(0);
    }
    return;
  }
  is_dynamic = pcsxmem_is_handler_dynamic(addr);
  if (is_dynamic) {
    // go through the generic jump_handler_read* entry points instead
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // register 0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // register 2 = cycle count + adj
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,adj,2);
  if(is_dynamic) {
    // register 1 = this page's mem_rtab entry shifted left by one
    uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
    emit_adrp((void *)l1, 1);
    emit_addimm64(1, l1 & 0xfff, 1);
  }
  else
    emit_far_call(do_memhandler_pre);

  emit_far_call(handler);

  // (no cycle reload after read)
  if(rt>=0&&dops[i].rt1!=0)
    loadstore_extend(type, 0, rt);
  restore_regs(reglist);
}
1529
1530static void do_writestub(int n)
1531{
1532 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1533 set_jump_target(stubs[n].addr, out);
1534 enum stub_type type=stubs[n].type;
1535 int i=stubs[n].a;
1536 int rs=stubs[n].b;
1537 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1538 u_int reglist=stubs[n].e;
1539 signed char *i_regmap=i_regs->regmap;
1540 int rt,r;
cf95b4f0 1541 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1542 rt=get_reg(i_regmap,r=FTEMP);
1543 }else{
cf95b4f0 1544 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1545 }
1546 assert(rs>=0);
1547 assert(rt>=0);
1548 int rtmp,temp=-1,temp2,regs_saved=0;
1549 void *restore_jump = NULL, *handler_jump = NULL;
1550 int reglist2=reglist|(1<<rs)|(1<<rt);
1551 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1552 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1553 temp = rtmp;
1554 break;
1555 }
1556 }
1557 if(temp==-1) {
1558 save_regs(reglist);
1559 regs_saved=1;
1560 for(rtmp=0;rtmp<=3;rtmp++)
1561 if(rtmp!=rs&&rtmp!=rt)
1562 {temp=rtmp;break;}
1563 }
1564 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1565 temp2=3;
1566 else {
1567 host_tempreg_acquire();
1568 temp2=HOST_TEMPREG;
1569 }
1570 emit_readdword(&mem_wtab,temp);
1571 emit_shrimm(rs,12,temp2);
1572 emit_readdword_dualindexedx8(temp,temp2,temp2);
1573 emit_adds64(temp2,temp2,temp2);
1574 handler_jump=out;
1575 emit_jc(0);
1576 switch(type) {
1577 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1578 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1579 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1580 default: assert(0);
1581 }
1582 if(regs_saved) {
1583 restore_jump=out;
1584 emit_jmp(0); // jump to reg restore
1585 }
1586 else
1587 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1588 set_jump_target(handler_jump, out);
1589
d1e4ebd9 1590 if(!regs_saved)
1591 save_regs(reglist);
1592 void *handler=NULL;
1593 switch(type) {
1594 case STOREB_STUB: handler=jump_handler_write8; break;
1595 case STOREH_STUB: handler=jump_handler_write16; break;
1596 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1597 default: assert(0);
d1e4ebd9 1598 }
1599 assert(handler);
1600 pass_args(rs,rt);
1601 if(temp2!=3) {
1602 emit_mov64(temp2,3);
1603 host_tempreg_release();
1604 }
1605 int cc=get_reg(i_regmap,CCREG);
1606 if(cc<0)
1607 emit_loadreg(CCREG,2);
2330734f 1608 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
d1e4ebd9 1609 // returns new cycle_count
2a014d73 1610 emit_far_call(handler);
2330734f 1611 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
d1e4ebd9 1612 if(cc<0)
1613 emit_storereg(CCREG,2);
1614 if(restore_jump)
1615 set_jump_target(restore_jump, out);
1616 restore_regs(reglist);
1617 emit_jmp(stubs[n].retaddr);
be516ebe 1618}
1619
// Emit a memory write for an address that is known at assembly time (addr).
// If get_direct_memhandler() reports no handler, the value is stored
// straight to host_addr. Otherwise registers are saved and the handler is
// called between do_memhandler_pre and do_memhandler_post.
//   target:  guest register holding the value to store
//   adj:     added to the cycle count passed in register 2, and subtracted
//            again from the count read back from register 0
//   reglist: host registers to preserve around the call
static void inline_writestub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs = get_reg_temp(regmap);
  int rt = get_reg(regmap,target);
  assert(rs >= 0);
  assert(rt >= 0);
  uintptr_t host_addr = 0;
  void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
  if (handler == NULL) {
    // directly writable: point rs at the host address and store
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch (type) {
      case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
      case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
      case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
      default:          assert(0);
    }
    return;
  }

  // call a memhandler
  save_regs(reglist);
  emit_writeword(rs, &address); // some handlers still need it
  // register 0 = value, extended to the access width
  loadstore_extend(type, rt, 0);
  int cc, cc_use;
  cc = cc_use = get_reg(regmap, CCREG);
  if (cc < 0)
    emit_loadreg(CCREG, (cc_use = 2));
  emit_addimm(cc_use, adj, 2);

  emit_far_call(do_memhandler_pre);
  emit_far_call(handler);
  emit_far_call(do_memhandler_post);
  emit_addimm(0, -adj, cc_use);
  if (cc < 0)
    emit_storereg(CCREG, cc_use);
  restore_regs(reglist);
}
1659
3968e69e 1660/* Special assem */
1661
81dbbf4c 1662static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1663{
1664 save_load_regs_all(1, reglist);
32631e6a 1665 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1666#ifdef PCNT
1667 emit_movimm(op, 0);
2a014d73 1668 emit_far_call(pcnt_gte_start);
3968e69e 1669#endif
1670 // pointer to cop2 regs
1671 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1672}
1673
1674static void c2op_epilogue(u_int op,u_int reglist)
1675{
1676#ifdef PCNT
1677 emit_movimm(op, 0);
2a014d73 1678 emit_far_call(pcnt_gte_end);
3968e69e 1679#endif
1680 save_load_regs_all(0, reglist);
be516ebe 1681}
1682
81dbbf4c 1683static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1684{
3968e69e 1685 u_int c2op=source[i]&0x3f;
1686 u_int hr,reglist_full=0,reglist;
1687 int need_flags,need_ir;
1688 for(hr=0;hr<HOST_REGS;hr++) {
1689 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1690 }
1691 reglist=reglist_full&CALLER_SAVE_REGS;
1692
1693 if (gte_handlers[c2op]!=NULL) {
1694 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1695 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1696 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1697 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1698 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1699 need_flags=0;
1700 //int shift = (source[i] >> 19) & 1;
1701 //int lm = (source[i] >> 10) & 1;
1702 switch(c2op) {
1703 default:
1704 (void)need_ir;
81dbbf4c 1705 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1706 emit_movimm(source[i],1); // opcode
1707 emit_writeword(1,&psxRegs.code);
2a014d73 1708 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1709 break;
1710 }
1711 c2op_epilogue(c2op,reglist);
1712 }
1713}
1714
// Emit the value fix-up for a write to cop2 control register 31, following
// the reference logic in the two commented lines below. sl holds the source
// value, temp receives the result, and HOST_TEMPREG is borrowed for the
// conditional select.
// The trailing assert(0) marks this path as untested: it fires whenever
// this function is called in a build with asserts enabled.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  //value = value & 0x7ffff000;
  //if (value & 0x7f87e000) value |= 0x80000000;
  emit_andimm(sl, 0x7fffe000, temp);
  // the test result is consumed by the conditional move further down
  emit_testimm(temp, 0xff87ffff);
  emit_andimm(sl, 0x7ffff000, temp);
  host_tempreg_acquire();
  emit_orimm(temp, 0x80000000, HOST_TEMPREG);
  emit_cmovne_reg(HOST_TEMPREG, temp);
  host_tempreg_release();
  assert(0); // testing needed
}
1728
// Emit code that reads cop2 data register `copr` as a signed halfword and
// reduces it to a value masked with 0xf80 in temp. One component of the
// packed value built by c2op_mfc2_29_assemble().
static void do_mfc2_31_one(u_int copr,signed char temp)
{
  emit_readshword(&reg_cop2d[copr],temp);
  // NOTE(review): presumably clamps negative values to 0 - confirm against emit_bicsar_imm
  emit_bicsar_imm(temp,31,temp);
  emit_cmpimm(temp,0xf80);
  emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
  emit_andimm(temp,0xf80,temp);
}
1737
// Emit code that builds cop2 data register 29 from registers 9, 10 and 11:
// each is reduced by do_mfc2_31_one() and the three results are combined
// into tl with shifts of >>7, >>2 and <<3, then written to reg_cop2d[29].
// If temp is negative, HOST_TEMPREG is borrowed as the scratch register.
static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
{
  if (temp < 0) {
    host_tempreg_acquire();
    temp = HOST_TEMPREG;
  }
  do_mfc2_31_one(9,temp);
  emit_shrimm(temp,7,tl);
  do_mfc2_31_one(10,temp);
  emit_orrshr_imm(temp,2,tl);
  do_mfc2_31_one(11,temp);
  emit_orrshl_imm(temp,3,tl);
  emit_writeword(tl,&reg_cop2d[29]);

  // NOTE(review): this also releases when the caller itself passed
  // HOST_TEMPREG as temp - confirm callers never do that
  if (temp == HOST_TEMPREG)
    host_tempreg_release();
}
1755
// Assemble MULT/MULTU/DIV/DIVU for instruction i.
// With both source registers nonzero-numbered, the operation is emitted in
// full: the 64-bit product is split into LO (low word) and HI (high word),
// or the quotient goes to LO and the remainder to HI. When either source is
// guest register 0, the result is a known constant or a simple function of
// the numerator and is emitted directly.
static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
{
  //  case 0x18: MULT
  //  case 0x19: MULTU
  //  case 0x1A: DIV
  //  case 0x1B: DIVU
  if(dops[i].rs1&&dops[i].rs2)
  {
    switch(dops[i].opcode2)
    {
    case 0x18: // MULT
    case 0x19: // MULTU
      {
        signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
        signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
        signed char hi=get_reg(i_regs->regmap,HIREG);
        signed char lo=get_reg(i_regs->regmap,LOREG);
        assert(m1>=0);
        assert(m2>=0);
        assert(hi>=0);
        assert(lo>=0);

        // 64-bit product lands in hi
        if(dops[i].opcode2==0x18) // MULT
          emit_smull(m1,m2,hi);
        else                      // MULTU
          emit_umull(m1,m2,hi);

        // lo = low word, hi = high word
        emit_mov(hi,lo);
        emit_shrimm64(hi,32,hi);
        break;
      }
    case 0x1A: // DIV
    case 0x1B: // DIVU
      {
        signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
        signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
        signed char quotient=get_reg(i_regs->regmap,LOREG);
        signed char remainder=get_reg(i_regs->regmap,HIREG);
        assert(numerator>=0);
        assert(denominator>=0);
        assert(quotient>=0);
        assert(remainder>=0);

        if (dops[i].opcode2 == 0x1A) // DIV
          emit_sdiv(numerator,denominator,quotient);
        else                         // DIVU
          emit_udiv(numerator,denominator,quotient);
        // remainder via multiply-subtract of quotient, denominator, numerator
        emit_msub(quotient,denominator,numerator,remainder);

        // div 0 quotient (remainder is already correct)
        host_tempreg_acquire();
        // NOTE(review): the first argument 0 of emit_sub_asrimm is a
        // register number here - confirm that is intended rather than WZR
        if (dops[i].opcode2 == 0x1A) // DIV
          emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
        else
          emit_movimm(~0,HOST_TEMPREG);
        // replace the quotient when the denominator is zero
        emit_test(denominator,denominator);
        emit_cmoveq_reg(HOST_TEMPREG,quotient);
        host_tempreg_release();
        break;
      }
    default:
      assert(0);
    }
  }
  else
  {
    // at least one operand is guest register 0; HI/LO may be unmapped
    signed char hr=get_reg(i_regs->regmap,HIREG);
    signed char lr=get_reg(i_regs->regmap,LOREG);
    if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
    {
      if (dops[i].rs1) {
        signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
        assert(numerator >= 0);
        // HI (remainder) = numerator
        if (hr >= 0)
          emit_mov(numerator,hr);
        if (lr >= 0) {
          if (dops[i].opcode2 == 0x1A) // DIV
            emit_sub_asrimm(0,numerator,31,lr);
          else
            emit_movimm(~0,lr);
        }
      }
      else {
        // 0 / 0
        if (hr >= 0) emit_zeroreg(hr);
        if (lr >= 0) emit_movimm(~0,lr);
      }
    }
    else
    {
      // Multiply by zero is zero.
      if (hr >= 0) emit_zeroreg(hr);
      if (lr >= 0) emit_zeroreg(lr);
    }
  }
}
#define multdiv_assemble multdiv_assemble_arm64
1852
d1e4ebd9 1853static void do_jump_vaddr(u_int rs)
1854{
1855 if (rs != 0)
1856 emit_mov(rs, 0);
104df9d3 1857 emit_far_call(ndrc_get_addr_ht);
d1e4ebd9 1858 emit_jmpreg(0);
1859}
1860
// Hook for preloading the hash mask register. Intentionally empty here;
// r is ignored.
static void do_preload_rhash(u_int r) {
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction (below)
}
1865
1866static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1867 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1868}
1869
1870static void do_rhash(u_int rs,u_int rh) {
1871 emit_andimm(rs, 0xf8, rh);
1872}
1873
d1e4ebd9 1874static void do_miniht_load(int ht, u_int rh) {
1875 emit_add64(ht, rh, ht);
1876 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1877}
1878
d1e4ebd9 1879static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1880 emit_cmp(rh, rs);
1881 void *jaddr = out;
1882 emit_jeq(0);
1883 do_jump_vaddr(rs);
1884
1885 set_jump_target(jaddr, out);
1886 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1887 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1888 emit_jmpreg(ht);
be516ebe 1889}
1890
d1e4ebd9 1891// parsed by set_jump_target?
be516ebe 1892static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1893 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1894 emit_movk(return_address&0xffff,rt);
1895 add_to_linker(out,return_address,1);
1896 emit_adr(out,temp);
1897 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1898 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1899}
1900
// Make freshly written code in [start, end) visible to instruction fetch.
// The data cache clean and instruction cache invalidate loops are each
// skipped when CTR_EL0 bit 28 or bit 29 respectively reports that the step
// is not required.
static void clear_cache_arm64(char *start, char *end)
{
  // Don't rely on GCC's __clear_cache implementation, as it caches
  // icache/dcache cache line sizes, that can vary between cores on
  // big.LITTLE architectures.
  uint64_t addr, ctr_el0;
  // smallest line sizes observed so far across all calls
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  // line sizes in bytes, from CTR_EL0 bits [3:0] and [19:16]
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
  dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    // start from the cache line containing `start`
    addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
    for (; addr < (uint64_t)end; addr += dsize)
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    addr = (uint64_t)start & ~(uint64_t)(isize - 1);
    for (; addr < (uint64_t)end; addr += isize)
      __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1941
1942// CPU-architecture-specific initialization
2a014d73 1943static void arch_init(void)
1944{
1945 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1946 struct tramp_insns *ops = ndrc->tramp.ops;
1947 size_t i;
1948 assert(!(diff & 3));
1949 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1950 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1951 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1952 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1953 }
1954 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 1955}
1956
1957// vim:shiftwidth=2:expandtab