drc: try to make gte stall handling less bloaty
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
d1e4ebd9 30void do_memhandler_pre();
31void do_memhandler_post();
be516ebe 32
33/* Linker */
// Patch the pc-relative instruction at addr so that it refers to target.
// Handles b, b.cond and cbz/cbnz (branch target) as well as adr
// (address load); any other instruction aborts.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr & 0xfc000000) == 0x14000000) { // b: imm26 in bits [25:0]
    assert(offset >= -134217728LL && offset < 134217728LL);
    *ptr = (*ptr & 0xfc000000) | ((offset >> 2) & 0x3ffffff);
  }
  else if ((*ptr & 0xff000000) == 0x54000000   // b.cond
        || (*ptr & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // imm19 occupies bits [23:5]. Bits [4:0] carry the condition (b.cond)
    // or the whole 5-bit Rt (cbz/cbnz), so all five must be kept;
    // masking with ...0F would corrupt Rt for registers 16 and up.
    *ptr = (*ptr & 0xff00001f) | (((offset >> 2) & 0x7ffff) << 5);
  }
  else if ((*ptr & 0x9f000000) == 0x10000000) { // adr: immlo [30:29], immhi [23:5]
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *ptr = (*ptr & 0x9f00001f) | ((offset & 0x3) << 29) | (((offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
60
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// decode the adr found in the stub's third word and return the address
// it refers to, i.e. the location of the jumping insn.
static void *find_extjump_insn(void *stub)
{
  int *adr = (int *)stub + 2;
  const unsigned int insn = (unsigned int)*adr;

  assert((insn & 0x9f000000) == 0x10000000); // adr

  // immhi is bits [23:5] (sign-extended), immlo is bits [30:29]
  const int immhi = (int)(insn << 8) >> 13;
  const int immlo = (int)((insn >> 29) & 0x3);
  const int offset = (immhi * 4) | immlo;

  return adr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// look up the branch insn the stub refers to (see find_extjump_insn),
// decode its pc-relative immediate and return the address it jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  int words;

  if ((*branch & 0xfc000000) == 0x14000000) { // b: imm26 in bits [25:0]
    words = (signed int)(*branch << 6) >> 6;
    return branch + words;
  }
  if ((*branch & 0xff000000) == 0x54000000    // b.cond
      || (*branch & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // imm19 in bits [23:5]
    words = (signed int)(*branch << 8) >> 13;
    return branch + words;
  }
  assert(0);
  return NULL;
}
86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes; the value is the 4-bit cond field of the instruction
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
d1e4ebd9 149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
687b4580 156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
d1e4ebd9 162*/
687b4580 163
// Pack Rn (bits [9:5]) and Rd (bits [4:0]); register 31 is not accepted.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  return fields;
}
170
// Pack Rm (bits [20:16]), Rn (bits [9:5]) and Rd (bits [4:0]).
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
178
3968e69e 179static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180{
181 assert(ra < 32);
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
183}
184
// Pack imm7 (bits [21:15]), Rt2 (bits [14:10]), Rn (bits [9:5])
// and Rt (bits [4:0]).
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
193
687b4580 194static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195{
196 assert(imm6 <= 63);
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
198}
199
// Pack a 16-bit immediate (bits [20:5]) and Rd (bits [4:0]).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
206
// Pack a 12-bit immediate (bits [21:10]), Rn (bits [9:5]) and Rd (bits [4:0]).
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
214
// Pack a 9-bit immediate (bits [20:12]), Rn (bits [9:5]) and Rt (bits [4:0]).
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
222
// Pack a 19-bit immediate (bits [23:5]) and Rt (bits [4:0]).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
229
// Pack N (bit 22), immr (bits [21:16]), imms (bits [15:10]),
// Rn (bits [9:5]) and Rd (bits [4:0]).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
239
240static u_int genjmp(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 244 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
be516ebe 247 return 0;
248 }
d1e4ebd9 249 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 250}
251
d1e4ebd9 252static u_int genjmpcc(const u_char *addr)
be516ebe 253{
254 intptr_t offset = addr - out;
d1e4ebd9 255 if ((uintptr_t)addr < 3) return 0;
be516ebe 256 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (i.e. of the form 2^n - 1, including all-ones), else 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low mask carries out of the run, leaving no common bits
  return (value & (value + 1)) == 0;
}
268
269// This function returns true if the argument contains a
270// non-empty sequence of ones (possibly rotated) with the remainder zero.
271static uint32_t is_rotated_mask(u_int value)
272{
3968e69e 273 if (value == 0 || value == ~0)
be516ebe 274 return 0;
d1e4ebd9 275 if (is_mask((value - 1) | value))
276 return 1;
277 return is_mask((~value - 1) | ~value);
278}
279
// Compute the immr/imms fields of a 32-bit logical immediate (N=0) for
// value, which must be a single contiguous run of ones, possibly wrapping
// around bit 31 (see is_rotated_mask).
//   value - constant to encode; 0 and all-ones have no encoding and assert
//   immr  - out: rotate-right amount
//   imms  - out: number of ones minus one
// Aborts if value is not a (rotated) run of ones.
static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
{
  int lzeros, tzeros, ones;
  u_int filled;

  assert(value != 0);
  // all-ones would pass the first test below and produce imms=31,
  // which is a reserved encoding for the 32-bit element size
  assert(value != ~0u);

  // fill the trailing zeros; the result is 2^n-1 iff the run doesn't wrap
  filled = (value - 1) | value;
  if ((filled & (filled + 1)) == 0) {
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros;
    *immr = (32 - tzeros) & 31;
    *imms = ones - 1;
    return;
  }

  // wrapping run of ones: work on the (non-wrapping) run of zeros instead
  value = ~value;
  filled = (value - 1) | value;
  if ((filled & (filled + 1)) == 0) {
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros; // number of zero bits in the original
    *immr = lzeros;
    *imms = 31 - ones;
    return;
  }
  abort();
}
303
304static void emit_mov(u_int rs, u_int rt)
305{
687b4580 306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 314}
315
687b4580 316static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
d1e4ebd9 324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 326}
327
d1e4ebd9 328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
3968e69e 330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
333
334static void emit_neg(u_int rs, u_int rt)
335{
336 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
337 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 338}
339
687b4580 340static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 341{
d1e4ebd9 342 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 343 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 344}
345
3968e69e 346static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347{
348 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
349 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
350}
351
d1e4ebd9 352static void emit_movz(u_int imm, u_int rt)
be516ebe 353{
d1e4ebd9 354 assem_debug("movz %s,#%#x\n", regname[rt], imm);
355 output_w32(0x52800000 | imm16_rd(imm, rt));
356}
357
358static void emit_movz_lsl16(u_int imm, u_int rt)
359{
360 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x52a00000 | imm16_rd(imm, rt));
362}
363
364static void emit_movn(u_int imm, u_int rt)
365{
366 assem_debug("movn %s,#%#x\n", regname[rt], imm);
367 output_w32(0x12800000 | imm16_rd(imm, rt));
368}
369
370static void emit_movn_lsl16(u_int imm,u_int rt)
371{
372 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
373 output_w32(0x12a00000 | imm16_rd(imm, rt));
374}
375
376static void emit_movk(u_int imm,u_int rt)
377{
378 assem_debug("movk %s,#%#x\n", regname[rt], imm);
379 output_w32(0x72800000 | imm16_rd(imm, rt));
380}
381
382static void emit_movk_lsl16(u_int imm,u_int rt)
383{
384 assert(imm<65536);
3968e69e 385 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 386 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 387}
388
389static void emit_zeroreg(u_int rt)
390{
d1e4ebd9 391 emit_movz(0, rt);
be516ebe 392}
393
be516ebe 394static void emit_movimm(u_int imm, u_int rt)
395{
d1e4ebd9 396 if (imm < 65536)
397 emit_movz(imm, rt);
398 else if ((~imm) < 65536)
399 emit_movn(~imm, rt);
400 else if ((imm&0xffff) == 0)
401 emit_movz_lsl16(imm >> 16, rt);
402 else if (((~imm)&0xffff) == 0)
403 emit_movn_lsl16(~imm >> 16, rt);
404 else if (is_rotated_mask(imm)) {
405 u_int immr, imms;
406 gen_logical_imm(imm, &immr, &imms);
407 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
408 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
409 }
be516ebe 410 else {
d1e4ebd9 411 emit_movz(imm & 0xffff, rt);
412 emit_movk_lsl16(imm >> 16, rt);
be516ebe 413 }
414}
415
687b4580 416static void emit_readword(void *addr, u_int rt)
417{
418 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
419 if (!(offset & 3) && offset <= 16380) {
420 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
421 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
422 }
423 else
3968e69e 424 abort();
687b4580 425}
426
d1e4ebd9 427static void emit_readdword(void *addr, u_int rt)
428{
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 7) && offset <= 32760) {
431 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
432 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
433 }
3968e69e 434 else
435 abort();
436}
437
438static void emit_readshword(void *addr, u_int rt)
439{
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
444 }
d1e4ebd9 445 else
446 assert(0);
447}
448
be516ebe 449static void emit_loadreg(u_int r, u_int hr)
450{
d1e4ebd9 451 int is64 = 0;
be516ebe 452 assert(r < 64);
453 if (r == 0)
454 emit_zeroreg(hr);
455 else {
7c3a5182 456 void *addr = &psxRegs.GPR.r[r];
be516ebe 457 switch (r) {
7c3a5182 458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
be516ebe 460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
d1e4ebd9 462 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 463 default: assert(r < 34); break;
be516ebe 464 }
d1e4ebd9 465 if (is64)
466 emit_readdword(addr, hr);
467 else
468 emit_readword(addr, hr);
be516ebe 469 }
470}
471
687b4580 472static void emit_writeword(u_int rt, void *addr)
473{
474 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
475 if (!(offset & 3) && offset <= 16380) {
476 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
477 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
478 }
479 else
480 assert(0);
481}
482
d1e4ebd9 483static void emit_writedword(u_int rt, void *addr)
484{
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 7) && offset <= 32760) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 488 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 489 }
490 else
3968e69e 491 abort();
d1e4ebd9 492}
493
687b4580 494static void emit_storereg(u_int r, u_int hr)
be516ebe 495{
496 assert(r < 64);
7c3a5182 497 void *addr = &psxRegs.GPR.r[r];
be516ebe 498 switch (r) {
7c3a5182 499 //case HIREG: addr = &hi; break;
500 //case LOREG: addr = &lo; break;
be516ebe 501 case CCREG: addr = &cycle_count; break;
7c3a5182 502 default: assert(r < 34); break;
be516ebe 503 }
687b4580 504 emit_writeword(hr, addr);
be516ebe 505}
506
507static void emit_test(u_int rs, u_int rt)
508{
d1e4ebd9 509 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
510 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 511}
512
d1e4ebd9 513static void emit_testimm(u_int rs, u_int imm)
be516ebe 514{
d1e4ebd9 515 u_int immr, imms;
687b4580 516 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 517 assert(is_rotated_mask(imm)); // good enough for PCSX
518 gen_logical_imm(imm, &immr, &imms);
3968e69e 519 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 520}
521
522static void emit_not(u_int rs,u_int rt)
523{
524 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 525 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 526}
527
be516ebe 528static void emit_and(u_int rs1,u_int rs2,u_int rt)
529{
530 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 531 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 532}
533
534static void emit_or(u_int rs1,u_int rs2,u_int rt)
535{
536 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 537 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 538}
539
3968e69e 540static void emit_bic(u_int rs1,u_int rs2,u_int rt)
541{
542 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
543 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
544}
545
be516ebe 546static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
547{
be516ebe 548 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 549 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 550}
551
552static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
553{
be516ebe 554 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 555 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 556}
557
3968e69e 558static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
559{
560 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
561 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
562}
563
be516ebe 564static void emit_xor(u_int rs1,u_int rs2,u_int rt)
565{
566 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 567 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 568}
569
3968e69e 570static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
571{
572 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
573 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
574}
575
d1e4ebd9 576static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 577{
d1e4ebd9 578 unused const char *st = s ? "s" : "";
579 s = s ? 0x20000000 : 0;
580 is64 = is64 ? 0x80000000 : 0;
687b4580 581 if (imm < 4096) {
d1e4ebd9 582 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
583 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 584 }
585 else if (-imm < 4096) {
3968e69e 586 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 587 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
588 }
589 else if (imm < 16777216) {
590 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
591 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
592 if ((imm & 0xfff) || s) {
593 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 595 }
596 }
597 else if (-imm < 16777216) {
598 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
599 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
602 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
603 }
687b4580 604 }
605 else
3968e69e 606 abort();
be516ebe 607}
608
d1e4ebd9 609static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
610{
611 emit_addimm_s(0, 0, rs, imm, rt);
612}
613
614static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
615{
616 emit_addimm_s(0, 1, rs, imm, rt);
617}
618
be516ebe 619static void emit_addimm_and_set_flags(int imm, u_int rt)
620{
d1e4ebd9 621 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 622}
623
624static void emit_addimm_no_flags(u_int imm,u_int rt)
625{
626 emit_addimm(rt,imm,rt);
627}
628
d1e4ebd9 629static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 630{
d1e4ebd9 631 const char *names[] = { "and", "orr", "eor", "ands" };
632 const char *name = names[op];
633 u_int immr, imms;
634 op = op << 29;
635 if (is_rotated_mask(imm)) {
636 gen_logical_imm(imm, &immr, &imms);
637 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
638 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
639 }
640 else {
641 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
642 host_tempreg_acquire();
643 emit_movimm(imm, HOST_TEMPREG);
644 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
645 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
646 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
647 host_tempreg_release();
648 }
649 (void)name;
be516ebe 650}
651
d1e4ebd9 652static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 653{
d1e4ebd9 654 if (imm == 0)
655 emit_zeroreg(rt);
656 else
657 emit_logicop_imm(0, rs, imm, rt);
be516ebe 658}
659
d1e4ebd9 660static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 661{
d1e4ebd9 662 if (imm == 0) {
663 if (rs != rt)
664 emit_mov(rs, rt);
665 }
666 else
667 emit_logicop_imm(1, rs, imm, rt);
be516ebe 668}
669
d1e4ebd9 670static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 671{
d1e4ebd9 672 if (imm == 0) {
673 if (rs != rt)
674 emit_mov(rs, rt);
675 }
676 else
677 emit_logicop_imm(2, rs, imm, rt);
be516ebe 678}
679
d1e4ebd9 680static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 681{
d1e4ebd9 682 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 684}
685
d1e4ebd9 686static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 687{
d1e4ebd9 688 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 690}
691
692static void emit_shlimm(u_int rs,u_int imm,u_int rt)
693{
be516ebe 694 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 696}
697
3968e69e 698static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 699{
3968e69e 700 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 702}
703
3968e69e 704static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 705{
be516ebe 706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 707 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 708}
709
710static void emit_sarimm(u_int rs,u_int imm,u_int rt)
711{
be516ebe 712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 713 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 714}
715
716static void emit_rorimm(u_int rs,u_int imm,u_int rt)
717{
3968e69e 718 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 719 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 720}
721
722static void emit_signextend16(u_int rs, u_int rt)
723{
724 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 725 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 726}
727
d1e4ebd9 728static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 729{
3968e69e 730 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 731 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 732}
733
d1e4ebd9 734static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 735{
d1e4ebd9 736 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 738}
739
d1e4ebd9 740static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 741{
d1e4ebd9 742 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 744}
745
d1e4ebd9 746static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 747{
d1e4ebd9 748 if (imm < 4096) {
749 assem_debug("cmp %s,%#x\n", regname[rs], imm);
750 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
751 }
752 else if (-imm < 4096) {
753 assem_debug("cmn %s,%#x\n", regname[rs], imm);
754 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
755 }
756 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 757 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 758 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
759 }
760 else {
761 host_tempreg_acquire();
762 emit_movimm(imm, HOST_TEMPREG);
763 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
764 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
765 host_tempreg_release();
766 }
be516ebe 767}
768
d1e4ebd9 769static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 770{
d1e4ebd9 771 assert(imm == 0 || imm == 1);
772 assert(cond0 < 0x10);
773 assert(cond1 < 0x10);
774 if (imm) {
775 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
776 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
777 } else {
778 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
779 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
780 }
be516ebe 781}
782
d1e4ebd9 783static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 784{
d1e4ebd9 785 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 786}
787
d1e4ebd9 788static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 789{
d1e4ebd9 790 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 791}
792
793static void emit_cmovb_imm(int imm,u_int rt)
794{
d1e4ebd9 795 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 796}
797
3968e69e 798static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 799{
3968e69e 800 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
801 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 802}
803
804static void emit_cmovne_reg(u_int rs,u_int rt)
805{
d1e4ebd9 806 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 808}
809
810static void emit_cmovl_reg(u_int rs,u_int rt)
811{
d1e4ebd9 812 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 814}
815
e3c6bdb5 816static void emit_cmovb_reg(u_int rs,u_int rt)
817{
818 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
820}
821
be516ebe 822static void emit_cmovs_reg(u_int rs,u_int rt)
823{
d1e4ebd9 824 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
825 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 826}
827
3968e69e 828static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
829{
830 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
831 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
832}
833
be516ebe 834static void emit_slti32(u_int rs,int imm,u_int rt)
835{
836 if(rs!=rt) emit_zeroreg(rt);
837 emit_cmpimm(rs,imm);
838 if(rs==rt) emit_movimm(0,rt);
839 emit_cmovl_imm(1,rt);
840}
841
842static void emit_sltiu32(u_int rs,int imm,u_int rt)
843{
844 if(rs!=rt) emit_zeroreg(rt);
845 emit_cmpimm(rs,imm);
846 if(rs==rt) emit_movimm(0,rt);
847 emit_cmovb_imm(1,rt);
848}
849
850static void emit_cmp(u_int rs,u_int rt)
851{
852 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 853 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 854}
855
856static void emit_set_gz32(u_int rs, u_int rt)
857{
858 //assem_debug("set_gz32\n");
859 emit_cmpimm(rs,1);
860 emit_movimm(1,rt);
861 emit_cmovl_imm(0,rt);
862}
863
864static void emit_set_nz32(u_int rs, u_int rt)
865{
866 //assem_debug("set_nz32\n");
d1e4ebd9 867 if(rs!=rt) emit_mov(rs,rt);
868 emit_test(rs,rs);
869 emit_cmovne_imm(1,rt);
be516ebe 870}
871
872static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
873{
874 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
875 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
876 emit_cmp(rs1,rs2);
877 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
878 emit_cmovl_imm(1,rt);
879}
880
881static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
882{
883 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
884 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
885 emit_cmp(rs1,rs2);
886 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
887 emit_cmovb_imm(1,rt);
888}
889
2a014d73 890static int can_jump_or_call(const void *a)
891{
892 intptr_t diff = (u_char *)a - out;
893 return (-134217728 <= diff && diff <= 134217727);
894}
895
d1e4ebd9 896static void emit_call(const void *a)
be516ebe 897{
d1e4ebd9 898 intptr_t diff = (u_char *)a - out;
899 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 900 assert(!(diff & 3));
901 if (-134217728 <= diff && diff <= 134217727)
902 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
903 else
3968e69e 904 abort();
be516ebe 905}
906
d1e4ebd9 907static void emit_jmp(const void *a)
be516ebe 908{
d1e4ebd9 909 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
910 u_int offset = genjmp(a);
911 output_w32(0x14000000 | offset);
be516ebe 912}
913
d1e4ebd9 914static void emit_jne(const void *a)
be516ebe 915{
d1e4ebd9 916 assem_debug("bne %p\n", a);
917 u_int offset = genjmpcc(a);
918 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 919}
920
7c3a5182 921static void emit_jeq(const void *a)
be516ebe 922{
d1e4ebd9 923 assem_debug("beq %p\n", a);
924 u_int offset = genjmpcc(a);
925 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 926}
927
7c3a5182 928static void emit_js(const void *a)
be516ebe 929{
d1e4ebd9 930 assem_debug("bmi %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 933}
934
7c3a5182 935static void emit_jns(const void *a)
be516ebe 936{
d1e4ebd9 937 assem_debug("bpl %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 940}
941
7c3a5182 942static void emit_jl(const void *a)
be516ebe 943{
d1e4ebd9 944 assem_debug("blt %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 947}
948
7c3a5182 949static void emit_jge(const void *a)
be516ebe 950{
d1e4ebd9 951 assem_debug("bge %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 954}
955
7c3a5182 956static void emit_jno(const void *a)
be516ebe 957{
d1e4ebd9 958 assem_debug("bvc %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 961}
962
7c3a5182 963static void emit_jc(const void *a)
be516ebe 964{
d1e4ebd9 965 assem_debug("bcs %p\n", a);
966 u_int offset = genjmpcc(a);
967 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 968}
969
3968e69e 970static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 971{
3968e69e 972 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 973 u_int offset = genjmpcc(a);
3968e69e 974 is64 = is64 ? 0x80000000 : 0;
975 isnz = isnz ? 0x01000000 : 0;
976 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
977}
978
979static void emit_cbz(const void *a, u_int r)
980{
981 emit_cb(0, 0, a, r);
be516ebe 982}
983
984static void emit_jmpreg(u_int r)
985{
3968e69e 986 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 987 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 988}
989
990static void emit_retreg(u_int r)
991{
d1e4ebd9 992 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 993 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
994}
995
996static void emit_ret(void)
997{
998 emit_retreg(LR);
999}
1000
d1e4ebd9 1001static void emit_adr(void *addr, u_int rt)
1002{
1003 intptr_t offset = (u_char *)addr - out;
1004 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1005 assert(rt < 31);
d1e4ebd9 1006 assem_debug("adr x%d,#%#lx\n", rt, offset);
1007 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1008}
1009
3968e69e 1010static void emit_adrp(void *addr, u_int rt)
1011{
1012 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1013 assert(-4294967296l <= offset && offset < 4294967296l);
1014 assert(rt < 31);
1015 offset >>= 12;
1016 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1017 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1018}
1019
be516ebe 1020static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1021{
d1e4ebd9 1022 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1023 assert(-256 <= offset && offset < 256);
1024 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1025}
1026
1027static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028{
1029 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1030 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1031}
1032
1033static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1034{
1035 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1036 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1037}
1038
1039static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1040{
1041 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1043}
1044
1045static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062
1063static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064{
1065 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1067}
1068
1069static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070{
1071 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1073}
1074
1075static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1076{
1077 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1078 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1079}
1080
be516ebe 1081static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1082{
d1e4ebd9 1083 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1084 assert(-256 <= offset && offset < 256);
1085 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1086}
1087
1088static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1089{
d1e4ebd9 1090 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1091 assert(-256 <= offset && offset < 256);
1092 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1093}
1094
1095static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1096{
d1e4ebd9 1097 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1098 assert(-256 <= offset && offset < 256);
1099 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1100}
1101
1102static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1103{
d1e4ebd9 1104 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1105 assert(-256 <= offset && offset < 256);
1106 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1107}
1108
be516ebe 1109static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1110{
3968e69e 1111 if (!(offset & 3) && (u_int)offset <= 16380) {
1112 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1113 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1114 }
1115 else if (-256 <= offset && offset < 256) {
1116 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1118 }
687b4580 1119 else
1120 assert(0);
be516ebe 1121}
1122
1123static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1124{
3968e69e 1125 if (!(offset & 1) && (u_int)offset <= 8190) {
1126 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1127 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1128 }
1129 else if (-256 <= offset && offset < 256) {
1130 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1131 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1132 }
687b4580 1133 else
1134 assert(0);
be516ebe 1135}
1136
1137static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1138{
3968e69e 1139 if ((u_int)offset < 4096) {
1140 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1141 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1142 }
1143 else if (-256 <= offset && offset < 256) {
1144 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1145 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1146 }
687b4580 1147 else
1148 assert(0);
be516ebe 1149}
1150
3968e69e 1151static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1152{
3968e69e 1153 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1155}
1156
3968e69e 1157static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1158{
3968e69e 1159 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1161}
1162
1163static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1164{
1165 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1166 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1167}
1168
1169static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1170{
1171 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1173}
1174
3968e69e 1175static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1176{
1177 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1179}
1180
1181static void emit_clz(u_int rs, u_int rt)
be516ebe 1182{
1183 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1184 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1185}
1186
be516ebe 1187// special case for checking invalid_code
d1e4ebd9 1188static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1189{
d1e4ebd9 1190 host_tempreg_acquire();
1191 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1192 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1193 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1194 emit_cmpimm(HOST_TEMPREG, imm);
1195 host_tempreg_release();
be516ebe 1196}
1197
3968e69e 1198// special for loadlr_assemble, rs2 is destroyed
1199static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1200{
3968e69e 1201 emit_shl(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
be516ebe 1203}
1204
3968e69e 1205static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1206{
3968e69e 1207 emit_shr(rs2, shift, rs2);
1208 emit_bic(rs1, rs2, rt);
be516ebe 1209}
1210
d1e4ebd9 1211static void emit_loadlp_ofs(u_int ofs, u_int rt)
1212{
1213 output_w32(0x58000000 | imm19_rt(ofs, rt));
1214}
1215
687b4580 1216static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1217{
687b4580 1218 u_int op = 0xb9000000;
d1e4ebd9 1219 unused const char *ldst = is_st ? "st" : "ld";
1220 unused char rp = is64 ? 'x' : 'w';
687b4580 1221 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1222 is64 = is64 ? 1 : 0;
1223 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1224 ofs = (ofs >> (2+is64));
687b4580 1225 if (!is_st) op |= 0x00400000;
1226 if (is64) op |= 0x40000000;
d1e4ebd9 1227 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1228}
1229
687b4580 1230static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1231{
687b4580 1232 u_int op = 0x29000000;
d1e4ebd9 1233 unused const char *ldst = is_st ? "st" : "ld";
1234 unused char rp = is64 ? 'x' : 'w';
687b4580 1235 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1236 is64 = is64 ? 1 : 0;
1237 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1238 ofs = (ofs >> (2+is64));
1239 assert(-64 <= ofs && ofs <= 63);
1240 ofs &= 0x7f;
1241 if (!is_st) op |= 0x00400000;
1242 if (is64) op |= 0x80000000;
d1e4ebd9 1243 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1244}
1245
1246static void save_load_regs_all(int is_store, u_int reglist)
1247{
1248 int ofs = 0, c = 0;
1249 u_int r, pair[2];
1250 for (r = 0; reglist; r++, reglist >>= 1) {
1251 if (reglist & 1)
1252 pair[c++] = r;
1253 if (c == 2) {
1254 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1255 ofs += 8 * 2;
1256 c = 0;
1257 }
1258 }
1259 if (c) {
1260 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1261 ofs += 8;
1262 }
1263 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1264}
1265
1266// Save registers before function call
1267static void save_regs(u_int reglist)
1268{
1269 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1270 save_load_regs_all(1, reglist);
be516ebe 1271}
1272
1273// Restore registers after function call
1274static void restore_regs(u_int reglist)
1275{
1276 reglist &= CALLER_SAVE_REGS;
687b4580 1277 save_load_regs_all(0, reglist);
be516ebe 1278}
1279
1280/* Stubs/epilogue */
1281
1282static void literal_pool(int n)
1283{
1284 (void)literals;
1285}
1286
// No-op on this backend; n is ignored.
static void literal_pool_jumpover(int n)
{
  /* nothing to emit */
}
1290
d1e4ebd9 1291// parsed by get_pointer, find_extjump_insn
1292static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1293{
d1e4ebd9 1294 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1295
d1e4ebd9 1296 emit_movz(target & 0xffff, 0);
1297 emit_movk_lsl16(target >> 16, 0);
1298
1299 // addr is in the current recompiled block (max 256k)
1300 // offset shouldn't exceed +/-1MB
1301 emit_adr(addr, 1);
2a014d73 1302 emit_far_jump(linker);
be516ebe 1303}
1304
d1e4ebd9 1305static void check_extjump2(void *src)
be516ebe 1306{
d1e4ebd9 1307 u_int *ptr = src;
1308 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1309 (void)ptr;
be516ebe 1310}
1311
1312// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1313static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1314{
d1e4ebd9 1315 int diff = rt_val - rs_val;
3968e69e 1316 if ((-4096 < diff && diff < 4096)
1317 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1318 emit_addimm(rs, diff, rt);
3968e69e 1319 else if (rt_val == ~rs_val)
1320 emit_not(rs, rt);
d1e4ebd9 1321 else if (is_rotated_mask(rs_val ^ rt_val))
1322 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1323 else
d1e4ebd9 1324 emit_movimm(rt_val, rt);
be516ebe 1325}
1326
d1e4ebd9 1327// return 1 if the above function can do it's job cheaply
687b4580 1328static int is_similar_value(u_int v1, u_int v2)
be516ebe 1329{
687b4580 1330 int diff = v1 - v2;
3968e69e 1331 return (-4096 < diff && diff < 4096)
1332 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1333 || v1 == ~v2
d1e4ebd9 1334 || is_rotated_mask(v1 ^ v2);
1335}
1336
1337// trashes r2
1338static void pass_args64(u_int a0, u_int a1)
1339{
1340 if(a0==1&&a1==0) {
1341 // must swap
1342 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1343 }
1344 else if(a0!=0&&a1==0) {
1345 emit_mov64(a1,1);
1346 if (a0>=0) emit_mov64(a0,0);
1347 }
1348 else {
1349 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1350 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1351 }
be516ebe 1352}
1353
d1e4ebd9 1354static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1355{
1356 switch(type) {
1357 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1358 case LOADBU_STUB:
1359 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1360 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1361 case LOADHU_STUB:
1362 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1363 case LOADW_STUB:
1364 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1365 default: assert(0);
d1e4ebd9 1366 }
1367}
1368
1369#include "pcsxmem.h"
be516ebe 1370//#include "pcsxmem_inline.c"
1371
1372static void do_readstub(int n)
1373{
1374 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1375 set_jump_target(stubs[n].addr, out);
1376 enum stub_type type = stubs[n].type;
1377 int i = stubs[n].a;
1378 int rs = stubs[n].b;
1379 const struct regstat *i_regs = (void *)stubs[n].c;
1380 u_int reglist = stubs[n].e;
1381 const signed char *i_regmap = i_regs->regmap;
1382 int rt;
1383 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1384 rt=get_reg(i_regmap,FTEMP);
1385 }else{
1386 rt=get_reg(i_regmap,rt1[i]);
1387 }
1388 assert(rs>=0);
1389 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1390 void *restore_jump = NULL, *handler_jump = NULL;
1391 reglist|=(1<<rs);
1392 for (r = 0; r < HOST_CCREG; r++) {
1393 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1394 temp = r;
1395 break;
1396 }
1397 }
1398 if(rt>=0&&rt1[i]!=0)
1399 reglist&=~(1<<rt);
1400 if(temp==-1) {
1401 save_regs(reglist);
1402 regs_saved=1;
1403 temp=(rs==0)?2:0;
1404 }
1405 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1406 temp2=1;
1407 emit_readdword(&mem_rtab,temp);
1408 emit_shrimm(rs,12,temp2);
1409 emit_readdword_dualindexedx8(temp,temp2,temp2);
1410 emit_adds64(temp2,temp2,temp2);
1411 handler_jump=out;
1412 emit_jc(0);
1413 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1414 switch(type) {
1415 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1416 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1417 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1418 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1419 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1420 default: assert(0);
d1e4ebd9 1421 }
1422 }
1423 if(regs_saved) {
1424 restore_jump=out;
1425 emit_jmp(0); // jump to reg restore
1426 }
1427 else
1428 emit_jmp(stubs[n].retaddr); // return address
1429 set_jump_target(handler_jump, out);
1430
1431 if(!regs_saved)
1432 save_regs(reglist);
1433 void *handler=NULL;
1434 if(type==LOADB_STUB||type==LOADBU_STUB)
1435 handler=jump_handler_read8;
1436 if(type==LOADH_STUB||type==LOADHU_STUB)
1437 handler=jump_handler_read16;
1438 if(type==LOADW_STUB)
1439 handler=jump_handler_read32;
1440 assert(handler);
1441 pass_args64(rs,temp2);
1442 int cc=get_reg(i_regmap,CCREG);
1443 if(cc<0)
1444 emit_loadreg(CCREG,2);
bb4f300c 1445 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1446 emit_far_call(handler);
d1e4ebd9 1447 // (no cycle reload after read)
1448 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1449 loadstore_extend(type,0,rt);
1450 }
1451 if(restore_jump)
1452 set_jump_target(restore_jump, out);
1453 restore_regs(reglist);
1454 emit_jmp(stubs[n].retaddr);
be516ebe 1455}
1456
81dbbf4c 1457static void inline_readstub(enum stub_type type, int i, u_int addr,
1458 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1459{
d1e4ebd9 1460 int rs=get_reg(regmap,target);
1461 int rt=get_reg(regmap,target);
1462 if(rs<0) rs=get_reg(regmap,-1);
1463 assert(rs>=0);
1464 u_int is_dynamic=0;
1465 uintptr_t host_addr = 0;
1466 void *handler;
1467 int cc=get_reg(regmap,CCREG);
bb4f300c 1468 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
d1e4ebd9 1469 // return;
1470 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1471 if (handler == NULL) {
1472 if(rt<0||rt1[i]==0)
1473 return;
1474 if (addr != host_addr) {
1475 if (host_addr >= 0x100000000ull)
1476 abort(); // ROREG not implemented
1477 emit_movimm_from(addr, rs, host_addr, rs);
1478 }
1479 switch(type) {
1480 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1481 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1482 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1483 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1484 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1485 default: assert(0);
1486 }
1487 return;
1488 }
1489 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1490 if(is_dynamic) {
1491 if(type==LOADB_STUB||type==LOADBU_STUB)
1492 handler=jump_handler_read8;
1493 if(type==LOADH_STUB||type==LOADHU_STUB)
1494 handler=jump_handler_read16;
1495 if(type==LOADW_STUB)
1496 handler=jump_handler_read32;
1497 }
1498
1499 // call a memhandler
1500 if(rt>=0&&rt1[i]!=0)
1501 reglist&=~(1<<rt);
1502 save_regs(reglist);
1503 if(target==0)
1504 emit_movimm(addr,0);
1505 else if(rs!=0)
1506 emit_mov(rs,0);
1507 if(cc<0)
1508 emit_loadreg(CCREG,2);
bb4f300c 1509 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
3968e69e 1510 if(is_dynamic) {
1511 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1512 emit_adrp((void *)l1, 1);
1513 emit_addimm64(1, l1 & 0xfff, 1);
1514 }
d1e4ebd9 1515 else
2a014d73 1516 emit_far_call(do_memhandler_pre);
d1e4ebd9 1517
2a014d73 1518 emit_far_call(handler);
d1e4ebd9 1519
1520 // (no cycle reload after read)
1521 if(rt>=0&&rt1[i]!=0)
1522 loadstore_extend(type, 0, rt);
1523 restore_regs(reglist);
be516ebe 1524}
1525
1526static void do_writestub(int n)
1527{
1528 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1529 set_jump_target(stubs[n].addr, out);
1530 enum stub_type type=stubs[n].type;
1531 int i=stubs[n].a;
1532 int rs=stubs[n].b;
1533 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1534 u_int reglist=stubs[n].e;
1535 signed char *i_regmap=i_regs->regmap;
1536 int rt,r;
1537 if(itype[i]==C1LS||itype[i]==C2LS) {
1538 rt=get_reg(i_regmap,r=FTEMP);
1539 }else{
1540 rt=get_reg(i_regmap,r=rs2[i]);
1541 }
1542 assert(rs>=0);
1543 assert(rt>=0);
1544 int rtmp,temp=-1,temp2,regs_saved=0;
1545 void *restore_jump = NULL, *handler_jump = NULL;
1546 int reglist2=reglist|(1<<rs)|(1<<rt);
1547 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1548 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1549 temp = rtmp;
1550 break;
1551 }
1552 }
1553 if(temp==-1) {
1554 save_regs(reglist);
1555 regs_saved=1;
1556 for(rtmp=0;rtmp<=3;rtmp++)
1557 if(rtmp!=rs&&rtmp!=rt)
1558 {temp=rtmp;break;}
1559 }
1560 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1561 temp2=3;
1562 else {
1563 host_tempreg_acquire();
1564 temp2=HOST_TEMPREG;
1565 }
1566 emit_readdword(&mem_wtab,temp);
1567 emit_shrimm(rs,12,temp2);
1568 emit_readdword_dualindexedx8(temp,temp2,temp2);
1569 emit_adds64(temp2,temp2,temp2);
1570 handler_jump=out;
1571 emit_jc(0);
1572 switch(type) {
1573 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1574 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1575 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1576 default: assert(0);
1577 }
1578 if(regs_saved) {
1579 restore_jump=out;
1580 emit_jmp(0); // jump to reg restore
1581 }
1582 else
1583 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1584 set_jump_target(handler_jump, out);
1585
1586 // TODO FIXME: regalloc should prefer callee-saved regs
1587 if(!regs_saved)
1588 save_regs(reglist);
1589 void *handler=NULL;
1590 switch(type) {
1591 case STOREB_STUB: handler=jump_handler_write8; break;
1592 case STOREH_STUB: handler=jump_handler_write16; break;
1593 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1594 default: assert(0);
d1e4ebd9 1595 }
1596 assert(handler);
1597 pass_args(rs,rt);
1598 if(temp2!=3) {
1599 emit_mov64(temp2,3);
1600 host_tempreg_release();
1601 }
1602 int cc=get_reg(i_regmap,CCREG);
1603 if(cc<0)
1604 emit_loadreg(CCREG,2);
bb4f300c 1605 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
d1e4ebd9 1606 // returns new cycle_count
2a014d73 1607 emit_far_call(handler);
bb4f300c 1608 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
d1e4ebd9 1609 if(cc<0)
1610 emit_storereg(CCREG,2);
1611 if(restore_jump)
1612 set_jump_target(restore_jump, out);
1613 restore_regs(reglist);
1614 emit_jmp(stubs[n].retaddr);
be516ebe 1615}
1616
81dbbf4c 1617static void inline_writestub(enum stub_type type, int i, u_int addr,
1618 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1619{
687b4580 1620 int rs = get_reg(regmap,-1);
1621 int rt = get_reg(regmap,target);
1622 assert(rs >= 0);
1623 assert(rt >= 0);
1624 uintptr_t host_addr = 0;
1625 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1626 if (handler == NULL) {
d1e4ebd9 1627 if (addr != host_addr) {
1628 if (host_addr >= 0x100000000ull)
1629 abort(); // ROREG not implemented
687b4580 1630 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1631 }
1632 switch (type) {
687b4580 1633 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1634 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1635 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1636 default: assert(0);
1637 }
1638 return;
1639 }
1640
1641 // call a memhandler
1642 save_regs(reglist);
687b4580 1643 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1644 loadstore_extend(type, rt, 0);
1645 int cc, cc_use;
1646 cc = cc_use = get_reg(regmap, CCREG);
1647 if (cc < 0)
1648 emit_loadreg(CCREG, (cc_use = 2));
bb4f300c 1649 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
d1e4ebd9 1650
2a014d73 1651 emit_far_call(do_memhandler_pre);
1652 emit_far_call(handler);
1653 emit_far_call(do_memhandler_post);
bb4f300c 1654 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
d1e4ebd9 1655 if (cc < 0)
1656 emit_storereg(CCREG, cc_use);
687b4580 1657 restore_regs(reglist);
be516ebe 1658}
1659
// Compare size bytes of source against copy.
// Returns the memcmp() result: 0 when identical, non-zero otherwise.
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  return memcmp(source, copy, size);
}
1666
1667// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1668static void do_dirty_stub_base(u_int vaddr)
1669{
1670 assert(slen <= MAXBLOCK);
1671 emit_loadlp_ofs(0, 0); // ldr x1, source
1672 emit_loadlp_ofs(0, 1); // ldr x2, copy
1673 emit_movz(slen*4, 2);
2a014d73 1674 emit_far_call(verify_code_arm64);
3968e69e 1675 void *jmp = out;
1676 emit_cbz(0, 0);
1677 emit_movz(vaddr & 0xffff, 0);
1678 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1679 emit_far_call(get_addr);
3968e69e 1680 emit_jmpreg(0);
1681 set_jump_target(jmp, out);
1682}
1683
// Assert that ptr points at a dirty stub: two literal loads into x0/x1,
// a movz into w2, and "br x0" as the ninth instruction.
static void assert_dirty_stub(const u_int *ptr)
{
  const u_int ldr_lit_mask = 0xff00001f;
  const u_int movz_mask = 0xffe0001f;

  assert((ptr[0] & ldr_lit_mask) == 0x58000000); // ldr x0, source
  assert((ptr[1] & ldr_lit_mask) == 0x58000001); // ldr x1, copy
  assert((ptr[2] & movz_mask) == 0x52800002);    // movz w2, #slen*4
  assert(ptr[8] == 0xd61f0000);                  // br x0
  (void)ldr_lit_mask;
  (void)movz_mask;
}
1691
// Patch the "ldr (literal)" instruction at loadl so that it refers to lit.
// The instruction must still have a zero offset field, and lit must be
// word-aligned relative to loadl and less than 1MB after it.
static void set_loadlp(u_int *loadl, void *lit)
{
  const uintptr_t distance = (u_char *)lit - (u_char *)loadl;

  assert((*loadl & ~0x1f) == 0x58000000);
  assert((distance & 3) == 0);
  assert(distance < 0x100000);
  // the offset is stored in words, starting at bit 5
  *loadl |= (distance >> 2) << 5;
}
1700
d1e4ebd9 1701static void do_dirty_stub_emit_literals(u_int *loadlps)
1702{
1703 set_loadlp(&loadlps[0], out);
1704 output_w64((uintptr_t)source);
1705 set_loadlp(&loadlps[1], out);
1706 output_w64((uintptr_t)copy);
be516ebe 1707}
1708
d1e4ebd9 1709static void *do_dirty_stub(int i)
be516ebe 1710{
1711 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1712 u_int *loadlps = (void *)out;
3968e69e 1713 do_dirty_stub_base(start + i*4);
d1e4ebd9 1714 void *entry = out;
be516ebe 1715 load_regs_entry(i);
d1e4ebd9 1716 if (entry == out)
1717 entry = instr_addr[i];
1718 emit_jmp(instr_addr[i]);
1719 do_dirty_stub_emit_literals(loadlps);
1720 return entry;
be516ebe 1721}
1722
3968e69e 1723static void do_dirty_stub_ds(void)
be516ebe 1724{
d1e4ebd9 1725 u_int *loadlps = (void *)out;
3968e69e 1726 do_dirty_stub_base(start + 1);
1727 void *lit_jumpover = out;
d1e4ebd9 1728 emit_jmp(out + 8*2);
1729 do_dirty_stub_emit_literals(loadlps);
3968e69e 1730 set_jump_target(lit_jumpover, out);
be516ebe 1731}
1732
// Return the 64-bit literal referenced by the "ldr (literal)" instruction
// at i. The signed 19-bit word offset in bits [23:5] is relative to i.
static uint64_t get_from_ldr_literal(const u_int *i)
{
  uint64_t value;
  int ofs;

  assert((i[0] & 0xff000000) == 0x58000000);
  // sign-extend explicitly instead of relying on implementation-defined
  // signed conversion and right shift
  ofs = (int)((i[0] >> 5) & 0x7ffff);
  if (ofs & 0x40000)
    ofs -= 0x80000;
  // memcpy keeps the pointer const and avoids aliasing/alignment problems
  memcpy(&value, i + ofs, sizeof(value));
  return value;
}
be516ebe 1741
// Return the 16-bit immediate (bits [20:5]) of the movz instruction at i.
static uint64_t get_from_movz(const u_int *i)
{
  const u_int insn = i[0];

  assert((insn & 0x7fe00000) == 0x52800000);
  return (insn >> 5) & 0xffff;
}
be516ebe 1747
3968e69e 1748// Find the "clean" entry point from a "dirty" entry point
1749// by skipping past the call to verify_code
1750static void *get_clean_addr(u_int *addr)
be516ebe 1751{
3968e69e 1752 assert_dirty_stub(addr);
1753 return addr + 9;
be516ebe 1754}
be516ebe 1755
3968e69e 1756static int verify_dirty(const u_int *ptr)
be516ebe 1757{
3968e69e 1758 const void *source, *copy;
1759 u_int len;
1760 assert_dirty_stub(ptr);
1761 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1762 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1763 len = get_from_movz(&ptr[2]); // movz w3, #slen*4
1764 return !memcmp(source, copy, len);
1765}
1766
1767static int isclean(void *addr)
1768{
1769 const u_int *ptr = addr;
1770 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1771 assert_dirty_stub(ptr);
1772 return 0;
1773 }
1774 return 1;
1775}
1776
1777// get source that block at addr was compiled from (host pointers)
1778static void get_bounds(void *addr, u_char **start, u_char **end)
1779{
1780 const u_int *ptr = addr;
1781 assert_dirty_stub(ptr);
1782 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1783 *end = *start + get_from_movz(&ptr[2]); // movz w3, #slen*4
1784}
1785
1786/* Special assem */
1787
81dbbf4c 1788static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1789{
1790 save_load_regs_all(1, reglist);
81dbbf4c 1791 cop2_call_stall_check(op, i, i_regs, 0);
3968e69e 1792#ifdef PCNT
1793 emit_movimm(op, 0);
2a014d73 1794 emit_far_call(pcnt_gte_start);
3968e69e 1795#endif
1796 // pointer to cop2 regs
1797 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1798}
1799
1800static void c2op_epilogue(u_int op,u_int reglist)
1801{
1802#ifdef PCNT
1803 emit_movimm(op, 0);
2a014d73 1804 emit_far_call(pcnt_gte_end);
3968e69e 1805#endif
1806 save_load_regs_all(0, reglist);
be516ebe 1807}
1808
81dbbf4c 1809static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1810{
3968e69e 1811 u_int c2op=source[i]&0x3f;
1812 u_int hr,reglist_full=0,reglist;
1813 int need_flags,need_ir;
1814 for(hr=0;hr<HOST_REGS;hr++) {
1815 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1816 }
1817 reglist=reglist_full&CALLER_SAVE_REGS;
1818
1819 if (gte_handlers[c2op]!=NULL) {
1820 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1821 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1822 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1823 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1824 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1825 need_flags=0;
1826 //int shift = (source[i] >> 19) & 1;
1827 //int lm = (source[i] >> 10) & 1;
1828 switch(c2op) {
1829 default:
1830 (void)need_ir;
81dbbf4c 1831 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1832 emit_movimm(source[i],1); // opcode
1833 emit_writeword(1,&psxRegs.code);
2a014d73 1834 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1835 break;
1836 }
1837 c2op_epilogue(c2op,reglist);
1838 }
1839}
1840
1841static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1842{
1843 //value = value & 0x7ffff000;
1844 //if (value & 0x7f87e000) value |= 0x80000000;
1845 emit_andimm(sl, 0x7fffe000, temp);
1846 emit_testimm(temp, 0xff87ffff);
1847 emit_andimm(sl, 0x7ffff000, temp);
1848 host_tempreg_acquire();
1849 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1850 emit_cmovne_reg(HOST_TEMPREG, temp);
1851 host_tempreg_release();
1852 assert(0); // testing needed
1853}
1854
1855static void do_mfc2_31_one(u_int copr,signed char temp)
1856{
1857 emit_readshword(&reg_cop2d[copr],temp);
1858 emit_bicsar_imm(temp,31,temp);
1859 emit_cmpimm(temp,0xf80);
1860 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1861 emit_andimm(temp,0xf80,temp);
1862}
1863
1864static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1865{
1866 if (temp < 0) {
1867 host_tempreg_acquire();
1868 temp = HOST_TEMPREG;
1869 }
1870 do_mfc2_31_one(9,temp);
1871 emit_shrimm(temp,7,tl);
1872 do_mfc2_31_one(10,temp);
1873 emit_orrshr_imm(temp,2,tl);
1874 do_mfc2_31_one(11,temp);
1875 emit_orrshl_imm(temp,3,tl);
1876 emit_writeword(tl,&reg_cop2d[29]);
1877
1878 if (temp == HOST_TEMPREG)
1879 host_tempreg_release();
be516ebe 1880}
1881
1882static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1883{
3968e69e 1884 // case 0x18: MULT
1885 // case 0x19: MULTU
1886 // case 0x1A: DIV
1887 // case 0x1B: DIVU
1888 if(rs1[i]&&rs2[i])
1889 {
1890 switch(opcode2[i])
1891 {
1892 case 0x18: // MULT
1893 case 0x19: // MULTU
1894 {
1895 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1896 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1897 signed char hi=get_reg(i_regs->regmap,HIREG);
1898 signed char lo=get_reg(i_regs->regmap,LOREG);
1899 assert(m1>=0);
1900 assert(m2>=0);
1901 assert(hi>=0);
1902 assert(lo>=0);
1903
1904 if(opcode2[i]==0x18) // MULT
1905 emit_smull(m1,m2,hi);
1906 else // MULTU
1907 emit_umull(m1,m2,hi);
1908
1909 emit_mov(hi,lo);
1910 emit_shrimm64(hi,32,hi);
1911 break;
1912 }
1913 case 0x1A: // DIV
1914 case 0x1B: // DIVU
1915 {
1916 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1917 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1918 signed char quotient=get_reg(i_regs->regmap,LOREG);
1919 signed char remainder=get_reg(i_regs->regmap,HIREG);
1920 assert(numerator>=0);
1921 assert(denominator>=0);
1922 assert(quotient>=0);
1923 assert(remainder>=0);
1924
1925 if (opcode2[i] == 0x1A) // DIV
1926 emit_sdiv(numerator,denominator,quotient);
1927 else // DIVU
1928 emit_udiv(numerator,denominator,quotient);
1929 emit_msub(quotient,denominator,numerator,remainder);
1930
1931 // div 0 quotient (remainder is already correct)
1932 host_tempreg_acquire();
1933 if (opcode2[i] == 0x1A) // DIV
1934 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1935 else
1936 emit_movimm(~0,HOST_TEMPREG);
1937 emit_test(denominator,denominator);
1938 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1939 host_tempreg_release();
1940 break;
1941 }
1942 default:
1943 assert(0);
1944 }
1945 }
1946 else
1947 {
1948 signed char hr=get_reg(i_regs->regmap,HIREG);
1949 signed char lr=get_reg(i_regs->regmap,LOREG);
1950 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1951 {
1952 if (rs1[i]) {
1953 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1954 assert(numerator >= 0);
1955 if (hr >= 0)
1956 emit_mov(numerator,hr);
1957 if (lr >= 0) {
1958 if (opcode2[i] == 0x1A) // DIV
1959 emit_sub_asrimm(0,numerator,31,lr);
1960 else
1961 emit_movimm(~0,lr);
1962 }
1963 }
1964 else {
1965 if (hr >= 0) emit_zeroreg(hr);
1966 if (lr >= 0) emit_movimm(~0,lr);
1967 }
1968 }
1969 else
1970 {
1971 // Multiply by zero is zero.
1972 if (hr >= 0) emit_zeroreg(hr);
1973 if (lr >= 0) emit_zeroreg(lr);
1974 }
1975 }
be516ebe 1976}
1977#define multdiv_assemble multdiv_assemble_arm64
1978
d1e4ebd9 1979static void do_jump_vaddr(u_int rs)
1980{
1981 if (rs != 0)
1982 emit_mov(rs, 0);
2a014d73 1983 emit_far_call(get_addr_ht);
d1e4ebd9 1984 emit_jmpreg(0);
1985}
1986
be516ebe 1987static void do_preload_rhash(u_int r) {
1988 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1989 // register. On ARM the hash can be done with a single instruction (below)
1990}
1991
1992static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1993 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1994}
1995
1996static void do_rhash(u_int rs,u_int rh) {
1997 emit_andimm(rs, 0xf8, rh);
1998}
1999
d1e4ebd9 2000static void do_miniht_load(int ht, u_int rh) {
2001 emit_add64(ht, rh, ht);
2002 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2003}
2004
d1e4ebd9 2005static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2006 emit_cmp(rh, rs);
2007 void *jaddr = out;
2008 emit_jeq(0);
2009 do_jump_vaddr(rs);
2010
2011 set_jump_target(jaddr, out);
2012 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2013 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2014 emit_jmpreg(ht);
be516ebe 2015}
2016
d1e4ebd9 2017// parsed by set_jump_target?
be516ebe 2018static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2019 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2020 emit_movk(return_address&0xffff,rt);
2021 add_to_linker(out,return_address,1);
2022 emit_adr(out,temp);
2023 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2024 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2025}
2026
// Make the byte range [start, end) coherent for instruction fetch.
//
// GCC's __clear_cache implementation is deliberately not relied upon, as it
// caches icache/dcache cache line sizes, which can vary between cores on
// big.LITTLE architectures. Instead CTR_EL0 is read on every call and the
// smallest line sizes seen so far are kept in function-local statics.
static void clear_cache_arm64(char *start, char *end)
{
  enum { CTR_IDC_SHIFT = 28, CTR_DIC_SHIFT = 29 };
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t range_end = (uint64_t)end;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size: afterwards the local value and
  // the remembered static are both the smaller of the two
  if (isize < icache_line_size)
    icache_line_size = isize;
  else
    isize = icache_line_size;

  if (dsize < dcache_line_size)
    dcache_line_size = dsize;
  else
    dsize = dcache_line_size;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification
     is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << CTR_IDC_SHIFT))) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < range_end) {
      // use "civac" instead of "cvau", as this is the suggested workaround
      // for Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << CTR_DIC_SHIFT))) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < range_end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2067
2068// CPU-architecture-specific initialization
2a014d73 2069static void arch_init(void)
2070{
2071 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2072 struct tramp_insns *ops = ndrc->tramp.ops;
2073 size_t i;
2074 assert(!(diff & 3));
2075 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2076 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2077 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2078 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2079 }
2080 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2081}
2082
2083// vim:shiftwidth=2:expandtab