drc/gte: add some stall handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
d1e4ebd9 30void do_memhandler_pre();
31void do_memhandler_post();
be516ebe 32
33/* Linker */
// Patch the PC-relative target of an already emitted instruction.
//  addr   - location of the instruction to patch; must hold a b, b.cond,
//           cbz/cbnz or adr instruction
//  target - address the patched instruction should refer to
// Aborts if the instruction at addr is none of the supported kinds.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Preserve all of bits [4:0]: for cbz/cbnz they hold the 5-bit register
    // number, so masking with 0xF would turn registers 16..30 into 0..14.
    // For b.cond bit 4 is already zero, so keeping it is harmless.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
60
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third instruction word of the stub must be an `adr`; the address that
// adr refers to is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  assert((*ptr&0x9f000000) == 0x10000000); // adr
  // Decode through an unsigned copy: left-shifting a signed int whose
  // result does not fit is undefined behaviour.
  uint32_t insn = (uint32_t)*ptr;
  int immhi = (int32_t)(insn << 8) >> 13;             // bits 23:5, sign-extended
  int offset = immhi * 4 + (int)((insn >> 29) & 0x3); // immhi:immlo, in bytes
  return ptr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// get address that the stub loads (dyna_linker arg1),
// treat it as a pointer to branch insn,
// return addr where that branch jumps to.
// Only b, b.cond and cbz/cbnz are understood; anything else asserts and
// returns NULL.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  // work on an unsigned copy so the sign-extending shifts below never
  // left-shift a negative signed value (undefined behaviour)
  uint32_t insn = (uint32_t)*i_ptr;
  if ((insn&0xfc000000) == 0x14000000)  // b
    return i_ptr + ((int32_t)(insn<<6)>>6);
  if ((insn&0xff000000) == 0x54000000    // b.cond
      || (insn&0x7e000000) == 0x34000000) // cbz/cbnz
    return i_ptr + ((int32_t)(insn<<8)>>13);
  assert(0);
  return NULL;
}
86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes; the enumerator value is what gets placed in the
// condition field of emitted instructions (see condname[] for the names).
enum {
  COND_EQ = 0,  COND_NE = 1,  COND_CS = 2,  COND_CC = 3,
  COND_MI = 4,  COND_PL = 5,  COND_VS = 6,  COND_VC = 7,
  COND_HI = 8,  COND_LS = 9,  COND_GE = 10, COND_LT = 11,
  COND_GT = 12, COND_LE = 13, COND_AW = 14, COND_NV = 15
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
d1e4ebd9 149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
687b4580 156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
d1e4ebd9 162*/
687b4580 163
// Pack rn into bits 9:5 and rd into bits 4:0. Both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);

  u_int bits = rd;
  bits |= rn << 5;
  return bits;
}
170
// Pack rm into bits 20:16, rn into bits 9:5 and rd into bits 4:0.
// Each register number must be below 32.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);

  u_int bits = rd;
  bits |= rn << 5;
  bits |= rm << 16;
  return bits;
}
178
3968e69e 179static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180{
181 assert(ra < 32);
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
183}
184
// Pack a 7-bit immediate (bits 21:15), rt2 (bits 14:10), rn (bits 9:5)
// and rt (bits 4:0). rt and rt2 must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);

  u_int bits = rt;
  bits |= rn << 5;
  bits |= rt2 << 10;
  bits |= imm7 << 15;
  return bits;
}
193
687b4580 194static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195{
196 assert(imm6 <= 63);
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
198}
199
// Pack a 16-bit immediate (bits 20:5) and rd (bits 4:0, must be below 31).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);

  u_int bits = rd;
  bits |= imm16 << 5;
  return bits;
}
206
// Pack a 12-bit immediate (bits 21:10), rn (bits 9:5) and rd (bits 4:0).
// Register numbers up to 31 are accepted.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);

  u_int bits = rd;
  bits |= rn << 5;
  bits |= imm12 << 10;
  return bits;
}
214
// Pack a 9-bit immediate (bits 20:12), rn (bits 9:5) and the transfer
// register (bits 4:0, passed as `rd`). Both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);

  u_int bits = rd;
  bits |= rn << 5;
  bits |= imm9 << 12;
  return bits;
}
222
// Pack a 19-bit immediate (bits 23:5) and rt (bits 4:0, must be below 31).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);

  u_int bits = rt;
  bits |= imm19 << 5;
  return bits;
}
229
// Pack the bitfield/logical-immediate operand fields: n (bit 22),
// immr (bits 21:16), imms (bits 15:10), rn (bits 9:5) and rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);

  u_int bits = rd;
  bits |= rn << 5;
  bits |= imms << 10;
  bits |= immr << 16;
  bits |= n << 22;
  return bits;
}
239
240static u_int genjmp(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 244 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
be516ebe 247 return 0;
248 }
d1e4ebd9 249 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 250}
251
d1e4ebd9 252static u_int genjmpcc(const u_char *addr)
be516ebe 253{
254 intptr_t offset = addr - out;
d1e4ebd9 255 if ((uintptr_t)addr < 3) return 0;
be516ebe 256 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (value + 1 is then a power of two, or wraps to zero), else 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
268
269// This function returns true if the argument contains a
270// non-empty sequence of ones (possibly rotated) with the remainder zero.
271static uint32_t is_rotated_mask(u_int value)
272{
3968e69e 273 if (value == 0 || value == ~0)
be516ebe 274 return 0;
d1e4ebd9 275 if (is_mask((value - 1) | value))
276 return 1;
277 return is_mask((~value - 1) | ~value);
278}
279
280static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
281{
282 int lzeros, tzeros, ones;
283 assert(value != 0);
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
288 *immr = (32 - tzeros) & 31;
289 *imms = ones - 1;
290 return;
be516ebe 291 }
d1e4ebd9 292 value = ~value;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
3968e69e 297 *immr = lzeros;
d1e4ebd9 298 *imms = 31 - ones;
299 return;
300 }
3968e69e 301 abort();
be516ebe 302}
303
304static void emit_mov(u_int rs, u_int rt)
305{
687b4580 306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 314}
315
687b4580 316static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
d1e4ebd9 324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 326}
327
d1e4ebd9 328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
3968e69e 330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
333
334static void emit_neg(u_int rs, u_int rt)
335{
336 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
337 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 338}
339
687b4580 340static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 341{
d1e4ebd9 342 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 343 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 344}
345
3968e69e 346static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347{
348 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
349 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
350}
351
d1e4ebd9 352static void emit_movz(u_int imm, u_int rt)
be516ebe 353{
d1e4ebd9 354 assem_debug("movz %s,#%#x\n", regname[rt], imm);
355 output_w32(0x52800000 | imm16_rd(imm, rt));
356}
357
358static void emit_movz_lsl16(u_int imm, u_int rt)
359{
360 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x52a00000 | imm16_rd(imm, rt));
362}
363
364static void emit_movn(u_int imm, u_int rt)
365{
366 assem_debug("movn %s,#%#x\n", regname[rt], imm);
367 output_w32(0x12800000 | imm16_rd(imm, rt));
368}
369
370static void emit_movn_lsl16(u_int imm,u_int rt)
371{
372 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
373 output_w32(0x12a00000 | imm16_rd(imm, rt));
374}
375
376static void emit_movk(u_int imm,u_int rt)
377{
378 assem_debug("movk %s,#%#x\n", regname[rt], imm);
379 output_w32(0x72800000 | imm16_rd(imm, rt));
380}
381
382static void emit_movk_lsl16(u_int imm,u_int rt)
383{
384 assert(imm<65536);
3968e69e 385 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 386 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 387}
388
389static void emit_zeroreg(u_int rt)
390{
d1e4ebd9 391 emit_movz(0, rt);
be516ebe 392}
393
be516ebe 394static void emit_movimm(u_int imm, u_int rt)
395{
d1e4ebd9 396 if (imm < 65536)
397 emit_movz(imm, rt);
398 else if ((~imm) < 65536)
399 emit_movn(~imm, rt);
400 else if ((imm&0xffff) == 0)
401 emit_movz_lsl16(imm >> 16, rt);
402 else if (((~imm)&0xffff) == 0)
403 emit_movn_lsl16(~imm >> 16, rt);
404 else if (is_rotated_mask(imm)) {
405 u_int immr, imms;
406 gen_logical_imm(imm, &immr, &imms);
407 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
408 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
409 }
be516ebe 410 else {
d1e4ebd9 411 emit_movz(imm & 0xffff, rt);
412 emit_movk_lsl16(imm >> 16, rt);
be516ebe 413 }
414}
415
687b4580 416static void emit_readword(void *addr, u_int rt)
417{
418 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
419 if (!(offset & 3) && offset <= 16380) {
420 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
421 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
422 }
423 else
3968e69e 424 abort();
687b4580 425}
426
d1e4ebd9 427static void emit_readdword(void *addr, u_int rt)
428{
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 7) && offset <= 32760) {
431 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
432 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
433 }
3968e69e 434 else
435 abort();
436}
437
438static void emit_readshword(void *addr, u_int rt)
439{
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
444 }
d1e4ebd9 445 else
446 assert(0);
447}
448
be516ebe 449static void emit_loadreg(u_int r, u_int hr)
450{
d1e4ebd9 451 int is64 = 0;
be516ebe 452 assert(r < 64);
453 if (r == 0)
454 emit_zeroreg(hr);
455 else {
7c3a5182 456 void *addr = &psxRegs.GPR.r[r];
be516ebe 457 switch (r) {
7c3a5182 458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
be516ebe 460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
d1e4ebd9 462 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 463 default: assert(r < 34); break;
be516ebe 464 }
d1e4ebd9 465 if (is64)
466 emit_readdword(addr, hr);
467 else
468 emit_readword(addr, hr);
be516ebe 469 }
470}
471
687b4580 472static void emit_writeword(u_int rt, void *addr)
473{
474 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
475 if (!(offset & 3) && offset <= 16380) {
476 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
477 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
478 }
479 else
480 assert(0);
481}
482
d1e4ebd9 483static void emit_writedword(u_int rt, void *addr)
484{
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 7) && offset <= 32760) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 488 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 489 }
490 else
3968e69e 491 abort();
d1e4ebd9 492}
493
687b4580 494static void emit_storereg(u_int r, u_int hr)
be516ebe 495{
496 assert(r < 64);
7c3a5182 497 void *addr = &psxRegs.GPR.r[r];
be516ebe 498 switch (r) {
7c3a5182 499 //case HIREG: addr = &hi; break;
500 //case LOREG: addr = &lo; break;
be516ebe 501 case CCREG: addr = &cycle_count; break;
7c3a5182 502 default: assert(r < 34); break;
be516ebe 503 }
687b4580 504 emit_writeword(hr, addr);
be516ebe 505}
506
507static void emit_test(u_int rs, u_int rt)
508{
d1e4ebd9 509 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
510 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 511}
512
d1e4ebd9 513static void emit_testimm(u_int rs, u_int imm)
be516ebe 514{
d1e4ebd9 515 u_int immr, imms;
687b4580 516 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 517 assert(is_rotated_mask(imm)); // good enough for PCSX
518 gen_logical_imm(imm, &immr, &imms);
3968e69e 519 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 520}
521
522static void emit_not(u_int rs,u_int rt)
523{
524 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 525 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 526}
527
be516ebe 528static void emit_and(u_int rs1,u_int rs2,u_int rt)
529{
530 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 531 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 532}
533
534static void emit_or(u_int rs1,u_int rs2,u_int rt)
535{
536 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 537 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 538}
539
3968e69e 540static void emit_bic(u_int rs1,u_int rs2,u_int rt)
541{
542 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
543 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
544}
545
be516ebe 546static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
547{
be516ebe 548 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 549 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 550}
551
552static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
553{
be516ebe 554 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 555 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 556}
557
3968e69e 558static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
559{
560 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
561 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
562}
563
be516ebe 564static void emit_xor(u_int rs1,u_int rs2,u_int rt)
565{
566 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 567 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 568}
569
3968e69e 570static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
571{
572 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
573 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
574}
575
d1e4ebd9 576static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 577{
d1e4ebd9 578 unused const char *st = s ? "s" : "";
579 s = s ? 0x20000000 : 0;
580 is64 = is64 ? 0x80000000 : 0;
687b4580 581 if (imm < 4096) {
d1e4ebd9 582 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
583 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 584 }
585 else if (-imm < 4096) {
3968e69e 586 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 587 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
588 }
589 else if (imm < 16777216) {
590 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
591 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
592 if ((imm & 0xfff) || s) {
593 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 595 }
596 }
597 else if (-imm < 16777216) {
598 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
599 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
602 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
603 }
687b4580 604 }
605 else
3968e69e 606 abort();
be516ebe 607}
608
d1e4ebd9 609static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
610{
611 emit_addimm_s(0, 0, rs, imm, rt);
612}
613
614static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
615{
616 emit_addimm_s(0, 1, rs, imm, rt);
617}
618
be516ebe 619static void emit_addimm_and_set_flags(int imm, u_int rt)
620{
d1e4ebd9 621 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 622}
623
624static void emit_addimm_no_flags(u_int imm,u_int rt)
625{
626 emit_addimm(rt,imm,rt);
627}
628
d1e4ebd9 629static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 630{
d1e4ebd9 631 const char *names[] = { "and", "orr", "eor", "ands" };
632 const char *name = names[op];
633 u_int immr, imms;
634 op = op << 29;
635 if (is_rotated_mask(imm)) {
636 gen_logical_imm(imm, &immr, &imms);
637 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
638 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
639 }
640 else {
641 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
642 host_tempreg_acquire();
643 emit_movimm(imm, HOST_TEMPREG);
644 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
645 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
646 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
647 host_tempreg_release();
648 }
649 (void)name;
be516ebe 650}
651
d1e4ebd9 652static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 653{
d1e4ebd9 654 if (imm == 0)
655 emit_zeroreg(rt);
656 else
657 emit_logicop_imm(0, rs, imm, rt);
be516ebe 658}
659
d1e4ebd9 660static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 661{
d1e4ebd9 662 if (imm == 0) {
663 if (rs != rt)
664 emit_mov(rs, rt);
665 }
666 else
667 emit_logicop_imm(1, rs, imm, rt);
be516ebe 668}
669
d1e4ebd9 670static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 671{
d1e4ebd9 672 if (imm == 0) {
673 if (rs != rt)
674 emit_mov(rs, rt);
675 }
676 else
677 emit_logicop_imm(2, rs, imm, rt);
be516ebe 678}
679
d1e4ebd9 680static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 681{
d1e4ebd9 682 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 684}
685
d1e4ebd9 686static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 687{
d1e4ebd9 688 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 690}
691
692static void emit_shlimm(u_int rs,u_int imm,u_int rt)
693{
be516ebe 694 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 696}
697
3968e69e 698static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 699{
3968e69e 700 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 702}
703
3968e69e 704static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 705{
be516ebe 706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 707 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 708}
709
710static void emit_sarimm(u_int rs,u_int imm,u_int rt)
711{
be516ebe 712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 713 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 714}
715
716static void emit_rorimm(u_int rs,u_int imm,u_int rt)
717{
3968e69e 718 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 719 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 720}
721
722static void emit_signextend16(u_int rs, u_int rt)
723{
724 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 725 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 726}
727
d1e4ebd9 728static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 729{
3968e69e 730 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 731 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 732}
733
d1e4ebd9 734static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 735{
d1e4ebd9 736 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 738}
739
d1e4ebd9 740static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 741{
d1e4ebd9 742 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 744}
745
d1e4ebd9 746static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 747{
d1e4ebd9 748 if (imm < 4096) {
749 assem_debug("cmp %s,%#x\n", regname[rs], imm);
750 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
751 }
752 else if (-imm < 4096) {
753 assem_debug("cmn %s,%#x\n", regname[rs], imm);
754 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
755 }
756 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 757 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 758 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
759 }
760 else {
761 host_tempreg_acquire();
762 emit_movimm(imm, HOST_TEMPREG);
763 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
764 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
765 host_tempreg_release();
766 }
be516ebe 767}
768
d1e4ebd9 769static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 770{
d1e4ebd9 771 assert(imm == 0 || imm == 1);
772 assert(cond0 < 0x10);
773 assert(cond1 < 0x10);
774 if (imm) {
775 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
776 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
777 } else {
778 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
779 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
780 }
be516ebe 781}
782
d1e4ebd9 783static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 784{
d1e4ebd9 785 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 786}
787
d1e4ebd9 788static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 789{
d1e4ebd9 790 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 791}
792
793static void emit_cmovb_imm(int imm,u_int rt)
794{
d1e4ebd9 795 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 796}
797
3968e69e 798static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 799{
3968e69e 800 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
801 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 802}
803
804static void emit_cmovne_reg(u_int rs,u_int rt)
805{
d1e4ebd9 806 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 808}
809
810static void emit_cmovl_reg(u_int rs,u_int rt)
811{
d1e4ebd9 812 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 814}
815
816static void emit_cmovs_reg(u_int rs,u_int rt)
817{
d1e4ebd9 818 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 820}
821
3968e69e 822static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
823{
824 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
825 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
826}
827
be516ebe 828static void emit_slti32(u_int rs,int imm,u_int rt)
829{
830 if(rs!=rt) emit_zeroreg(rt);
831 emit_cmpimm(rs,imm);
832 if(rs==rt) emit_movimm(0,rt);
833 emit_cmovl_imm(1,rt);
834}
835
836static void emit_sltiu32(u_int rs,int imm,u_int rt)
837{
838 if(rs!=rt) emit_zeroreg(rt);
839 emit_cmpimm(rs,imm);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovb_imm(1,rt);
842}
843
844static void emit_cmp(u_int rs,u_int rt)
845{
846 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 847 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 848}
849
850static void emit_set_gz32(u_int rs, u_int rt)
851{
852 //assem_debug("set_gz32\n");
853 emit_cmpimm(rs,1);
854 emit_movimm(1,rt);
855 emit_cmovl_imm(0,rt);
856}
857
858static void emit_set_nz32(u_int rs, u_int rt)
859{
860 //assem_debug("set_nz32\n");
d1e4ebd9 861 if(rs!=rt) emit_mov(rs,rt);
862 emit_test(rs,rs);
863 emit_cmovne_imm(1,rt);
be516ebe 864}
865
866static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
867{
868 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
869 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
870 emit_cmp(rs1,rs2);
871 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
872 emit_cmovl_imm(1,rt);
873}
874
875static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
876{
877 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
878 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
879 emit_cmp(rs1,rs2);
880 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
881 emit_cmovb_imm(1,rt);
882}
883
2a014d73 884static int can_jump_or_call(const void *a)
885{
886 intptr_t diff = (u_char *)a - out;
887 return (-134217728 <= diff && diff <= 134217727);
888}
889
d1e4ebd9 890static void emit_call(const void *a)
be516ebe 891{
d1e4ebd9 892 intptr_t diff = (u_char *)a - out;
893 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 894 assert(!(diff & 3));
895 if (-134217728 <= diff && diff <= 134217727)
896 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
897 else
3968e69e 898 abort();
be516ebe 899}
900
d1e4ebd9 901static void emit_jmp(const void *a)
be516ebe 902{
d1e4ebd9 903 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
904 u_int offset = genjmp(a);
905 output_w32(0x14000000 | offset);
be516ebe 906}
907
d1e4ebd9 908static void emit_jne(const void *a)
be516ebe 909{
d1e4ebd9 910 assem_debug("bne %p\n", a);
911 u_int offset = genjmpcc(a);
912 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 913}
914
7c3a5182 915static void emit_jeq(const void *a)
be516ebe 916{
d1e4ebd9 917 assem_debug("beq %p\n", a);
918 u_int offset = genjmpcc(a);
919 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 920}
921
7c3a5182 922static void emit_js(const void *a)
be516ebe 923{
d1e4ebd9 924 assem_debug("bmi %p\n", a);
925 u_int offset = genjmpcc(a);
926 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 927}
928
7c3a5182 929static void emit_jns(const void *a)
be516ebe 930{
d1e4ebd9 931 assem_debug("bpl %p\n", a);
932 u_int offset = genjmpcc(a);
933 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 934}
935
7c3a5182 936static void emit_jl(const void *a)
be516ebe 937{
d1e4ebd9 938 assem_debug("blt %p\n", a);
939 u_int offset = genjmpcc(a);
940 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 941}
942
7c3a5182 943static void emit_jge(const void *a)
be516ebe 944{
d1e4ebd9 945 assem_debug("bge %p\n", a);
946 u_int offset = genjmpcc(a);
947 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 948}
949
7c3a5182 950static void emit_jno(const void *a)
be516ebe 951{
d1e4ebd9 952 assem_debug("bvc %p\n", a);
953 u_int offset = genjmpcc(a);
954 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 955}
956
7c3a5182 957static void emit_jc(const void *a)
be516ebe 958{
d1e4ebd9 959 assem_debug("bcs %p\n", a);
960 u_int offset = genjmpcc(a);
961 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 962}
963
3968e69e 964static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 965{
3968e69e 966 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 967 u_int offset = genjmpcc(a);
3968e69e 968 is64 = is64 ? 0x80000000 : 0;
969 isnz = isnz ? 0x01000000 : 0;
970 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
971}
972
973static void emit_cbz(const void *a, u_int r)
974{
975 emit_cb(0, 0, a, r);
be516ebe 976}
977
978static void emit_jmpreg(u_int r)
979{
3968e69e 980 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 981 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 982}
983
984static void emit_retreg(u_int r)
985{
d1e4ebd9 986 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 987 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
988}
989
990static void emit_ret(void)
991{
992 emit_retreg(LR);
993}
994
d1e4ebd9 995static void emit_adr(void *addr, u_int rt)
996{
997 intptr_t offset = (u_char *)addr - out;
998 assert(-1048576 <= offset && offset < 1048576);
3968e69e 999 assert(rt < 31);
d1e4ebd9 1000 assem_debug("adr x%d,#%#lx\n", rt, offset);
1001 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1002}
1003
3968e69e 1004static void emit_adrp(void *addr, u_int rt)
1005{
1006 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1007 assert(-4294967296l <= offset && offset < 4294967296l);
1008 assert(rt < 31);
1009 offset >>= 12;
1010 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1011 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1012}
1013
be516ebe 1014static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1015{
d1e4ebd9 1016 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1017 assert(-256 <= offset && offset < 256);
1018 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1019}
1020
1021static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1022{
1023 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1024 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1025}
1026
1027static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028{
1029 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1030 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1031}
1032
1033static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1034{
1035 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1036 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1037}
1038
1039static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1040{
1041 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1043}
1044
1045static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062
1063static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064{
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1067}
1068
1069static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070{
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1073}
1074
be516ebe 1075static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1076{
d1e4ebd9 1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1080}
1081
1082static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1083{
d1e4ebd9 1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1087}
1088
1089static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1090{
d1e4ebd9 1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1094}
1095
1096static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1097{
d1e4ebd9 1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1101}
1102
be516ebe 1103static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1104{
3968e69e 1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1108 }
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1112 }
687b4580 1113 else
1114 assert(0);
be516ebe 1115}
1116
1117static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1118{
3968e69e 1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1122 }
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1126 }
687b4580 1127 else
1128 assert(0);
be516ebe 1129}
1130
1131static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1132{
3968e69e 1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
687b4580 1141 else
1142 assert(0);
be516ebe 1143}
1144
3968e69e 1145static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1146{
3968e69e 1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1149}
1150
3968e69e 1151static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1152{
3968e69e 1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1155}
1156
1157static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1158{
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1161}
1162
1163static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1164{
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1167}
1168
3968e69e 1169static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1170{
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1173}
1174
1175static void emit_clz(u_int rs, u_int rt)
be516ebe 1176{
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1179}
1180
be516ebe 1181// special case for checking invalid_code
d1e4ebd9 1182static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1183{
d1e4ebd9 1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
be516ebe 1190}
1191
3968e69e 1192// special for loadlr_assemble, rs2 is destroyed
1193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1194{
3968e69e 1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
be516ebe 1197}
1198
3968e69e 1199static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1200{
3968e69e 1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
be516ebe 1203}
1204
d1e4ebd9 1205static void emit_loadlp_ofs(u_int ofs, u_int rt)
1206{
1207 output_w32(0x58000000 | imm19_rt(ofs, rt));
1208}
1209
687b4580 1210static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1211{
687b4580 1212 u_int op = 0xb9000000;
d1e4ebd9 1213 unused const char *ldst = is_st ? "st" : "ld";
1214 unused char rp = is64 ? 'x' : 'w';
687b4580 1215 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1216 is64 = is64 ? 1 : 0;
1217 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1218 ofs = (ofs >> (2+is64));
687b4580 1219 if (!is_st) op |= 0x00400000;
1220 if (is64) op |= 0x40000000;
d1e4ebd9 1221 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1222}
1223
687b4580 1224static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1225{
687b4580 1226 u_int op = 0x29000000;
d1e4ebd9 1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
687b4580 1229 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1232 ofs = (ofs >> (2+is64));
1233 assert(-64 <= ofs && ofs <= 63);
1234 ofs &= 0x7f;
1235 if (!is_st) op |= 0x00400000;
1236 if (is64) op |= 0x80000000;
d1e4ebd9 1237 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1238}
1239
1240static void save_load_regs_all(int is_store, u_int reglist)
1241{
1242 int ofs = 0, c = 0;
1243 u_int r, pair[2];
1244 for (r = 0; reglist; r++, reglist >>= 1) {
1245 if (reglist & 1)
1246 pair[c++] = r;
1247 if (c == 2) {
1248 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1249 ofs += 8 * 2;
1250 c = 0;
1251 }
1252 }
1253 if (c) {
1254 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1255 ofs += 8;
1256 }
1257 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1258}
1259
1260// Save registers before function call
1261static void save_regs(u_int reglist)
1262{
1263 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1264 save_load_regs_all(1, reglist);
be516ebe 1265}
1266
1267// Restore registers after function call
1268static void restore_regs(u_int reglist)
1269{
1270 reglist &= CALLER_SAVE_REGS;
687b4580 1271 save_load_regs_all(0, reglist);
be516ebe 1272}
1273
1274/* Stubs/epilogue */
1275
1276static void literal_pool(int n)
1277{
1278 (void)literals;
1279}
1280
// Emits nothing on arm64.  The parameter n is ignored.
static void literal_pool_jumpover(int n)
{
  // intentionally empty
}
1284
d1e4ebd9 1285// parsed by get_pointer, find_extjump_insn
1286static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1287{
d1e4ebd9 1288 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1289
d1e4ebd9 1290 emit_movz(target & 0xffff, 0);
1291 emit_movk_lsl16(target >> 16, 0);
1292
1293 // addr is in the current recompiled block (max 256k)
1294 // offset shouldn't exceed +/-1MB
1295 emit_adr(addr, 1);
2a014d73 1296 emit_far_jump(linker);
be516ebe 1297}
1298
d1e4ebd9 1299static void check_extjump2(void *src)
be516ebe 1300{
d1e4ebd9 1301 u_int *ptr = src;
1302 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1303 (void)ptr;
be516ebe 1304}
1305
1306// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1307static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1308{
d1e4ebd9 1309 int diff = rt_val - rs_val;
3968e69e 1310 if ((-4096 < diff && diff < 4096)
1311 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1312 emit_addimm(rs, diff, rt);
3968e69e 1313 else if (rt_val == ~rs_val)
1314 emit_not(rs, rt);
d1e4ebd9 1315 else if (is_rotated_mask(rs_val ^ rt_val))
1316 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1317 else
d1e4ebd9 1318 emit_movimm(rt_val, rt);
be516ebe 1319}
1320
d1e4ebd9 1321// return 1 if the above function can do it's job cheaply
687b4580 1322static int is_similar_value(u_int v1, u_int v2)
be516ebe 1323{
687b4580 1324 int diff = v1 - v2;
3968e69e 1325 return (-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1327 || v1 == ~v2
d1e4ebd9 1328 || is_rotated_mask(v1 ^ v2);
1329}
1330
1331// trashes r2
1332static void pass_args64(u_int a0, u_int a1)
1333{
1334 if(a0==1&&a1==0) {
1335 // must swap
1336 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1337 }
1338 else if(a0!=0&&a1==0) {
1339 emit_mov64(a1,1);
1340 if (a0>=0) emit_mov64(a0,0);
1341 }
1342 else {
1343 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1344 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1345 }
be516ebe 1346}
1347
d1e4ebd9 1348static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1349{
1350 switch(type) {
1351 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1352 case LOADBU_STUB:
1353 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1354 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1355 case LOADHU_STUB:
1356 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1357 case LOADW_STUB:
1358 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1359 default: assert(0);
d1e4ebd9 1360 }
1361}
1362
1363#include "pcsxmem.h"
be516ebe 1364//#include "pcsxmem_inline.c"
1365
// Emit the code for read stub n.  The branch recorded at stubs[n].addr is
// retargeted to the code emitted here, and every path emitted below ends
// with a jump to stubs[n].retaddr.
//
// The emitted code looks the address register up in mem_rtab.  If doubling
// the table entry produces no carry, the load is done directly; otherwise the
// jump_handler_readN routine matching the access width is called.
1366static void do_readstub(int n)
1367{
1368 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
// point the branch recorded for this stub at the current output position
d1e4ebd9 1369 set_jump_target(stubs[n].addr, out);
1370 enum stub_type type = stubs[n].type;
1371 int i = stubs[n].a;
1372 int rs = stubs[n].b;
1373 const struct regstat *i_regs = (void *)stubs[n].c;
1374 u_int reglist = stubs[n].e;
1375 const signed char *i_regmap = i_regs->regmap;
1376 int rt;
// destination host register: the one mapped to FTEMP for C1LS/C2LS/LOADLR,
// otherwise the one mapped to rt1[i]
1377 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1378 rt=get_reg(i_regmap,FTEMP);
1379 }else{
1380 rt=get_reg(i_regmap,rt1[i]);
1381 }
1382 assert(rs>=0);
1383 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1384 void *restore_jump = NULL, *handler_jump = NULL;
// rs must not be picked as a scratch register
1385 reglist|=(1<<rs);
// look for a host register below HOST_CCREG that is not in reglist
1386 for (r = 0; r < HOST_CCREG; r++) {
1387 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1388 temp = r;
1389 break;
1390 }
1391 }
// the destination is about to be overwritten, so it is dropped from the
// save/restore list
1392 if(rt>=0&&rt1[i]!=0)
1393 reglist&=~(1<<rt);
// no free register found: save the registers now and use r0 (or r2 when rs is r0)
1394 if(temp==-1) {
1395 save_regs(reglist);
1396 regs_saved=1;
1397 temp=(rs==0)?2:0;
1398 }
// use r1 as the second scratch register when it is free (or already saved)
// and does not collide with temp or rs; otherwise temp2 stays HOST_TEMPREG
1399 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1400 temp2=1;
// temp = value of mem_rtab; temp2 = 64-bit table entry at index (rs >> 12)
1401 emit_readdword(&mem_rtab,temp);
1402 emit_shrimm(rs,12,temp2);
1403 emit_readdword_dualindexedx8(temp,temp2,temp2);
// doubling the entry sets the carry flag when its top bit was set; the
// carry-set case branches to the handler path below (target patched later)
1404 emit_adds64(temp2,temp2,temp2);
1405 handler_jump=out;
1406 emit_jc(0);
// direct path: load from [temp2 + rs] with the width given by the stub type
1407 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1408 switch(type) {
1409 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1410 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1411 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1412 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1413 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1414 default: assert(0);
d1e4ebd9 1415 }
1416 }
1417 if(regs_saved) {
1418 restore_jump=out;
1419 emit_jmp(0); // jump to reg restore
1420 }
1421 else
1422 emit_jmp(stubs[n].retaddr); // return address
// handler path starts here
1423 set_jump_target(handler_jump, out);
1424
1425 if(!regs_saved)
1426 save_regs(reglist);
// choose the handler routine by access width
1427 void *handler=NULL;
1428 if(type==LOADB_STUB||type==LOADBU_STUB)
1429 handler=jump_handler_read8;
1430 if(type==LOADH_STUB||type==LOADHU_STUB)
1431 handler=jump_handler_read16;
1432 if(type==LOADW_STUB)
1433 handler=jump_handler_read32;
1434 assert(handler);
// arguments: rs and temp2 go to the first two argument registers; the
// adjusted cycle count goes to register 2
1435 pass_args64(rs,temp2);
1436 int cc=get_reg(i_regmap,CCREG);
1437 if(cc<0)
1438 emit_loadreg(CCREG,2);
bb4f300c 1439 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1440 emit_far_call(handler);
d1e4ebd9 1441 // (no cycle reload after read)
// the handler result is taken from register 0 and extended into rt
1442 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1443 loadstore_extend(type,0,rt);
1444 }
// the direct path joins here when registers were saved up front
1445 if(restore_jump)
1446 set_jump_target(restore_jump, out);
1447 restore_regs(reglist);
1448 emit_jmp(stubs[n].retaddr);
be516ebe 1449}
1450
// Emit an inline read of the given stub type from the constant address addr.
// When get_direct_memhandler returns no handler, a plain load from the
// returned host address is emitted; otherwise a call to the handler
// (or to the generic jump_handler_readN for "dynamic" handlers) is emitted
// with caller-saved registers in reglist preserved around it.
81dbbf4c 1451static void inline_readstub(enum stub_type type, int i, u_int addr,
1452 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1453{
// NOTE(review): rs and rt are both looked up with `target`; rs falls back to
// the register mapped to -1 when target has none - confirm this is intended
d1e4ebd9 1454 int rs=get_reg(regmap,target);
1455 int rt=get_reg(regmap,target);
1456 if(rs<0) rs=get_reg(regmap,-1);
1457 assert(rs>=0);
1458 u_int is_dynamic=0;
1459 uintptr_t host_addr = 0;
1460 void *handler;
1461 int cc=get_reg(regmap,CCREG);
bb4f300c 1462 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
d1e4ebd9 1463 // return;
1464 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// no handler: the access can be done with a plain load
1465 if (handler == NULL) {
// nothing to do when there is no destination register or rt1[i] is 0
1466 if(rt<0||rt1[i]==0)
1467 return;
// rs is assumed to hold addr; turn it into host_addr when they differ
1468 if (addr != host_addr) {
1469 if (host_addr >= 0x100000000ull)
1470 abort(); // ROREG not implemented
1471 emit_movimm_from(addr, rs, host_addr, rs);
1472 }
1473 switch(type) {
1474 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1475 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1476 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1477 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1478 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1479 default: assert(0);
1480 }
1481 return;
1482 }
// for "dynamic" handlers the generic jump_handler_readN is called instead of
// the handler returned above
1483 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1484 if(is_dynamic) {
1485 if(type==LOADB_STUB||type==LOADBU_STUB)
1486 handler=jump_handler_read8;
1487 if(type==LOADH_STUB||type==LOADHU_STUB)
1488 handler=jump_handler_read16;
1489 if(type==LOADW_STUB)
1490 handler=jump_handler_read32;
1491 }
1492
1493 // call a memhandler
// the destination register is overwritten by the result, so it is not saved
1494 if(rt>=0&&rt1[i]!=0)
1495 reglist&=~(1<<rt);
1496 save_regs(reglist);
// register 0 = address
1497 if(target==0)
1498 emit_movimm(addr,0);
1499 else if(rs!=0)
1500 emit_mov(rs,0);
// register 2 = cycle count plus CLOCK_ADJUST(adj)
1501 if(cc<0)
1502 emit_loadreg(CCREG,2);
bb4f300c 1503 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
// dynamic: register 1 = mem_rtab entry for this page shifted left by one,
// built with adrp + add of the low 12 bits
3968e69e 1504 if(is_dynamic) {
1505 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1506 emit_adrp((void *)l1, 1);
1507 emit_addimm64(1, l1 & 0xfff, 1);
1508 }
d1e4ebd9 1509 else
2a014d73 1510 emit_far_call(do_memhandler_pre);
d1e4ebd9 1511
2a014d73 1512 emit_far_call(handler);
d1e4ebd9 1513
1514 // (no cycle reload after read)
// the result is taken from register 0 and extended into rt
1515 if(rt>=0&&rt1[i]!=0)
1516 loadstore_extend(type, 0, rt);
1517 restore_regs(reglist);
be516ebe 1518}
1519
1520static void do_writestub(int n)
1521{
1522 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1523 set_jump_target(stubs[n].addr, out);
1524 enum stub_type type=stubs[n].type;
1525 int i=stubs[n].a;
1526 int rs=stubs[n].b;
1527 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1528 u_int reglist=stubs[n].e;
1529 signed char *i_regmap=i_regs->regmap;
1530 int rt,r;
1531 if(itype[i]==C1LS||itype[i]==C2LS) {
1532 rt=get_reg(i_regmap,r=FTEMP);
1533 }else{
1534 rt=get_reg(i_regmap,r=rs2[i]);
1535 }
1536 assert(rs>=0);
1537 assert(rt>=0);
1538 int rtmp,temp=-1,temp2,regs_saved=0;
1539 void *restore_jump = NULL, *handler_jump = NULL;
1540 int reglist2=reglist|(1<<rs)|(1<<rt);
1541 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1542 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1543 temp = rtmp;
1544 break;
1545 }
1546 }
1547 if(temp==-1) {
1548 save_regs(reglist);
1549 regs_saved=1;
1550 for(rtmp=0;rtmp<=3;rtmp++)
1551 if(rtmp!=rs&&rtmp!=rt)
1552 {temp=rtmp;break;}
1553 }
1554 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1555 temp2=3;
1556 else {
1557 host_tempreg_acquire();
1558 temp2=HOST_TEMPREG;
1559 }
1560 emit_readdword(&mem_wtab,temp);
1561 emit_shrimm(rs,12,temp2);
1562 emit_readdword_dualindexedx8(temp,temp2,temp2);
1563 emit_adds64(temp2,temp2,temp2);
1564 handler_jump=out;
1565 emit_jc(0);
1566 switch(type) {
1567 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1568 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1569 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1570 default: assert(0);
1571 }
1572 if(regs_saved) {
1573 restore_jump=out;
1574 emit_jmp(0); // jump to reg restore
1575 }
1576 else
1577 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1578 set_jump_target(handler_jump, out);
1579
1580 // TODO FIXME: regalloc should prefer callee-saved regs
1581 if(!regs_saved)
1582 save_regs(reglist);
1583 void *handler=NULL;
1584 switch(type) {
1585 case STOREB_STUB: handler=jump_handler_write8; break;
1586 case STOREH_STUB: handler=jump_handler_write16; break;
1587 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1588 default: assert(0);
d1e4ebd9 1589 }
1590 assert(handler);
1591 pass_args(rs,rt);
1592 if(temp2!=3) {
1593 emit_mov64(temp2,3);
1594 host_tempreg_release();
1595 }
1596 int cc=get_reg(i_regmap,CCREG);
1597 if(cc<0)
1598 emit_loadreg(CCREG,2);
bb4f300c 1599 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
d1e4ebd9 1600 // returns new cycle_count
2a014d73 1601 emit_far_call(handler);
bb4f300c 1602 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
d1e4ebd9 1603 if(cc<0)
1604 emit_storereg(CCREG,2);
1605 if(restore_jump)
1606 set_jump_target(restore_jump, out);
1607 restore_regs(reglist);
1608 emit_jmp(stubs[n].retaddr);
be516ebe 1609}
1610
81dbbf4c 1611static void inline_writestub(enum stub_type type, int i, u_int addr,
1612 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1613{
687b4580 1614 int rs = get_reg(regmap,-1);
1615 int rt = get_reg(regmap,target);
1616 assert(rs >= 0);
1617 assert(rt >= 0);
1618 uintptr_t host_addr = 0;
1619 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1620 if (handler == NULL) {
d1e4ebd9 1621 if (addr != host_addr) {
1622 if (host_addr >= 0x100000000ull)
1623 abort(); // ROREG not implemented
687b4580 1624 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1625 }
1626 switch (type) {
687b4580 1627 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1628 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1629 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1630 default: assert(0);
1631 }
1632 return;
1633 }
1634
1635 // call a memhandler
1636 save_regs(reglist);
687b4580 1637 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1638 loadstore_extend(type, rt, 0);
1639 int cc, cc_use;
1640 cc = cc_use = get_reg(regmap, CCREG);
1641 if (cc < 0)
1642 emit_loadreg(CCREG, (cc_use = 2));
bb4f300c 1643 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
d1e4ebd9 1644
2a014d73 1645 emit_far_call(do_memhandler_pre);
1646 emit_far_call(handler);
1647 emit_far_call(do_memhandler_post);
bb4f300c 1648 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
d1e4ebd9 1649 if (cc < 0)
1650 emit_storereg(CCREG, cc_use);
687b4580 1651 restore_regs(reglist);
be516ebe 1652}
1653
// Compare size bytes at source and copy.  Returns the memcmp result:
// 0 when the two ranges are identical, non-zero otherwise.
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  const int diff = memcmp(source, copy, size);
  //printf("%s %p,%#x = %d\n", __func__, source, size, diff);
  return diff;
}
1660
1661// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1662static void do_dirty_stub_base(u_int vaddr)
1663{
1664 assert(slen <= MAXBLOCK);
1665 emit_loadlp_ofs(0, 0); // ldr x1, source
1666 emit_loadlp_ofs(0, 1); // ldr x2, copy
1667 emit_movz(slen*4, 2);
2a014d73 1668 emit_far_call(verify_code_arm64);
3968e69e 1669 void *jmp = out;
1670 emit_cbz(0, 0);
1671 emit_movz(vaddr & 0xffff, 0);
1672 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1673 emit_far_call(get_addr);
3968e69e 1674 emit_jmpreg(0);
1675 set_jump_target(jmp, out);
1676}
1677
1678static void assert_dirty_stub(const u_int *ptr)
1679{
1680 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1681 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1682 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4
1683 assert( ptr[8] == 0xd61f0000); // br x0
be516ebe 1684}
1685
d1e4ebd9 1686static void set_loadlp(u_int *loadl, void *lit)
be516ebe 1687{
d1e4ebd9 1688 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1689 assert((*loadl & ~0x1f) == 0x58000000);
1690 assert((ofs & 3) == 0);
1691 assert(ofs < 0x100000);
1692 *loadl |= (ofs >> 2) << 5;
1693}
1694
d1e4ebd9 1695static void do_dirty_stub_emit_literals(u_int *loadlps)
1696{
1697 set_loadlp(&loadlps[0], out);
1698 output_w64((uintptr_t)source);
1699 set_loadlp(&loadlps[1], out);
1700 output_w64((uintptr_t)copy);
be516ebe 1701}
1702
d1e4ebd9 1703static void *do_dirty_stub(int i)
be516ebe 1704{
1705 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1706 u_int *loadlps = (void *)out;
3968e69e 1707 do_dirty_stub_base(start + i*4);
d1e4ebd9 1708 void *entry = out;
be516ebe 1709 load_regs_entry(i);
d1e4ebd9 1710 if (entry == out)
1711 entry = instr_addr[i];
1712 emit_jmp(instr_addr[i]);
1713 do_dirty_stub_emit_literals(loadlps);
1714 return entry;
be516ebe 1715}
1716
3968e69e 1717static void do_dirty_stub_ds(void)
be516ebe 1718{
d1e4ebd9 1719 u_int *loadlps = (void *)out;
3968e69e 1720 do_dirty_stub_base(start + 1);
1721 void *lit_jumpover = out;
d1e4ebd9 1722 emit_jmp(out + 8*2);
1723 do_dirty_stub_emit_literals(loadlps);
3968e69e 1724 set_jump_target(lit_jumpover, out);
be516ebe 1725}
1726
3968e69e 1727static uint64_t get_from_ldr_literal(const u_int *i)
1728{
1729 signed int ofs;
1730 assert((i[0] & 0xff000000) == 0x58000000);
1731 ofs = i[0] << 8;
1732 ofs >>= 5+8;
1733 return *(uint64_t *)(i + ofs);
1734}
be516ebe 1735
3968e69e 1736static uint64_t get_from_movz(const u_int *i)
1737{
1738 assert((i[0] & 0x7fe00000) == 0x52800000);
1739 return (i[0] >> 5) & 0xffff;
1740}
be516ebe 1741
3968e69e 1742// Find the "clean" entry point from a "dirty" entry point
1743// by skipping past the call to verify_code
1744static void *get_clean_addr(u_int *addr)
be516ebe 1745{
3968e69e 1746 assert_dirty_stub(addr);
1747 return addr + 9;
be516ebe 1748}
be516ebe 1749
3968e69e 1750static int verify_dirty(const u_int *ptr)
be516ebe 1751{
3968e69e 1752 const void *source, *copy;
1753 u_int len;
1754 assert_dirty_stub(ptr);
1755 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1756 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1757 len = get_from_movz(&ptr[2]); // movz w3, #slen*4
1758 return !memcmp(source, copy, len);
1759}
1760
1761static int isclean(void *addr)
1762{
1763 const u_int *ptr = addr;
1764 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1765 assert_dirty_stub(ptr);
1766 return 0;
1767 }
1768 return 1;
1769}
1770
1771// get source that block at addr was compiled from (host pointers)
1772static void get_bounds(void *addr, u_char **start, u_char **end)
1773{
1774 const u_int *ptr = addr;
1775 assert_dirty_stub(ptr);
1776 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1777 *end = *start + get_from_movz(&ptr[2]); // movz w3, #slen*4
1778}
1779
1780/* Special assem */
1781
81dbbf4c 1782static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1783{
1784 save_load_regs_all(1, reglist);
81dbbf4c 1785 cop2_call_stall_check(op, i, i_regs, 0);
3968e69e 1786#ifdef PCNT
1787 emit_movimm(op, 0);
2a014d73 1788 emit_far_call(pcnt_gte_start);
3968e69e 1789#endif
1790 // pointer to cop2 regs
1791 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1792}
1793
1794static void c2op_epilogue(u_int op,u_int reglist)
1795{
1796#ifdef PCNT
1797 emit_movimm(op, 0);
2a014d73 1798 emit_far_call(pcnt_gte_end);
3968e69e 1799#endif
1800 save_load_regs_all(0, reglist);
be516ebe 1801}
1802
81dbbf4c 1803static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1804{
3968e69e 1805 u_int c2op=source[i]&0x3f;
1806 u_int hr,reglist_full=0,reglist;
1807 int need_flags,need_ir;
1808 for(hr=0;hr<HOST_REGS;hr++) {
1809 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1810 }
1811 reglist=reglist_full&CALLER_SAVE_REGS;
1812
1813 if (gte_handlers[c2op]!=NULL) {
1814 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1815 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1816 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1817 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1818 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1819 need_flags=0;
1820 //int shift = (source[i] >> 19) & 1;
1821 //int lm = (source[i] >> 10) & 1;
1822 switch(c2op) {
1823 default:
1824 (void)need_ir;
81dbbf4c 1825 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1826 emit_movimm(source[i],1); // opcode
1827 emit_writeword(1,&psxRegs.code);
2a014d73 1828 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1829 break;
1830 }
1831 c2op_epilogue(c2op,reglist);
1832 }
1833}
1834
1835static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1836{
1837 //value = value & 0x7ffff000;
1838 //if (value & 0x7f87e000) value |= 0x80000000;
1839 emit_andimm(sl, 0x7fffe000, temp);
1840 emit_testimm(temp, 0xff87ffff);
1841 emit_andimm(sl, 0x7ffff000, temp);
1842 host_tempreg_acquire();
1843 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1844 emit_cmovne_reg(HOST_TEMPREG, temp);
1845 host_tempreg_release();
1846 assert(0); // testing needed
1847}
1848
1849static void do_mfc2_31_one(u_int copr,signed char temp)
1850{
1851 emit_readshword(&reg_cop2d[copr],temp);
1852 emit_bicsar_imm(temp,31,temp);
1853 emit_cmpimm(temp,0xf80);
1854 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1855 emit_andimm(temp,0xf80,temp);
1856}
1857
1858static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1859{
1860 if (temp < 0) {
1861 host_tempreg_acquire();
1862 temp = HOST_TEMPREG;
1863 }
1864 do_mfc2_31_one(9,temp);
1865 emit_shrimm(temp,7,tl);
1866 do_mfc2_31_one(10,temp);
1867 emit_orrshr_imm(temp,2,tl);
1868 do_mfc2_31_one(11,temp);
1869 emit_orrshl_imm(temp,3,tl);
1870 emit_writeword(tl,&reg_cop2d[29]);
1871
1872 if (temp == HOST_TEMPREG)
1873 host_tempreg_release();
be516ebe 1874}
1875
// Emit code for MULT/MULTU/DIV/DIVU at instruction index i.
// When both source registers are non-zero guest registers the operation is
// emitted in full; otherwise (one operand is guest register 0) the constant
// result is written to whichever of HI/LO have host registers.
1876static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1877{
3968e69e 1878 // case 0x18: MULT
1879 // case 0x19: MULTU
1880 // case 0x1A: DIV
1881 // case 0x1B: DIVU
1882 if(rs1[i]&&rs2[i])
1883 {
1884 switch(opcode2[i])
1885 {
1886 case 0x18: // MULT
1887 case 0x19: // MULTU
1888 {
1889 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1890 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1891 signed char hi=get_reg(i_regs->regmap,HIREG);
1892 signed char lo=get_reg(i_regs->regmap,LOREG);
1893 assert(m1>=0);
1894 assert(m2>=0);
1895 assert(hi>=0);
1896 assert(lo>=0);
1897
// the full 64-bit product is produced in hi
1898 if(opcode2[i]==0x18) // MULT
1899 emit_smull(m1,m2,hi);
1900 else // MULTU
1901 emit_umull(m1,m2,hi);
1902
// lo takes a 32-bit copy of the product, then hi is shifted down by 32
1903 emit_mov(hi,lo);
1904 emit_shrimm64(hi,32,hi);
1905 break;
1906 }
1907 case 0x1A: // DIV
1908 case 0x1B: // DIVU
1909 {
1910 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1911 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1912 signed char quotient=get_reg(i_regs->regmap,LOREG);
1913 signed char remainder=get_reg(i_regs->regmap,HIREG);
1914 assert(numerator>=0);
1915 assert(denominator>=0);
1916 assert(quotient>=0);
1917 assert(remainder>=0);
1918
1919 if (opcode2[i] == 0x1A) // DIV
1920 emit_sdiv(numerator,denominator,quotient);
1921 else // DIVU
1922 emit_udiv(numerator,denominator,quotient);
// remainder is derived from quotient, denominator and numerator via msub
1923 emit_msub(quotient,denominator,numerator,remainder);
1924
1925 // div 0 quotient (remainder is already correct)
1926 host_tempreg_acquire();
// NOTE(review): the first argument below is the literal 0; elsewhere in this
// file a zero operand is spelled WZR and plain numbers denote host registers,
// so this may read host register 0 - confirm emit_sub_asrimm's operand
// semantics and the intended DIV-by-zero quotient
1927 if (opcode2[i] == 0x1A) // DIV
1928 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1929 else
1930 emit_movimm(~0,HOST_TEMPREG);
// replace the quotient with the value prepared above when denominator == 0
1931 emit_test(denominator,denominator);
1932 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1933 host_tempreg_release();
1934 break;
1935 }
1936 default:
1937 assert(0);
1938 }
1939 }
1940 else
1941 {
// at least one operand is guest register 0; HI/LO may be unallocated here,
// so every write is guarded
1942 signed char hr=get_reg(i_regs->regmap,HIREG);
1943 signed char lr=get_reg(i_regs->regmap,LOREG);
1944 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1945 {
1946 if (rs1[i]) {
1947 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1948 assert(numerator >= 0);
// HI receives the numerator
1949 if (hr >= 0)
1950 emit_mov(numerator,hr);
1951 if (lr >= 0) {
// NOTE(review): same literal-0 first argument as in the full DIV path
// above - confirm
1952 if (opcode2[i] == 0x1A) // DIV
1953 emit_sub_asrimm(0,numerator,31,lr);
1954 else
1955 emit_movimm(~0,lr);
1956 }
1957 }
1958 else {
// both operands are guest register 0: HI = 0, LO = ~0
1959 if (hr >= 0) emit_zeroreg(hr);
1960 if (lr >= 0) emit_movimm(~0,lr);
1961 }
1962 }
1963 else
1964 {
1965 // Multiply by zero is zero.
1966 if (hr >= 0) emit_zeroreg(hr);
1967 if (lr >= 0) emit_zeroreg(lr);
1968 }
1969 }
be516ebe 1970}
1971#define multdiv_assemble multdiv_assemble_arm64
1972
d1e4ebd9 1973static void do_jump_vaddr(u_int rs)
1974{
1975 if (rs != 0)
1976 emit_mov(rs, 0);
2a014d73 1977 emit_far_call(get_addr_ht);
d1e4ebd9 1978 emit_jmpreg(0);
1979}
1980
be516ebe 1981static void do_preload_rhash(u_int r) {
1982 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1983 // register. On ARM the hash can be done with a single instruction (below)
1984}
1985
1986static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1987 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1988}
1989
1990static void do_rhash(u_int rs,u_int rh) {
1991 emit_andimm(rs, 0xf8, rh);
1992}
1993
d1e4ebd9 1994static void do_miniht_load(int ht, u_int rh) {
1995 emit_add64(ht, rh, ht);
1996 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1997}
1998
d1e4ebd9 1999static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2000 emit_cmp(rh, rs);
2001 void *jaddr = out;
2002 emit_jeq(0);
2003 do_jump_vaddr(rs);
2004
2005 set_jump_target(jaddr, out);
2006 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2007 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2008 emit_jmpreg(ht);
be516ebe 2009}
2010
d1e4ebd9 2011// parsed by set_jump_target?
be516ebe 2012static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2013 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2014 emit_movk(return_address&0xffff,rt);
2015 add_to_linker(out,return_address,1);
2016 emit_adr(out,temp);
2017 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2018 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2019}
2020
// Make the code written to [start, end) visible to instruction fetch.
//
// GCC's __clear_cache is deliberately not used: it caches the
// icache/dcache line sizes, which can differ between cores on big.LITTLE
// systems. Instead CTR_EL0 is read on every call and the smallest line
// size seen so far is remembered and used.
static void clear_cache_arm64(char *start, char *end)
{
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t range_end = (uint64_t)end;
  uint64_t ctr_el0, line;
  size_t iline, dline;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  iline = 4 << ((ctr_el0 >> 0) & 0xf);
  dline = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (iline < icache_line_size)
    icache_line_size = iline;
  iline = icache_line_size;
  if (dline < dcache_line_size)
    dcache_line_size = dline;
  dline = dcache_line_size;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << 28))) {
    line = (uint64_t)start & ~(uint64_t)(dline - 1);
    while (line < range_end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dline;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if (!(ctr_el0 & (1 << 29))) {
    line = (uint64_t)start & ~(uint64_t)(iline - 1);
    while (line < range_end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += iline;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2061
2062// CPU-architecture-specific initialization
2a014d73 2063static void arch_init(void)
2064{
2065 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2066 struct tramp_insns *ops = ndrc->tramp.ops;
2067 size_t i;
2068 assert(!(diff & 3));
2069 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2070 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2071 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2072 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2073 }
2074 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2075}
2076
2077// vim:shiftwidth=2:expandtab