drc: use helpers for jump checks
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
d1e4ebd9 30void do_memhandler_pre();
31void do_memhandler_post();
be516ebe 32
33/* Linker */
/*
 * Re-point the PC-relative instruction at addr so that it refers to target.
 *
 * Handled instruction forms: b, b.cond, cbz/cbnz and adr. Anything else
 * aborts. The displacement is asserted to be within the reach of the
 * instruction form being patched.
 */
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
           || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Keep all of bits 4:0: for cbz/cbnz that is the full register number
    // (a 4-bit mask would turn e.g. w19 into w3), for b.cond it is the
    // condition plus a zero bit.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
60
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third instruction word of the stub must be an adr; the address that
// adr refers to is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // 21-bit displacement: high 19 bits at 23:5, low 2 bits at 30:29
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend without shifting a signed value
  int offset = (int)(imm21 ^ 0x100000) - 0x100000;
  return ptr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  uint32_t insn = (uint32_t)*i_ptr;

  if ((insn&0xfc000000) == 0x14000000) { // b: 26-bit word offset at 25:0
    int words = (int)((insn & 0x3ffffff) ^ 0x2000000) - 0x2000000;
    return i_ptr + words;
  }
  if ((insn&0xff000000) == 0x54000000 // b.cond
      || (insn&0x7e000000) == 0x34000000) { // cbz/cbnz
    // 19-bit word offset at 23:5
    int words = (int)(((insn >> 5) & 0x7ffff) ^ 0x40000) - 0x40000;
    return i_ptr + words;
  }
  assert(0);
  return NULL;
}
86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes in encoding order (COND_EQ is 0, COND_NV is 15);
// the emitters place these in the condition field of the instruction.
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC,
  COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT,
  COND_GT, COND_LE, COND_AW, COND_NV
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
d1e4ebd9 149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
687b4580 156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
d1e4ebd9 162*/
687b4580 163
// Pack rn into bits 9:5 and rd into bits 4:0; register 31 is rejected.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int bits;

  assert(rn < 31);
  assert(rd < 31);
  bits = rd;
  bits |= rn << 5;
  return bits;
}
170
// Pack rm into bits 20:16, rn into bits 9:5 and rd into bits 4:0.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int bits;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  bits = rd;
  bits |= rn << 5;
  bits |= rm << 16;
  return bits;
}
178
3968e69e 179static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180{
181 assert(ra < 32);
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
183}
184
// Pack imm7 into bits 21:15, rt2 into 14:10, rn into 9:5 and rt into 4:0.
// rt and rt2 may not be register 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int bits;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  bits = rt;
  bits |= rn << 5;
  bits |= rt2 << 10;
  bits |= imm7 << 15;
  return bits;
}
193
687b4580 194static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195{
196 assert(imm6 <= 63);
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
198}
199
// Pack a 16-bit immediate into bits 20:5 and rd into bits 4:0.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int bits;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  bits = rd;
  bits |= imm16 << 5;
  return bits;
}
206
// Pack a 12-bit immediate into bits 21:10, rn into 9:5 and rd into 4:0.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int bits;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  bits = rd;
  bits |= rn << 5;
  bits |= imm12 << 10;
  return bits;
}
214
// Pack a 9-bit immediate into bits 20:12, rn into 9:5 and the transfer
// register into 4:0; neither register may be 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int bits;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  bits = rd;
  bits |= rn << 5;
  bits |= imm9 << 12;
  return bits;
}
222
// Pack a 19-bit immediate into bits 23:5 and rt into bits 4:0.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int bits;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  bits = rt;
  bits |= imm19 << 5;
  return bits;
}
229
// Pack n (bit 22), immr (21:16), imms (15:10), rn (9:5) and rd (4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int bits;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  bits = rd;
  bits |= rn << 5;
  bits |= imms << 10;
  bits |= immr << 16;
  bits |= n << 22;
  return bits;
}
239
240static u_int genjmp(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 244 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
be516ebe 247 return 0;
248 }
d1e4ebd9 249 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 250}
251
d1e4ebd9 252static u_int genjmpcc(const u_char *addr)
be516ebe 253{
254 intptr_t offset = addr - out;
d1e4ebd9 255 if ((uintptr_t)addr < 3) return 0;
be516ebe 256 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 clears every set bit), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
268
269// This function returns true if the argument contains a
270// non-empty sequence of ones (possibly rotated) with the remainder zero.
271static uint32_t is_rotated_mask(u_int value)
272{
3968e69e 273 if (value == 0 || value == ~0)
be516ebe 274 return 0;
d1e4ebd9 275 if (is_mask((value - 1) | value))
276 return 1;
277 return is_mask((~value - 1) | ~value);
278}
279
280static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
281{
282 int lzeros, tzeros, ones;
283 assert(value != 0);
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
288 *immr = (32 - tzeros) & 31;
289 *imms = ones - 1;
290 return;
be516ebe 291 }
d1e4ebd9 292 value = ~value;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
3968e69e 297 *immr = lzeros;
d1e4ebd9 298 *imms = 31 - ones;
299 return;
300 }
3968e69e 301 abort();
be516ebe 302}
303
304static void emit_mov(u_int rs, u_int rt)
305{
687b4580 306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 314}
315
687b4580 316static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
d1e4ebd9 324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 326}
327
d1e4ebd9 328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
3968e69e 330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
333
334static void emit_neg(u_int rs, u_int rt)
335{
336 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
337 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 338}
339
687b4580 340static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 341{
d1e4ebd9 342 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 343 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 344}
345
3968e69e 346static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
347{
348 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
349 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
350}
351
d1e4ebd9 352static void emit_movz(u_int imm, u_int rt)
be516ebe 353{
d1e4ebd9 354 assem_debug("movz %s,#%#x\n", regname[rt], imm);
355 output_w32(0x52800000 | imm16_rd(imm, rt));
356}
357
358static void emit_movz_lsl16(u_int imm, u_int rt)
359{
360 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x52a00000 | imm16_rd(imm, rt));
362}
363
364static void emit_movn(u_int imm, u_int rt)
365{
366 assem_debug("movn %s,#%#x\n", regname[rt], imm);
367 output_w32(0x12800000 | imm16_rd(imm, rt));
368}
369
370static void emit_movn_lsl16(u_int imm,u_int rt)
371{
372 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
373 output_w32(0x12a00000 | imm16_rd(imm, rt));
374}
375
376static void emit_movk(u_int imm,u_int rt)
377{
378 assem_debug("movk %s,#%#x\n", regname[rt], imm);
379 output_w32(0x72800000 | imm16_rd(imm, rt));
380}
381
382static void emit_movk_lsl16(u_int imm,u_int rt)
383{
384 assert(imm<65536);
3968e69e 385 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 386 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 387}
388
389static void emit_zeroreg(u_int rt)
390{
d1e4ebd9 391 emit_movz(0, rt);
be516ebe 392}
393
be516ebe 394static void emit_movimm(u_int imm, u_int rt)
395{
d1e4ebd9 396 if (imm < 65536)
397 emit_movz(imm, rt);
398 else if ((~imm) < 65536)
399 emit_movn(~imm, rt);
400 else if ((imm&0xffff) == 0)
401 emit_movz_lsl16(imm >> 16, rt);
402 else if (((~imm)&0xffff) == 0)
403 emit_movn_lsl16(~imm >> 16, rt);
404 else if (is_rotated_mask(imm)) {
405 u_int immr, imms;
406 gen_logical_imm(imm, &immr, &imms);
407 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
408 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
409 }
be516ebe 410 else {
d1e4ebd9 411 emit_movz(imm & 0xffff, rt);
412 emit_movk_lsl16(imm >> 16, rt);
be516ebe 413 }
414}
415
687b4580 416static void emit_readword(void *addr, u_int rt)
417{
418 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
419 if (!(offset & 3) && offset <= 16380) {
420 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
421 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
422 }
423 else
3968e69e 424 abort();
687b4580 425}
426
d1e4ebd9 427static void emit_readdword(void *addr, u_int rt)
428{
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 7) && offset <= 32760) {
431 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
432 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
433 }
3968e69e 434 else
435 abort();
436}
437
438static void emit_readshword(void *addr, u_int rt)
439{
440 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
441 if (!(offset & 1) && offset <= 8190) {
442 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
443 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
444 }
d1e4ebd9 445 else
446 assert(0);
447}
448
be516ebe 449static void emit_loadreg(u_int r, u_int hr)
450{
d1e4ebd9 451 int is64 = 0;
be516ebe 452 assert(r < 64);
453 if (r == 0)
454 emit_zeroreg(hr);
455 else {
7c3a5182 456 void *addr = &psxRegs.GPR.r[r];
be516ebe 457 switch (r) {
7c3a5182 458 //case HIREG: addr = &hi; break;
459 //case LOREG: addr = &lo; break;
be516ebe 460 case CCREG: addr = &cycle_count; break;
461 case CSREG: addr = &Status; break;
d1e4ebd9 462 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 463 default: assert(r < 34); break;
be516ebe 464 }
d1e4ebd9 465 if (is64)
466 emit_readdword(addr, hr);
467 else
468 emit_readword(addr, hr);
be516ebe 469 }
470}
471
687b4580 472static void emit_writeword(u_int rt, void *addr)
473{
474 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
475 if (!(offset & 3) && offset <= 16380) {
476 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
477 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
478 }
479 else
480 assert(0);
481}
482
d1e4ebd9 483static void emit_writedword(u_int rt, void *addr)
484{
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 7) && offset <= 32760) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 488 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 489 }
490 else
3968e69e 491 abort();
d1e4ebd9 492}
493
687b4580 494static void emit_storereg(u_int r, u_int hr)
be516ebe 495{
496 assert(r < 64);
7c3a5182 497 void *addr = &psxRegs.GPR.r[r];
be516ebe 498 switch (r) {
7c3a5182 499 //case HIREG: addr = &hi; break;
500 //case LOREG: addr = &lo; break;
be516ebe 501 case CCREG: addr = &cycle_count; break;
7c3a5182 502 default: assert(r < 34); break;
be516ebe 503 }
687b4580 504 emit_writeword(hr, addr);
be516ebe 505}
506
507static void emit_test(u_int rs, u_int rt)
508{
d1e4ebd9 509 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
510 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 511}
512
d1e4ebd9 513static void emit_testimm(u_int rs, u_int imm)
be516ebe 514{
d1e4ebd9 515 u_int immr, imms;
687b4580 516 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 517 assert(is_rotated_mask(imm)); // good enough for PCSX
518 gen_logical_imm(imm, &immr, &imms);
3968e69e 519 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 520}
521
522static void emit_not(u_int rs,u_int rt)
523{
524 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 525 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 526}
527
be516ebe 528static void emit_and(u_int rs1,u_int rs2,u_int rt)
529{
530 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 531 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 532}
533
534static void emit_or(u_int rs1,u_int rs2,u_int rt)
535{
536 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 537 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 538}
539
3968e69e 540static void emit_bic(u_int rs1,u_int rs2,u_int rt)
541{
542 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
543 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
544}
545
be516ebe 546static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
547{
be516ebe 548 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 549 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 550}
551
552static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
553{
be516ebe 554 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 555 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 556}
557
3968e69e 558static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
559{
560 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
561 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
562}
563
be516ebe 564static void emit_xor(u_int rs1,u_int rs2,u_int rt)
565{
566 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 567 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 568}
569
3968e69e 570static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
571{
572 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
573 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
574}
575
d1e4ebd9 576static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 577{
d1e4ebd9 578 unused const char *st = s ? "s" : "";
579 s = s ? 0x20000000 : 0;
580 is64 = is64 ? 0x80000000 : 0;
687b4580 581 if (imm < 4096) {
d1e4ebd9 582 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
583 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 584 }
585 else if (-imm < 4096) {
3968e69e 586 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 587 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
588 }
589 else if (imm < 16777216) {
590 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
591 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
592 if ((imm & 0xfff) || s) {
593 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 595 }
596 }
597 else if (-imm < 16777216) {
598 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
599 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
602 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
603 }
687b4580 604 }
605 else
3968e69e 606 abort();
be516ebe 607}
608
d1e4ebd9 609static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
610{
611 emit_addimm_s(0, 0, rs, imm, rt);
612}
613
614static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
615{
616 emit_addimm_s(0, 1, rs, imm, rt);
617}
618
be516ebe 619static void emit_addimm_and_set_flags(int imm, u_int rt)
620{
d1e4ebd9 621 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 622}
623
624static void emit_addimm_no_flags(u_int imm,u_int rt)
625{
626 emit_addimm(rt,imm,rt);
627}
628
d1e4ebd9 629static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 630{
d1e4ebd9 631 const char *names[] = { "and", "orr", "eor", "ands" };
632 const char *name = names[op];
633 u_int immr, imms;
634 op = op << 29;
635 if (is_rotated_mask(imm)) {
636 gen_logical_imm(imm, &immr, &imms);
637 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
638 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
639 }
640 else {
641 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
642 host_tempreg_acquire();
643 emit_movimm(imm, HOST_TEMPREG);
644 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
645 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
646 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
647 host_tempreg_release();
648 }
649 (void)name;
be516ebe 650}
651
d1e4ebd9 652static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 653{
d1e4ebd9 654 if (imm == 0)
655 emit_zeroreg(rt);
656 else
657 emit_logicop_imm(0, rs, imm, rt);
be516ebe 658}
659
d1e4ebd9 660static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 661{
d1e4ebd9 662 if (imm == 0) {
663 if (rs != rt)
664 emit_mov(rs, rt);
665 }
666 else
667 emit_logicop_imm(1, rs, imm, rt);
be516ebe 668}
669
d1e4ebd9 670static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 671{
d1e4ebd9 672 if (imm == 0) {
673 if (rs != rt)
674 emit_mov(rs, rt);
675 }
676 else
677 emit_logicop_imm(2, rs, imm, rt);
be516ebe 678}
679
d1e4ebd9 680static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 681{
d1e4ebd9 682 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 684}
685
d1e4ebd9 686static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 687{
d1e4ebd9 688 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 690}
691
692static void emit_shlimm(u_int rs,u_int imm,u_int rt)
693{
be516ebe 694 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 696}
697
3968e69e 698static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 699{
3968e69e 700 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 702}
703
3968e69e 704static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 705{
be516ebe 706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 707 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 708}
709
710static void emit_sarimm(u_int rs,u_int imm,u_int rt)
711{
be516ebe 712 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 713 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 714}
715
716static void emit_rorimm(u_int rs,u_int imm,u_int rt)
717{
3968e69e 718 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 719 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 720}
721
722static void emit_signextend16(u_int rs, u_int rt)
723{
724 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 725 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 726}
727
d1e4ebd9 728static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 729{
3968e69e 730 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 731 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 732}
733
d1e4ebd9 734static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 735{
d1e4ebd9 736 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 738}
739
d1e4ebd9 740static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 741{
d1e4ebd9 742 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 744}
745
d1e4ebd9 746static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 747{
d1e4ebd9 748 if (imm < 4096) {
749 assem_debug("cmp %s,%#x\n", regname[rs], imm);
750 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
751 }
752 else if (-imm < 4096) {
753 assem_debug("cmn %s,%#x\n", regname[rs], imm);
754 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
755 }
756 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 757 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 758 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
759 }
760 else {
761 host_tempreg_acquire();
762 emit_movimm(imm, HOST_TEMPREG);
763 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
764 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
765 host_tempreg_release();
766 }
be516ebe 767}
768
d1e4ebd9 769static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 770{
d1e4ebd9 771 assert(imm == 0 || imm == 1);
772 assert(cond0 < 0x10);
773 assert(cond1 < 0x10);
774 if (imm) {
775 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
776 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
777 } else {
778 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
779 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
780 }
be516ebe 781}
782
d1e4ebd9 783static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 784{
d1e4ebd9 785 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 786}
787
d1e4ebd9 788static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 789{
d1e4ebd9 790 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 791}
792
793static void emit_cmovb_imm(int imm,u_int rt)
794{
d1e4ebd9 795 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 796}
797
3968e69e 798static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 799{
3968e69e 800 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
801 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 802}
803
804static void emit_cmovne_reg(u_int rs,u_int rt)
805{
d1e4ebd9 806 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 808}
809
810static void emit_cmovl_reg(u_int rs,u_int rt)
811{
d1e4ebd9 812 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 814}
815
816static void emit_cmovs_reg(u_int rs,u_int rt)
817{
d1e4ebd9 818 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 820}
821
3968e69e 822static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
823{
824 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
825 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
826}
827
be516ebe 828static void emit_slti32(u_int rs,int imm,u_int rt)
829{
830 if(rs!=rt) emit_zeroreg(rt);
831 emit_cmpimm(rs,imm);
832 if(rs==rt) emit_movimm(0,rt);
833 emit_cmovl_imm(1,rt);
834}
835
836static void emit_sltiu32(u_int rs,int imm,u_int rt)
837{
838 if(rs!=rt) emit_zeroreg(rt);
839 emit_cmpimm(rs,imm);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovb_imm(1,rt);
842}
843
844static void emit_cmp(u_int rs,u_int rt)
845{
846 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 847 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 848}
849
850static void emit_set_gz32(u_int rs, u_int rt)
851{
852 //assem_debug("set_gz32\n");
853 emit_cmpimm(rs,1);
854 emit_movimm(1,rt);
855 emit_cmovl_imm(0,rt);
856}
857
858static void emit_set_nz32(u_int rs, u_int rt)
859{
860 //assem_debug("set_nz32\n");
d1e4ebd9 861 if(rs!=rt) emit_mov(rs,rt);
862 emit_test(rs,rs);
863 emit_cmovne_imm(1,rt);
be516ebe 864}
865
866static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
867{
868 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
869 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
870 emit_cmp(rs1,rs2);
871 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
872 emit_cmovl_imm(1,rt);
873}
874
875static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
876{
877 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
878 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
879 emit_cmp(rs1,rs2);
880 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
881 emit_cmovb_imm(1,rt);
882}
883
2a014d73 884static int can_jump_or_call(const void *a)
885{
886 intptr_t diff = (u_char *)a - out;
887 return (-134217728 <= diff && diff <= 134217727);
888}
889
d1e4ebd9 890static void emit_call(const void *a)
be516ebe 891{
d1e4ebd9 892 intptr_t diff = (u_char *)a - out;
893 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 894 assert(!(diff & 3));
895 if (-134217728 <= diff && diff <= 134217727)
896 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
897 else
3968e69e 898 abort();
be516ebe 899}
900
d1e4ebd9 901static void emit_jmp(const void *a)
be516ebe 902{
d1e4ebd9 903 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
904 u_int offset = genjmp(a);
905 output_w32(0x14000000 | offset);
be516ebe 906}
907
d1e4ebd9 908static void emit_jne(const void *a)
be516ebe 909{
d1e4ebd9 910 assem_debug("bne %p\n", a);
911 u_int offset = genjmpcc(a);
912 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 913}
914
7c3a5182 915static void emit_jeq(const void *a)
be516ebe 916{
d1e4ebd9 917 assem_debug("beq %p\n", a);
918 u_int offset = genjmpcc(a);
919 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 920}
921
7c3a5182 922static void emit_js(const void *a)
be516ebe 923{
d1e4ebd9 924 assem_debug("bmi %p\n", a);
925 u_int offset = genjmpcc(a);
926 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 927}
928
7c3a5182 929static void emit_jns(const void *a)
be516ebe 930{
d1e4ebd9 931 assem_debug("bpl %p\n", a);
932 u_int offset = genjmpcc(a);
933 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 934}
935
7c3a5182 936static void emit_jl(const void *a)
be516ebe 937{
d1e4ebd9 938 assem_debug("blt %p\n", a);
939 u_int offset = genjmpcc(a);
940 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 941}
942
7c3a5182 943static void emit_jge(const void *a)
be516ebe 944{
d1e4ebd9 945 assem_debug("bge %p\n", a);
946 u_int offset = genjmpcc(a);
947 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 948}
949
7c3a5182 950static void emit_jno(const void *a)
be516ebe 951{
d1e4ebd9 952 assem_debug("bvc %p\n", a);
953 u_int offset = genjmpcc(a);
954 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 955}
956
7c3a5182 957static void emit_jc(const void *a)
be516ebe 958{
d1e4ebd9 959 assem_debug("bcs %p\n", a);
960 u_int offset = genjmpcc(a);
961 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 962}
963
3968e69e 964static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 965{
3968e69e 966 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 967 u_int offset = genjmpcc(a);
3968e69e 968 is64 = is64 ? 0x80000000 : 0;
969 isnz = isnz ? 0x01000000 : 0;
970 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
971}
972
973static void emit_cbz(const void *a, u_int r)
974{
975 emit_cb(0, 0, a, r);
be516ebe 976}
977
978static void emit_jmpreg(u_int r)
979{
3968e69e 980 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 981 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 982}
983
984static void emit_retreg(u_int r)
985{
d1e4ebd9 986 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 987 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
988}
989
990static void emit_ret(void)
991{
992 emit_retreg(LR);
993}
994
d1e4ebd9 995static void emit_adr(void *addr, u_int rt)
996{
997 intptr_t offset = (u_char *)addr - out;
998 assert(-1048576 <= offset && offset < 1048576);
3968e69e 999 assert(rt < 31);
d1e4ebd9 1000 assem_debug("adr x%d,#%#lx\n", rt, offset);
1001 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1002}
1003
3968e69e 1004static void emit_adrp(void *addr, u_int rt)
1005{
1006 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1007 assert(-4294967296l <= offset && offset < 4294967296l);
1008 assert(rt < 31);
1009 offset >>= 12;
1010 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1011 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1012}
1013
be516ebe 1014static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1015{
d1e4ebd9 1016 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1017 assert(-256 <= offset && offset < 256);
1018 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1019}
1020
1021static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1022{
1023 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1024 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1025}
1026
1027static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1028{
1029 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1030 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1031}
1032
1033static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1034{
1035 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1036 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1037}
1038
1039static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1040{
1041 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1043}
1044
1045static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062
1063static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064{
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1067}
1068
1069static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070{
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1073}
1074
be516ebe 1075static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1076{
d1e4ebd9 1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1080}
1081
1082static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1083{
d1e4ebd9 1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1087}
1088
1089static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1090{
d1e4ebd9 1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1094}
1095
1096static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1097{
d1e4ebd9 1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1101}
1102
be516ebe 1103static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1104{
3968e69e 1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1108 }
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1112 }
687b4580 1113 else
1114 assert(0);
be516ebe 1115}
1116
1117static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1118{
3968e69e 1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1122 }
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1126 }
687b4580 1127 else
1128 assert(0);
be516ebe 1129}
1130
1131static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1132{
3968e69e 1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
687b4580 1141 else
1142 assert(0);
be516ebe 1143}
1144
3968e69e 1145static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1146{
3968e69e 1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1149}
1150
3968e69e 1151static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1152{
3968e69e 1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1155}
1156
1157static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1158{
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1161}
1162
1163static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1164{
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1167}
1168
3968e69e 1169static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1170{
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1173}
1174
1175static void emit_clz(u_int rs, u_int rt)
be516ebe 1176{
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1179}
1180
be516ebe 1181// special case for checking invalid_code
d1e4ebd9 1182static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1183{
d1e4ebd9 1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
be516ebe 1190}
1191
3968e69e 1192// special for loadlr_assemble, rs2 is destroyed
1193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1194{
3968e69e 1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
be516ebe 1197}
1198
3968e69e 1199static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1200{
3968e69e 1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
be516ebe 1203}
1204
d1e4ebd9 1205static void emit_loadlp_ofs(u_int ofs, u_int rt)
1206{
1207 output_w32(0x58000000 | imm19_rt(ofs, rt));
1208}
1209
687b4580 1210static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1211{
687b4580 1212 u_int op = 0xb9000000;
d1e4ebd9 1213 unused const char *ldst = is_st ? "st" : "ld";
1214 unused char rp = is64 ? 'x' : 'w';
687b4580 1215 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1216 is64 = is64 ? 1 : 0;
1217 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1218 ofs = (ofs >> (2+is64));
687b4580 1219 if (!is_st) op |= 0x00400000;
1220 if (is64) op |= 0x40000000;
d1e4ebd9 1221 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1222}
1223
687b4580 1224static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1225{
687b4580 1226 u_int op = 0x29000000;
d1e4ebd9 1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
687b4580 1229 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1232 ofs = (ofs >> (2+is64));
1233 assert(-64 <= ofs && ofs <= 63);
1234 ofs &= 0x7f;
1235 if (!is_st) op |= 0x00400000;
1236 if (is64) op |= 0x80000000;
d1e4ebd9 1237 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1238}
1239
1240static void save_load_regs_all(int is_store, u_int reglist)
1241{
1242 int ofs = 0, c = 0;
1243 u_int r, pair[2];
1244 for (r = 0; reglist; r++, reglist >>= 1) {
1245 if (reglist & 1)
1246 pair[c++] = r;
1247 if (c == 2) {
1248 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1249 ofs += 8 * 2;
1250 c = 0;
1251 }
1252 }
1253 if (c) {
1254 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1255 ofs += 8;
1256 }
1257 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1258}
1259
1260// Save registers before function call
1261static void save_regs(u_int reglist)
1262{
1263 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1264 save_load_regs_all(1, reglist);
be516ebe 1265}
1266
1267// Restore registers after function call
1268static void restore_regs(u_int reglist)
1269{
1270 reglist &= CALLER_SAVE_REGS;
687b4580 1271 save_load_regs_all(0, reglist);
be516ebe 1272}
1273
1274/* Stubs/epilogue */
1275
1276static void literal_pool(int n)
1277{
1278 (void)literals;
1279}
1280
// Emits nothing in this backend.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1284
d1e4ebd9 1285// parsed by get_pointer, find_extjump_insn
1286static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1287{
d1e4ebd9 1288 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1289
d1e4ebd9 1290 emit_movz(target & 0xffff, 0);
1291 emit_movk_lsl16(target >> 16, 0);
1292
1293 // addr is in the current recompiled block (max 256k)
1294 // offset shouldn't exceed +/-1MB
1295 emit_adr(addr, 1);
2a014d73 1296 emit_far_jump(linker);
be516ebe 1297}
1298
d1e4ebd9 1299static void check_extjump2(void *src)
be516ebe 1300{
d1e4ebd9 1301 u_int *ptr = src;
1302 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1303 (void)ptr;
be516ebe 1304}
1305
1306// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1307static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1308{
d1e4ebd9 1309 int diff = rt_val - rs_val;
3968e69e 1310 if ((-4096 < diff && diff < 4096)
1311 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1312 emit_addimm(rs, diff, rt);
3968e69e 1313 else if (rt_val == ~rs_val)
1314 emit_not(rs, rt);
d1e4ebd9 1315 else if (is_rotated_mask(rs_val ^ rt_val))
1316 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1317 else
d1e4ebd9 1318 emit_movimm(rt_val, rt);
be516ebe 1319}
1320
d1e4ebd9 1321// return 1 if the above function can do it's job cheaply
687b4580 1322static int is_similar_value(u_int v1, u_int v2)
be516ebe 1323{
687b4580 1324 int diff = v1 - v2;
3968e69e 1325 return (-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1327 || v1 == ~v2
d1e4ebd9 1328 || is_rotated_mask(v1 ^ v2);
1329}
1330
1331// trashes r2
1332static void pass_args64(u_int a0, u_int a1)
1333{
1334 if(a0==1&&a1==0) {
1335 // must swap
1336 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1337 }
1338 else if(a0!=0&&a1==0) {
1339 emit_mov64(a1,1);
1340 if (a0>=0) emit_mov64(a0,0);
1341 }
1342 else {
1343 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1344 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1345 }
be516ebe 1346}
1347
d1e4ebd9 1348static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1349{
1350 switch(type) {
1351 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1352 case LOADBU_STUB:
1353 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1354 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1355 case LOADHU_STUB:
1356 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1357 case LOADW_STUB:
1358 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1359 default: assert(0);
d1e4ebd9 1360 }
1361}
1362
1363#include "pcsxmem.h"
be516ebe 1364//#include "pcsxmem_inline.c"
1365
1366static void do_readstub(int n)
1367{
1368 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1369 set_jump_target(stubs[n].addr, out);
1370 enum stub_type type = stubs[n].type;
1371 int i = stubs[n].a;
1372 int rs = stubs[n].b;
1373 const struct regstat *i_regs = (void *)stubs[n].c;
1374 u_int reglist = stubs[n].e;
1375 const signed char *i_regmap = i_regs->regmap;
1376 int rt;
1377 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1378 rt=get_reg(i_regmap,FTEMP);
1379 }else{
1380 rt=get_reg(i_regmap,rt1[i]);
1381 }
1382 assert(rs>=0);
1383 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1384 void *restore_jump = NULL, *handler_jump = NULL;
1385 reglist|=(1<<rs);
1386 for (r = 0; r < HOST_CCREG; r++) {
1387 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1388 temp = r;
1389 break;
1390 }
1391 }
1392 if(rt>=0&&rt1[i]!=0)
1393 reglist&=~(1<<rt);
1394 if(temp==-1) {
1395 save_regs(reglist);
1396 regs_saved=1;
1397 temp=(rs==0)?2:0;
1398 }
1399 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1400 temp2=1;
1401 emit_readdword(&mem_rtab,temp);
1402 emit_shrimm(rs,12,temp2);
1403 emit_readdword_dualindexedx8(temp,temp2,temp2);
1404 emit_adds64(temp2,temp2,temp2);
1405 handler_jump=out;
1406 emit_jc(0);
1407 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1408 switch(type) {
1409 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1410 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1411 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1412 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1413 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1414 default: assert(0);
d1e4ebd9 1415 }
1416 }
1417 if(regs_saved) {
1418 restore_jump=out;
1419 emit_jmp(0); // jump to reg restore
1420 }
1421 else
1422 emit_jmp(stubs[n].retaddr); // return address
1423 set_jump_target(handler_jump, out);
1424
1425 if(!regs_saved)
1426 save_regs(reglist);
1427 void *handler=NULL;
1428 if(type==LOADB_STUB||type==LOADBU_STUB)
1429 handler=jump_handler_read8;
1430 if(type==LOADH_STUB||type==LOADHU_STUB)
1431 handler=jump_handler_read16;
1432 if(type==LOADW_STUB)
1433 handler=jump_handler_read32;
1434 assert(handler);
1435 pass_args64(rs,temp2);
1436 int cc=get_reg(i_regmap,CCREG);
1437 if(cc<0)
1438 emit_loadreg(CCREG,2);
bb4f300c 1439 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1440 emit_far_call(handler);
d1e4ebd9 1441 // (no cycle reload after read)
1442 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1443 loadstore_extend(type,0,rt);
1444 }
1445 if(restore_jump)
1446 set_jump_target(restore_jump, out);
1447 restore_regs(reglist);
1448 emit_jmp(stubs[n].retaddr);
be516ebe 1449}
1450
1451static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1452{
d1e4ebd9 1453 int rs=get_reg(regmap,target);
1454 int rt=get_reg(regmap,target);
1455 if(rs<0) rs=get_reg(regmap,-1);
1456 assert(rs>=0);
1457 u_int is_dynamic=0;
1458 uintptr_t host_addr = 0;
1459 void *handler;
1460 int cc=get_reg(regmap,CCREG);
bb4f300c 1461 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
d1e4ebd9 1462 // return;
1463 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1464 if (handler == NULL) {
1465 if(rt<0||rt1[i]==0)
1466 return;
1467 if (addr != host_addr) {
1468 if (host_addr >= 0x100000000ull)
1469 abort(); // ROREG not implemented
1470 emit_movimm_from(addr, rs, host_addr, rs);
1471 }
1472 switch(type) {
1473 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1474 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1475 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1476 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1477 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1478 default: assert(0);
1479 }
1480 return;
1481 }
1482 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1483 if(is_dynamic) {
1484 if(type==LOADB_STUB||type==LOADBU_STUB)
1485 handler=jump_handler_read8;
1486 if(type==LOADH_STUB||type==LOADHU_STUB)
1487 handler=jump_handler_read16;
1488 if(type==LOADW_STUB)
1489 handler=jump_handler_read32;
1490 }
1491
1492 // call a memhandler
1493 if(rt>=0&&rt1[i]!=0)
1494 reglist&=~(1<<rt);
1495 save_regs(reglist);
1496 if(target==0)
1497 emit_movimm(addr,0);
1498 else if(rs!=0)
1499 emit_mov(rs,0);
1500 if(cc<0)
1501 emit_loadreg(CCREG,2);
bb4f300c 1502 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
3968e69e 1503 if(is_dynamic) {
1504 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1505 emit_adrp((void *)l1, 1);
1506 emit_addimm64(1, l1 & 0xfff, 1);
1507 }
d1e4ebd9 1508 else
2a014d73 1509 emit_far_call(do_memhandler_pre);
d1e4ebd9 1510
2a014d73 1511 emit_far_call(handler);
d1e4ebd9 1512
1513 // (no cycle reload after read)
1514 if(rt>=0&&rt1[i]!=0)
1515 loadstore_extend(type, 0, rt);
1516 restore_regs(reglist);
be516ebe 1517}
1518
1519static void do_writestub(int n)
1520{
1521 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1522 set_jump_target(stubs[n].addr, out);
1523 enum stub_type type=stubs[n].type;
1524 int i=stubs[n].a;
1525 int rs=stubs[n].b;
1526 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1527 u_int reglist=stubs[n].e;
1528 signed char *i_regmap=i_regs->regmap;
1529 int rt,r;
1530 if(itype[i]==C1LS||itype[i]==C2LS) {
1531 rt=get_reg(i_regmap,r=FTEMP);
1532 }else{
1533 rt=get_reg(i_regmap,r=rs2[i]);
1534 }
1535 assert(rs>=0);
1536 assert(rt>=0);
1537 int rtmp,temp=-1,temp2,regs_saved=0;
1538 void *restore_jump = NULL, *handler_jump = NULL;
1539 int reglist2=reglist|(1<<rs)|(1<<rt);
1540 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1541 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1542 temp = rtmp;
1543 break;
1544 }
1545 }
1546 if(temp==-1) {
1547 save_regs(reglist);
1548 regs_saved=1;
1549 for(rtmp=0;rtmp<=3;rtmp++)
1550 if(rtmp!=rs&&rtmp!=rt)
1551 {temp=rtmp;break;}
1552 }
1553 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1554 temp2=3;
1555 else {
1556 host_tempreg_acquire();
1557 temp2=HOST_TEMPREG;
1558 }
1559 emit_readdword(&mem_wtab,temp);
1560 emit_shrimm(rs,12,temp2);
1561 emit_readdword_dualindexedx8(temp,temp2,temp2);
1562 emit_adds64(temp2,temp2,temp2);
1563 handler_jump=out;
1564 emit_jc(0);
1565 switch(type) {
1566 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1567 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1568 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1569 default: assert(0);
1570 }
1571 if(regs_saved) {
1572 restore_jump=out;
1573 emit_jmp(0); // jump to reg restore
1574 }
1575 else
1576 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1577 set_jump_target(handler_jump, out);
1578
1579 // TODO FIXME: regalloc should prefer callee-saved regs
1580 if(!regs_saved)
1581 save_regs(reglist);
1582 void *handler=NULL;
1583 switch(type) {
1584 case STOREB_STUB: handler=jump_handler_write8; break;
1585 case STOREH_STUB: handler=jump_handler_write16; break;
1586 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1587 default: assert(0);
d1e4ebd9 1588 }
1589 assert(handler);
1590 pass_args(rs,rt);
1591 if(temp2!=3) {
1592 emit_mov64(temp2,3);
1593 host_tempreg_release();
1594 }
1595 int cc=get_reg(i_regmap,CCREG);
1596 if(cc<0)
1597 emit_loadreg(CCREG,2);
bb4f300c 1598 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
d1e4ebd9 1599 // returns new cycle_count
2a014d73 1600 emit_far_call(handler);
bb4f300c 1601 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
d1e4ebd9 1602 if(cc<0)
1603 emit_storereg(CCREG,2);
1604 if(restore_jump)
1605 set_jump_target(restore_jump, out);
1606 restore_regs(reglist);
1607 emit_jmp(stubs[n].retaddr);
be516ebe 1608}
1609
1610static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1611{
687b4580 1612 int rs = get_reg(regmap,-1);
1613 int rt = get_reg(regmap,target);
1614 assert(rs >= 0);
1615 assert(rt >= 0);
1616 uintptr_t host_addr = 0;
1617 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1618 if (handler == NULL) {
d1e4ebd9 1619 if (addr != host_addr) {
1620 if (host_addr >= 0x100000000ull)
1621 abort(); // ROREG not implemented
687b4580 1622 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1623 }
1624 switch (type) {
687b4580 1625 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1626 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1627 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1628 default: assert(0);
1629 }
1630 return;
1631 }
1632
1633 // call a memhandler
1634 save_regs(reglist);
687b4580 1635 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1636 loadstore_extend(type, rt, 0);
1637 int cc, cc_use;
1638 cc = cc_use = get_reg(regmap, CCREG);
1639 if (cc < 0)
1640 emit_loadreg(CCREG, (cc_use = 2));
bb4f300c 1641 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
d1e4ebd9 1642
2a014d73 1643 emit_far_call(do_memhandler_pre);
1644 emit_far_call(handler);
1645 emit_far_call(do_memhandler_post);
bb4f300c 1646 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
d1e4ebd9 1647 if (cc < 0)
1648 emit_storereg(CCREG, cc_use);
687b4580 1649 restore_regs(reglist);
be516ebe 1650}
1651
3968e69e 1652static int verify_code_arm64(const void *source, const void *copy, u_int size)
be516ebe 1653{
3968e69e 1654 int ret = memcmp(source, copy, size);
1655 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
1656 return ret;
1657}
1658
1659// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1660static void do_dirty_stub_base(u_int vaddr)
1661{
1662 assert(slen <= MAXBLOCK);
1663 emit_loadlp_ofs(0, 0); // ldr x1, source
1664 emit_loadlp_ofs(0, 1); // ldr x2, copy
1665 emit_movz(slen*4, 2);
2a014d73 1666 emit_far_call(verify_code_arm64);
3968e69e 1667 void *jmp = out;
1668 emit_cbz(0, 0);
1669 emit_movz(vaddr & 0xffff, 0);
1670 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1671 emit_far_call(get_addr);
3968e69e 1672 emit_jmpreg(0);
1673 set_jump_target(jmp, out);
1674}
1675
1676static void assert_dirty_stub(const u_int *ptr)
1677{
1678 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1679 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1680 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4
1681 assert( ptr[8] == 0xd61f0000); // br x0
be516ebe 1682}
1683
d1e4ebd9 1684static void set_loadlp(u_int *loadl, void *lit)
be516ebe 1685{
d1e4ebd9 1686 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1687 assert((*loadl & ~0x1f) == 0x58000000);
1688 assert((ofs & 3) == 0);
1689 assert(ofs < 0x100000);
1690 *loadl |= (ofs >> 2) << 5;
1691}
1692
d1e4ebd9 1693static void do_dirty_stub_emit_literals(u_int *loadlps)
1694{
1695 set_loadlp(&loadlps[0], out);
1696 output_w64((uintptr_t)source);
1697 set_loadlp(&loadlps[1], out);
1698 output_w64((uintptr_t)copy);
be516ebe 1699}
1700
d1e4ebd9 1701static void *do_dirty_stub(int i)
be516ebe 1702{
1703 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1704 u_int *loadlps = (void *)out;
3968e69e 1705 do_dirty_stub_base(start + i*4);
d1e4ebd9 1706 void *entry = out;
be516ebe 1707 load_regs_entry(i);
d1e4ebd9 1708 if (entry == out)
1709 entry = instr_addr[i];
1710 emit_jmp(instr_addr[i]);
1711 do_dirty_stub_emit_literals(loadlps);
1712 return entry;
be516ebe 1713}
1714
3968e69e 1715static void do_dirty_stub_ds(void)
be516ebe 1716{
d1e4ebd9 1717 u_int *loadlps = (void *)out;
3968e69e 1718 do_dirty_stub_base(start + 1);
1719 void *lit_jumpover = out;
d1e4ebd9 1720 emit_jmp(out + 8*2);
1721 do_dirty_stub_emit_literals(loadlps);
3968e69e 1722 set_jump_target(lit_jumpover, out);
be516ebe 1723}
1724
3968e69e 1725static uint64_t get_from_ldr_literal(const u_int *i)
1726{
1727 signed int ofs;
1728 assert((i[0] & 0xff000000) == 0x58000000);
1729 ofs = i[0] << 8;
1730 ofs >>= 5+8;
1731 return *(uint64_t *)(i + ofs);
1732}
be516ebe 1733
3968e69e 1734static uint64_t get_from_movz(const u_int *i)
1735{
1736 assert((i[0] & 0x7fe00000) == 0x52800000);
1737 return (i[0] >> 5) & 0xffff;
1738}
be516ebe 1739
3968e69e 1740// Find the "clean" entry point from a "dirty" entry point
1741// by skipping past the call to verify_code
1742static void *get_clean_addr(u_int *addr)
be516ebe 1743{
3968e69e 1744 assert_dirty_stub(addr);
1745 return addr + 9;
be516ebe 1746}
be516ebe 1747
3968e69e 1748static int verify_dirty(const u_int *ptr)
be516ebe 1749{
3968e69e 1750 const void *source, *copy;
1751 u_int len;
1752 assert_dirty_stub(ptr);
1753 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1754 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1755 len = get_from_movz(&ptr[2]); // movz w3, #slen*4
1756 return !memcmp(source, copy, len);
1757}
1758
1759static int isclean(void *addr)
1760{
1761 const u_int *ptr = addr;
1762 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1763 assert_dirty_stub(ptr);
1764 return 0;
1765 }
1766 return 1;
1767}
1768
1769// get source that block at addr was compiled from (host pointers)
1770static void get_bounds(void *addr, u_char **start, u_char **end)
1771{
1772 const u_int *ptr = addr;
1773 assert_dirty_stub(ptr);
1774 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1775 *end = *start + get_from_movz(&ptr[2]); // movz w3, #slen*4
1776}
1777
1778/* Special assem */
1779
1780static void c2op_prologue(u_int op,u_int reglist)
1781{
1782 save_load_regs_all(1, reglist);
1783#ifdef PCNT
1784 emit_movimm(op, 0);
2a014d73 1785 emit_far_call(pcnt_gte_start);
3968e69e 1786#endif
1787 // pointer to cop2 regs
1788 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1789}
1790
1791static void c2op_epilogue(u_int op,u_int reglist)
1792{
1793#ifdef PCNT
1794 emit_movimm(op, 0);
2a014d73 1795 emit_far_call(pcnt_gte_end);
3968e69e 1796#endif
1797 save_load_regs_all(0, reglist);
be516ebe 1798}
1799
1800static void c2op_assemble(int i,struct regstat *i_regs)
1801{
3968e69e 1802 u_int c2op=source[i]&0x3f;
1803 u_int hr,reglist_full=0,reglist;
1804 int need_flags,need_ir;
1805 for(hr=0;hr<HOST_REGS;hr++) {
1806 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1807 }
1808 reglist=reglist_full&CALLER_SAVE_REGS;
1809
1810 if (gte_handlers[c2op]!=NULL) {
1811 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1812 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1813 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1814 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1815 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1816 need_flags=0;
1817 //int shift = (source[i] >> 19) & 1;
1818 //int lm = (source[i] >> 10) & 1;
1819 switch(c2op) {
1820 default:
1821 (void)need_ir;
1822 c2op_prologue(c2op,reglist);
1823 emit_movimm(source[i],1); // opcode
1824 emit_writeword(1,&psxRegs.code);
2a014d73 1825 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1826 break;
1827 }
1828 c2op_epilogue(c2op,reglist);
1829 }
1830}
1831
1832static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1833{
1834 //value = value & 0x7ffff000;
1835 //if (value & 0x7f87e000) value |= 0x80000000;
1836 emit_andimm(sl, 0x7fffe000, temp);
1837 emit_testimm(temp, 0xff87ffff);
1838 emit_andimm(sl, 0x7ffff000, temp);
1839 host_tempreg_acquire();
1840 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1841 emit_cmovne_reg(HOST_TEMPREG, temp);
1842 host_tempreg_release();
1843 assert(0); // testing needed
1844}
1845
1846static void do_mfc2_31_one(u_int copr,signed char temp)
1847{
1848 emit_readshword(&reg_cop2d[copr],temp);
1849 emit_bicsar_imm(temp,31,temp);
1850 emit_cmpimm(temp,0xf80);
1851 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1852 emit_andimm(temp,0xf80,temp);
1853}
1854
1855static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1856{
1857 if (temp < 0) {
1858 host_tempreg_acquire();
1859 temp = HOST_TEMPREG;
1860 }
1861 do_mfc2_31_one(9,temp);
1862 emit_shrimm(temp,7,tl);
1863 do_mfc2_31_one(10,temp);
1864 emit_orrshr_imm(temp,2,tl);
1865 do_mfc2_31_one(11,temp);
1866 emit_orrshl_imm(temp,3,tl);
1867 emit_writeword(tl,&reg_cop2d[29]);
1868
1869 if (temp == HOST_TEMPREG)
1870 host_tempreg_release();
be516ebe 1871}
1872
1873static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1874{
3968e69e 1875 // case 0x18: MULT
1876 // case 0x19: MULTU
1877 // case 0x1A: DIV
1878 // case 0x1B: DIVU
1879 if(rs1[i]&&rs2[i])
1880 {
1881 switch(opcode2[i])
1882 {
1883 case 0x18: // MULT
1884 case 0x19: // MULTU
1885 {
1886 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1887 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1888 signed char hi=get_reg(i_regs->regmap,HIREG);
1889 signed char lo=get_reg(i_regs->regmap,LOREG);
1890 assert(m1>=0);
1891 assert(m2>=0);
1892 assert(hi>=0);
1893 assert(lo>=0);
1894
1895 if(opcode2[i]==0x18) // MULT
1896 emit_smull(m1,m2,hi);
1897 else // MULTU
1898 emit_umull(m1,m2,hi);
1899
1900 emit_mov(hi,lo);
1901 emit_shrimm64(hi,32,hi);
1902 break;
1903 }
1904 case 0x1A: // DIV
1905 case 0x1B: // DIVU
1906 {
1907 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1908 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1909 signed char quotient=get_reg(i_regs->regmap,LOREG);
1910 signed char remainder=get_reg(i_regs->regmap,HIREG);
1911 assert(numerator>=0);
1912 assert(denominator>=0);
1913 assert(quotient>=0);
1914 assert(remainder>=0);
1915
1916 if (opcode2[i] == 0x1A) // DIV
1917 emit_sdiv(numerator,denominator,quotient);
1918 else // DIVU
1919 emit_udiv(numerator,denominator,quotient);
1920 emit_msub(quotient,denominator,numerator,remainder);
1921
1922 // div 0 quotient (remainder is already correct)
1923 host_tempreg_acquire();
1924 if (opcode2[i] == 0x1A) // DIV
1925 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1926 else
1927 emit_movimm(~0,HOST_TEMPREG);
1928 emit_test(denominator,denominator);
1929 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1930 host_tempreg_release();
1931 break;
1932 }
1933 default:
1934 assert(0);
1935 }
1936 }
1937 else
1938 {
1939 signed char hr=get_reg(i_regs->regmap,HIREG);
1940 signed char lr=get_reg(i_regs->regmap,LOREG);
1941 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1942 {
1943 if (rs1[i]) {
1944 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1945 assert(numerator >= 0);
1946 if (hr >= 0)
1947 emit_mov(numerator,hr);
1948 if (lr >= 0) {
1949 if (opcode2[i] == 0x1A) // DIV
1950 emit_sub_asrimm(0,numerator,31,lr);
1951 else
1952 emit_movimm(~0,lr);
1953 }
1954 }
1955 else {
1956 if (hr >= 0) emit_zeroreg(hr);
1957 if (lr >= 0) emit_movimm(~0,lr);
1958 }
1959 }
1960 else
1961 {
1962 // Multiply by zero is zero.
1963 if (hr >= 0) emit_zeroreg(hr);
1964 if (lr >= 0) emit_zeroreg(lr);
1965 }
1966 }
be516ebe 1967}
1968#define multdiv_assemble multdiv_assemble_arm64
1969
d1e4ebd9 1970static void do_jump_vaddr(u_int rs)
1971{
1972 if (rs != 0)
1973 emit_mov(rs, 0);
2a014d73 1974 emit_far_call(get_addr_ht);
d1e4ebd9 1975 emit_jmpreg(0);
1976}
1977
be516ebe 1978static void do_preload_rhash(u_int r) {
1979 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1980 // register. On ARM the hash can be done with a single instruction (below)
1981}
1982
1983static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1984 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1985}
1986
1987static void do_rhash(u_int rs,u_int rh) {
1988 emit_andimm(rs, 0xf8, rh);
1989}
1990
d1e4ebd9 1991static void do_miniht_load(int ht, u_int rh) {
1992 emit_add64(ht, rh, ht);
1993 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1994}
1995
d1e4ebd9 1996static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1997 emit_cmp(rh, rs);
1998 void *jaddr = out;
1999 emit_jeq(0);
2000 do_jump_vaddr(rs);
2001
2002 set_jump_target(jaddr, out);
2003 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2004 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2005 emit_jmpreg(ht);
be516ebe 2006}
2007
d1e4ebd9 2008// parsed by set_jump_target?
be516ebe 2009static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2010 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2011 emit_movk(return_address&0xffff,rt);
2012 add_to_linker(out,return_address,1);
2013 emit_adr(out,temp);
2014 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2015 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2016}
2017
// Make code written to [start, end) visible to instruction fetch.
//
// GCC's __clear_cache is deliberately avoided: it caches the icache/dcache
// line sizes, and those can differ between cores on big.LITTLE systems.
// Instead, CTR_EL0 is read on every call and the smallest line sizes seen so
// far are kept in function-local statics and used as the loop strides.
static void clear_cache_arm64(char *start, char *end)
{
  enum {
    CTR_IDC = 1 << 28, // CTR_EL0.IDC
    CTR_DIC = 1 << 29, // CTR_EL0.DIC
  };
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t limit = (uint64_t)end;
  uint64_t ctr, line;
  size_t istep, dstep;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr));
  // line sizes as reported by the core this call happens to run on
  istep = 4 << ((ctr >> 0) & 0xf);
  dstep = 4 << ((ctr >> 16) & 0xf);

  // use the global minimum cache line size
  if (icache_line_size < istep)
    istep = icache_line_size;
  icache_line_size = istep;
  if (dcache_line_size < dstep)
    dstep = dcache_line_size;
  dcache_line_size = dstep;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if (!(ctr & CTR_IDC)) {
    line = (uint64_t)start & ~(uint64_t)(dstep - 1);
    while (line < limit) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dstep;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if (!(ctr & CTR_DIC)) {
    line = (uint64_t)start & ~(uint64_t)(istep - 1);
    while (line < limit) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += istep;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2058
2059// CPU-architecture-specific initialization
2a014d73 2060static void arch_init(void)
2061{
2062 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2063 struct tramp_insns *ops = ndrc->tramp.ops;
2064 size_t i;
2065 assert(!(diff & 3));
2066 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2067 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2068 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2069 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2070 }
2071 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2072}
2073
2074// vim:shiftwidth=2:expandtab