drc: botched msb bit check
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
be516ebe 26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
d1e4ebd9 30void do_memhandler_pre();
31void do_memhandler_post();
be516ebe 32
33/* Linker */
/*
 * Re-point the pc-relative instruction at `addr` so that it refers to
 * `target`.  Only the immediate field is rewritten; every other bit of the
 * instruction word is preserved.
 *
 * Recognised instruction forms:
 *   b           - 26-bit word offset
 *   b.cond      - 19-bit word offset
 *   cbz / cbnz  - 19-bit word offset
 *   adr         - 21-bit byte offset (generated by do_miniht_insert)
 * Any other instruction word aborts.  Out-of-range offsets trip an assert.
 */
static void set_jump_target(void *addr, void *target)
{
  uint32_t *ptr = addr;
  const uint32_t insn = *ptr;
  const intptr_t offset = (unsigned char *)target - (unsigned char *)addr;

  if ((insn & 0xfc000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *ptr = (insn & 0xfc000000) | (uint32_t)((offset >> 2) & 0x3ffffff);
  }
  else if ((insn & 0xff000000) == 0x54000000    // b.cond
        || (insn & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block
    // (see add_jump_out); a workaround would be to do a trampoline jump
    // via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Keep all five low bits: b.cond holds its condition there, but
    // cbz/cbnz hold the full register number, so a 4-bit mask would
    // silently turn e.g. w17 into w1.
    *ptr = (insn & 0xff00001f) | ((uint32_t)((offset >> 2) & 0x7ffff) << 5);
  }
  else if ((insn & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *ptr = (insn & 0x9f00001f)
         | ((uint32_t)(offset & 0x3) << 29)
         | ((uint32_t)((offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
60
// Given a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an "adr"; its pc-relative byte offset
// is decoded and applied to the address of that adr instruction.
static void *find_extjump_insn(void *stub)
{
  uint32_t *adr = (uint32_t *)stub + 2;
  const uint32_t insn = *adr;
  int32_t immhi;
  int offset;

  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi lives in bits 23:5 and is signed; sign-extend the 19-bit field
  // with unsigned arithmetic instead of shifting a signed value.
  immhi = (int32_t)((insn >> 5) & 0x7ffff);
  immhi = (immhi ^ 0x40000) - 0x40000;
  // immlo (bits 30:29) supplies the two low bits of the byte offset
  offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return adr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn the stub refers to (see find_extjump_insn),
// decode its pc-relative immediate,
// and return the address that branch jumps to.
// Asserts (and returns NULL) if the insn is not b, b.cond or cbz/cbnz.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int is_b     = (*branch & 0xfc000000) == 0x14000000; // b
  const int is_bcond = (*branch & 0xff000000) == 0x54000000; // b.cond
  const int is_cbz   = (*branch & 0x7e000000) == 0x34000000; // cbz/cbnz

  if (is_b) {
    // 26-bit signed word offset in the low bits
    return branch + ((signed int)(*branch << 6) >> 6);
  }
  if (is_bcond || is_cbz) {
    // 19-bit signed word offset in bits 23:5
    return branch + ((signed int)(*branch << 8) >> 13);
  }
  assert(0);
  return NULL;
}
86
be516ebe 87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
d1e4ebd9 120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
/* Condition codes, numbered 0..15 in declaration order; the value is placed
 * directly into the condition field of the emitted instructions. */
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 141};
142
be516ebe 143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
d1e4ebd9 149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
687b4580 156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
d1e4ebd9 162*/
687b4580 163
3968e69e 164static u_int rn_rd(u_int rn, u_int rd)
165{
166 assert(rn < 31);
167 assert(rd < 31);
168 return (rn << 5) | rd;
169}
170
be516ebe 171static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
172{
d1e4ebd9 173 assert(rm < 32);
174 assert(rn < 32);
175 assert(rd < 32);
be516ebe 176 return (rm << 16) | (rn << 5) | rd;
177}
178
3968e69e 179static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
180{
181 assert(ra < 32);
182 return rm_rn_rd(rm, rn, rd) | (ra << 10);
183}
184
d1e4ebd9 185static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
186{
187 assert(imm7 < 0x80);
188 assert(rt2 < 31);
189 assert(rn < 32);
190 assert(rt < 31);
191 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
192}
193
687b4580 194static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
195{
196 assert(imm6 <= 63);
197 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
198}
199
be516ebe 200static u_int imm16_rd(u_int imm16, u_int rd)
201{
202 assert(imm16 < 0x10000);
203 assert(rd < 31);
204 return (imm16 << 5) | rd;
205}
206
687b4580 207static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
208{
209 assert(imm12 < 0x1000);
d1e4ebd9 210 assert(rn < 32);
211 assert(rd < 32);
212 return (imm12 << 10) | (rn << 5) | rd;
213}
214
215static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
216{
217 assert(imm9 < 0x200);
687b4580 218 assert(rn < 31);
219 assert(rd < 31);
d1e4ebd9 220 return (imm9 << 12) | (rn << 5) | rd;
687b4580 221}
222
d1e4ebd9 223static u_int imm19_rt(u_int imm19, u_int rt)
224{
225 assert(imm19 < 0x80000);
226 assert(rt < 31);
227 return (imm19 << 5) | rt;
228}
229
230static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
231{
232 assert(n < 2);
233 assert(immr < 0x40);
234 assert(imms < 0x40);
235 assert(rn < 32);
236 assert(rd < 32);
237 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
238}
239
240static u_int genjmp(const u_char *addr)
be516ebe 241{
242 intptr_t offset = addr - out;
d1e4ebd9 243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 244 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
be516ebe 247 return 0;
248 }
d1e4ebd9 249 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 250}
251
d1e4ebd9 252static u_int genjmpcc(const u_char *addr)
be516ebe 253{
254 intptr_t offset = addr - out;
d1e4ebd9 255 if ((uintptr_t)addr < 3) return 0;
be516ebe 256 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
264static uint32_t is_mask(u_int value)
265{
266 return value && ((value + 1) & value) == 0;
267}
268
269// This function returns true if the argument contains a
270// non-empty sequence of ones (possibly rotated) with the remainder zero.
271static uint32_t is_rotated_mask(u_int value)
272{
3968e69e 273 if (value == 0 || value == ~0)
be516ebe 274 return 0;
d1e4ebd9 275 if (is_mask((value - 1) | value))
276 return 1;
277 return is_mask((~value - 1) | ~value);
278}
279
280static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
281{
282 int lzeros, tzeros, ones;
283 assert(value != 0);
284 if (is_mask((value - 1) | value)) {
285 lzeros = __builtin_clz(value);
286 tzeros = __builtin_ctz(value);
287 ones = 32 - lzeros - tzeros;
288 *immr = (32 - tzeros) & 31;
289 *imms = ones - 1;
290 return;
be516ebe 291 }
d1e4ebd9 292 value = ~value;
293 if (is_mask((value - 1) | value)) {
294 lzeros = __builtin_clz(value);
295 tzeros = __builtin_ctz(value);
296 ones = 32 - lzeros - tzeros;
3968e69e 297 *immr = lzeros;
d1e4ebd9 298 *imms = 31 - ones;
299 return;
300 }
3968e69e 301 abort();
be516ebe 302}
303
304static void emit_mov(u_int rs, u_int rt)
305{
687b4580 306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 314}
315
687b4580 316static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 317{
d1e4ebd9 318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 320}
321
d1e4ebd9 322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 323{
d1e4ebd9 324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 326}
327
d1e4ebd9 328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 329{
3968e69e 330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
39b71d9a 333#define emit_adds_ptr emit_adds64
d1e4ebd9 334
335static void emit_neg(u_int rs, u_int rt)
336{
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 339}
340
687b4580 341static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 342{
d1e4ebd9 343 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 344 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 345}
346
3968e69e 347static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
348{
349 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
350 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351}
352
d1e4ebd9 353static void emit_movz(u_int imm, u_int rt)
be516ebe 354{
d1e4ebd9 355 assem_debug("movz %s,#%#x\n", regname[rt], imm);
356 output_w32(0x52800000 | imm16_rd(imm, rt));
357}
358
359static void emit_movz_lsl16(u_int imm, u_int rt)
360{
361 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
362 output_w32(0x52a00000 | imm16_rd(imm, rt));
363}
364
365static void emit_movn(u_int imm, u_int rt)
366{
367 assem_debug("movn %s,#%#x\n", regname[rt], imm);
368 output_w32(0x12800000 | imm16_rd(imm, rt));
369}
370
371static void emit_movn_lsl16(u_int imm,u_int rt)
372{
373 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x12a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_movk(u_int imm,u_int rt)
378{
379 assem_debug("movk %s,#%#x\n", regname[rt], imm);
380 output_w32(0x72800000 | imm16_rd(imm, rt));
381}
382
383static void emit_movk_lsl16(u_int imm,u_int rt)
384{
385 assert(imm<65536);
3968e69e 386 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 387 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 388}
389
390static void emit_zeroreg(u_int rt)
391{
d1e4ebd9 392 emit_movz(0, rt);
be516ebe 393}
394
be516ebe 395static void emit_movimm(u_int imm, u_int rt)
396{
d1e4ebd9 397 if (imm < 65536)
398 emit_movz(imm, rt);
399 else if ((~imm) < 65536)
400 emit_movn(~imm, rt);
401 else if ((imm&0xffff) == 0)
402 emit_movz_lsl16(imm >> 16, rt);
403 else if (((~imm)&0xffff) == 0)
404 emit_movn_lsl16(~imm >> 16, rt);
405 else if (is_rotated_mask(imm)) {
406 u_int immr, imms;
407 gen_logical_imm(imm, &immr, &imms);
408 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
409 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
410 }
be516ebe 411 else {
d1e4ebd9 412 emit_movz(imm & 0xffff, rt);
413 emit_movk_lsl16(imm >> 16, rt);
be516ebe 414 }
415}
416
687b4580 417static void emit_readword(void *addr, u_int rt)
418{
419 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
420 if (!(offset & 3) && offset <= 16380) {
421 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
422 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
423 }
424 else
3968e69e 425 abort();
687b4580 426}
427
d1e4ebd9 428static void emit_readdword(void *addr, u_int rt)
429{
430 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
431 if (!(offset & 7) && offset <= 32760) {
432 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
433 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
434 }
3968e69e 435 else
436 abort();
437}
39b71d9a 438#define emit_readptr emit_readdword
3968e69e 439
440static void emit_readshword(void *addr, u_int rt)
441{
442 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
443 if (!(offset & 1) && offset <= 8190) {
444 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
445 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
446 }
d1e4ebd9 447 else
448 assert(0);
449}
450
be516ebe 451static void emit_loadreg(u_int r, u_int hr)
452{
d1e4ebd9 453 int is64 = 0;
be516ebe 454 assert(r < 64);
455 if (r == 0)
456 emit_zeroreg(hr);
457 else {
7c3a5182 458 void *addr = &psxRegs.GPR.r[r];
be516ebe 459 switch (r) {
7c3a5182 460 //case HIREG: addr = &hi; break;
461 //case LOREG: addr = &lo; break;
be516ebe 462 case CCREG: addr = &cycle_count; break;
463 case CSREG: addr = &Status; break;
d1e4ebd9 464 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 465 default: assert(r < 34); break;
be516ebe 466 }
d1e4ebd9 467 if (is64)
468 emit_readdword(addr, hr);
469 else
470 emit_readword(addr, hr);
be516ebe 471 }
472}
473
687b4580 474static void emit_writeword(u_int rt, void *addr)
475{
476 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
477 if (!(offset & 3) && offset <= 16380) {
478 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
479 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
480 }
481 else
482 assert(0);
483}
484
d1e4ebd9 485static void emit_writedword(u_int rt, void *addr)
486{
487 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
488 if (!(offset & 7) && offset <= 32760) {
489 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 490 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 491 }
492 else
3968e69e 493 abort();
d1e4ebd9 494}
495
687b4580 496static void emit_storereg(u_int r, u_int hr)
be516ebe 497{
498 assert(r < 64);
7c3a5182 499 void *addr = &psxRegs.GPR.r[r];
be516ebe 500 switch (r) {
7c3a5182 501 //case HIREG: addr = &hi; break;
502 //case LOREG: addr = &lo; break;
be516ebe 503 case CCREG: addr = &cycle_count; break;
7c3a5182 504 default: assert(r < 34); break;
be516ebe 505 }
687b4580 506 emit_writeword(hr, addr);
be516ebe 507}
508
509static void emit_test(u_int rs, u_int rt)
510{
d1e4ebd9 511 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
512 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 513}
514
d1e4ebd9 515static void emit_testimm(u_int rs, u_int imm)
be516ebe 516{
d1e4ebd9 517 u_int immr, imms;
687b4580 518 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 519 assert(is_rotated_mask(imm)); // good enough for PCSX
520 gen_logical_imm(imm, &immr, &imms);
3968e69e 521 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 522}
523
524static void emit_not(u_int rs,u_int rt)
525{
526 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 527 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 528}
529
be516ebe 530static void emit_and(u_int rs1,u_int rs2,u_int rt)
531{
532 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 533 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 534}
535
536static void emit_or(u_int rs1,u_int rs2,u_int rt)
537{
538 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 539 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 540}
541
3968e69e 542static void emit_bic(u_int rs1,u_int rs2,u_int rt)
543{
544 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
545 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
546}
547
be516ebe 548static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
549{
be516ebe 550 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 551 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 552}
553
554static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
555{
be516ebe 556 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 557 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 558}
559
3968e69e 560static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
561{
562 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
563 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
564}
565
be516ebe 566static void emit_xor(u_int rs1,u_int rs2,u_int rt)
567{
568 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 569 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 570}
571
3968e69e 572static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
573{
574 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
575 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
576}
577
d1e4ebd9 578static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 579{
d1e4ebd9 580 unused const char *st = s ? "s" : "";
581 s = s ? 0x20000000 : 0;
582 is64 = is64 ? 0x80000000 : 0;
687b4580 583 if (imm < 4096) {
d1e4ebd9 584 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
585 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 586 }
587 else if (-imm < 4096) {
3968e69e 588 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 589 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
590 }
591 else if (imm < 16777216) {
592 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
593 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
594 if ((imm & 0xfff) || s) {
595 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 596 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 597 }
598 }
599 else if (-imm < 16777216) {
600 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
601 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
602 if ((imm & 0xfff) || s) {
603 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
604 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
605 }
687b4580 606 }
607 else
3968e69e 608 abort();
be516ebe 609}
610
d1e4ebd9 611static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
612{
613 emit_addimm_s(0, 0, rs, imm, rt);
614}
615
616static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
617{
618 emit_addimm_s(0, 1, rs, imm, rt);
619}
620
be516ebe 621static void emit_addimm_and_set_flags(int imm, u_int rt)
622{
d1e4ebd9 623 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 624}
625
626static void emit_addimm_no_flags(u_int imm,u_int rt)
627{
628 emit_addimm(rt,imm,rt);
629}
630
d1e4ebd9 631static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 632{
d1e4ebd9 633 const char *names[] = { "and", "orr", "eor", "ands" };
634 const char *name = names[op];
635 u_int immr, imms;
636 op = op << 29;
637 if (is_rotated_mask(imm)) {
638 gen_logical_imm(imm, &immr, &imms);
639 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
640 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
641 }
642 else {
643 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
644 host_tempreg_acquire();
645 emit_movimm(imm, HOST_TEMPREG);
646 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
647 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
648 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
649 host_tempreg_release();
650 }
651 (void)name;
be516ebe 652}
653
d1e4ebd9 654static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 655{
d1e4ebd9 656 if (imm == 0)
657 emit_zeroreg(rt);
658 else
659 emit_logicop_imm(0, rs, imm, rt);
be516ebe 660}
661
d1e4ebd9 662static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 663{
d1e4ebd9 664 if (imm == 0) {
665 if (rs != rt)
666 emit_mov(rs, rt);
667 }
668 else
669 emit_logicop_imm(1, rs, imm, rt);
be516ebe 670}
671
d1e4ebd9 672static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 673{
d1e4ebd9 674 if (imm == 0) {
675 if (rs != rt)
676 emit_mov(rs, rt);
677 }
678 else
679 emit_logicop_imm(2, rs, imm, rt);
be516ebe 680}
681
d1e4ebd9 682static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 683{
d1e4ebd9 684 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
685 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 686}
687
d1e4ebd9 688static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 689{
d1e4ebd9 690 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 692}
693
694static void emit_shlimm(u_int rs,u_int imm,u_int rt)
695{
be516ebe 696 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 698}
699
3968e69e 700static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 701{
3968e69e 702 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 704}
705
3968e69e 706static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 707{
be516ebe 708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 709 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 710}
711
712static void emit_sarimm(u_int rs,u_int imm,u_int rt)
713{
be516ebe 714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 715 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 716}
717
718static void emit_rorimm(u_int rs,u_int imm,u_int rt)
719{
3968e69e 720 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 721 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 722}
723
724static void emit_signextend16(u_int rs, u_int rt)
725{
726 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 727 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 728}
729
d1e4ebd9 730static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 731{
3968e69e 732 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 733 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 734}
735
d1e4ebd9 736static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 737{
d1e4ebd9 738 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 740}
741
d1e4ebd9 742static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 743{
d1e4ebd9 744 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 746}
747
d1e4ebd9 748static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 749{
d1e4ebd9 750 if (imm < 4096) {
751 assem_debug("cmp %s,%#x\n", regname[rs], imm);
752 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
753 }
754 else if (-imm < 4096) {
755 assem_debug("cmn %s,%#x\n", regname[rs], imm);
756 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
757 }
758 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 759 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 760 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
761 }
762 else {
763 host_tempreg_acquire();
764 emit_movimm(imm, HOST_TEMPREG);
765 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
766 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
767 host_tempreg_release();
768 }
be516ebe 769}
770
d1e4ebd9 771static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 772{
d1e4ebd9 773 assert(imm == 0 || imm == 1);
774 assert(cond0 < 0x10);
775 assert(cond1 < 0x10);
776 if (imm) {
777 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
778 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
779 } else {
780 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
781 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
782 }
be516ebe 783}
784
d1e4ebd9 785static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 786{
d1e4ebd9 787 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 788}
789
d1e4ebd9 790static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 791{
d1e4ebd9 792 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 793}
794
795static void emit_cmovb_imm(int imm,u_int rt)
796{
d1e4ebd9 797 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 798}
799
3968e69e 800static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 801{
3968e69e 802 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
803 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 804}
805
806static void emit_cmovne_reg(u_int rs,u_int rt)
807{
d1e4ebd9 808 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 810}
811
812static void emit_cmovl_reg(u_int rs,u_int rt)
813{
d1e4ebd9 814 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 816}
817
e3c6bdb5 818static void emit_cmovb_reg(u_int rs,u_int rt)
819{
820 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
822}
823
be516ebe 824static void emit_cmovs_reg(u_int rs,u_int rt)
825{
d1e4ebd9 826 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 828}
829
3968e69e 830static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
831{
832 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
833 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
834}
835
be516ebe 836static void emit_slti32(u_int rs,int imm,u_int rt)
837{
838 if(rs!=rt) emit_zeroreg(rt);
839 emit_cmpimm(rs,imm);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovl_imm(1,rt);
842}
843
844static void emit_sltiu32(u_int rs,int imm,u_int rt)
845{
846 if(rs!=rt) emit_zeroreg(rt);
847 emit_cmpimm(rs,imm);
848 if(rs==rt) emit_movimm(0,rt);
849 emit_cmovb_imm(1,rt);
850}
851
852static void emit_cmp(u_int rs,u_int rt)
853{
854 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 855 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 856}
857
858static void emit_set_gz32(u_int rs, u_int rt)
859{
860 //assem_debug("set_gz32\n");
861 emit_cmpimm(rs,1);
862 emit_movimm(1,rt);
863 emit_cmovl_imm(0,rt);
864}
865
866static void emit_set_nz32(u_int rs, u_int rt)
867{
868 //assem_debug("set_nz32\n");
d1e4ebd9 869 if(rs!=rt) emit_mov(rs,rt);
870 emit_test(rs,rs);
871 emit_cmovne_imm(1,rt);
be516ebe 872}
873
874static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
875{
876 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
878 emit_cmp(rs1,rs2);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovl_imm(1,rt);
881}
882
883static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
884{
885 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
887 emit_cmp(rs1,rs2);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovb_imm(1,rt);
890}
891
2a014d73 892static int can_jump_or_call(const void *a)
893{
894 intptr_t diff = (u_char *)a - out;
895 return (-134217728 <= diff && diff <= 134217727);
896}
897
d1e4ebd9 898static void emit_call(const void *a)
be516ebe 899{
d1e4ebd9 900 intptr_t diff = (u_char *)a - out;
901 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 902 assert(!(diff & 3));
903 if (-134217728 <= diff && diff <= 134217727)
904 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
905 else
3968e69e 906 abort();
be516ebe 907}
908
d1e4ebd9 909static void emit_jmp(const void *a)
be516ebe 910{
d1e4ebd9 911 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
912 u_int offset = genjmp(a);
913 output_w32(0x14000000 | offset);
be516ebe 914}
915
d1e4ebd9 916static void emit_jne(const void *a)
be516ebe 917{
d1e4ebd9 918 assem_debug("bne %p\n", a);
919 u_int offset = genjmpcc(a);
920 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 921}
922
7c3a5182 923static void emit_jeq(const void *a)
be516ebe 924{
d1e4ebd9 925 assem_debug("beq %p\n", a);
926 u_int offset = genjmpcc(a);
927 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 928}
929
7c3a5182 930static void emit_js(const void *a)
be516ebe 931{
d1e4ebd9 932 assem_debug("bmi %p\n", a);
933 u_int offset = genjmpcc(a);
934 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 935}
936
7c3a5182 937static void emit_jns(const void *a)
be516ebe 938{
d1e4ebd9 939 assem_debug("bpl %p\n", a);
940 u_int offset = genjmpcc(a);
941 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 942}
943
7c3a5182 944static void emit_jl(const void *a)
be516ebe 945{
d1e4ebd9 946 assem_debug("blt %p\n", a);
947 u_int offset = genjmpcc(a);
948 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 949}
950
7c3a5182 951static void emit_jge(const void *a)
be516ebe 952{
d1e4ebd9 953 assem_debug("bge %p\n", a);
954 u_int offset = genjmpcc(a);
955 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 956}
957
7c3a5182 958static void emit_jno(const void *a)
be516ebe 959{
d1e4ebd9 960 assem_debug("bvc %p\n", a);
961 u_int offset = genjmpcc(a);
962 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 963}
964
7c3a5182 965static void emit_jc(const void *a)
be516ebe 966{
d1e4ebd9 967 assem_debug("bcs %p\n", a);
968 u_int offset = genjmpcc(a);
969 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 970}
971
3968e69e 972static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 973{
3968e69e 974 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 975 u_int offset = genjmpcc(a);
3968e69e 976 is64 = is64 ? 0x80000000 : 0;
977 isnz = isnz ? 0x01000000 : 0;
978 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
979}
980
981static void emit_cbz(const void *a, u_int r)
982{
983 emit_cb(0, 0, a, r);
be516ebe 984}
985
986static void emit_jmpreg(u_int r)
987{
3968e69e 988 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 989 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 990}
991
992static void emit_retreg(u_int r)
993{
d1e4ebd9 994 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 995 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
996}
997
998static void emit_ret(void)
999{
1000 emit_retreg(LR);
1001}
1002
d1e4ebd9 1003static void emit_adr(void *addr, u_int rt)
1004{
1005 intptr_t offset = (u_char *)addr - out;
1006 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1007 assert(rt < 31);
d1e4ebd9 1008 assem_debug("adr x%d,#%#lx\n", rt, offset);
1009 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1010}
1011
3968e69e 1012static void emit_adrp(void *addr, u_int rt)
1013{
1014 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1015 assert(-4294967296l <= offset && offset < 4294967296l);
1016 assert(rt < 31);
1017 offset >>= 12;
1018 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1019 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1020}
1021
be516ebe 1022static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1023{
d1e4ebd9 1024 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1025 assert(-256 <= offset && offset < 256);
1026 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1027}
1028
1029static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1030{
1031 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1033}
1034
1035static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1036{
1037 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1039}
1040
1041static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1042{
1043 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1045}
1046
1047static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1048{
1049 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1050 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1051}
39b71d9a 1052#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
d1e4ebd9 1053
1054static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055{
1056 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1058}
1059
1060static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061{
1062 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1064}
1065
1066static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1067{
1068 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1070}
1071
1072static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1073{
1074 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1075 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1076}
1077
1078static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1079{
1080 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1081 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1082}
1083
be516ebe 1084static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1085{
d1e4ebd9 1086 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1087 assert(-256 <= offset && offset < 256);
1088 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1089}
1090
1091static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1092{
d1e4ebd9 1093 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1094 assert(-256 <= offset && offset < 256);
1095 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1096}
1097
1098static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1099{
d1e4ebd9 1100 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1103}
1104
1105static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1106{
d1e4ebd9 1107 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1110}
1111
be516ebe 1112static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1113{
3968e69e 1114 if (!(offset & 3) && (u_int)offset <= 16380) {
1115 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1116 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1117 }
1118 else if (-256 <= offset && offset < 256) {
1119 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1120 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1121 }
687b4580 1122 else
1123 assert(0);
be516ebe 1124}
1125
1126static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1127{
3968e69e 1128 if (!(offset & 1) && (u_int)offset <= 8190) {
1129 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1130 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1131 }
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1134 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1135 }
687b4580 1136 else
1137 assert(0);
be516ebe 1138}
1139
1140static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1141{
3968e69e 1142 if ((u_int)offset < 4096) {
1143 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1144 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1145 }
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1149 }
687b4580 1150 else
1151 assert(0);
be516ebe 1152}
1153
3968e69e 1154static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1155{
3968e69e 1156 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1157 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1158}
1159
3968e69e 1160static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1161{
3968e69e 1162 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1163 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1164}
1165
1166static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1167{
1168 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1169 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1170}
1171
1172static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1173{
1174 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1176}
1177
3968e69e 1178static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1179{
1180 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1182}
1183
1184static void emit_clz(u_int rs, u_int rt)
be516ebe 1185{
1186 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1187 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1188}
1189
be516ebe 1190// special case for checking invalid_code
d1e4ebd9 1191static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1192{
d1e4ebd9 1193 host_tempreg_acquire();
1194 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1195 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1196 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1197 emit_cmpimm(HOST_TEMPREG, imm);
1198 host_tempreg_release();
be516ebe 1199}
1200
3968e69e 1201// special for loadlr_assemble, rs2 is destroyed
1202static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1203{
3968e69e 1204 emit_shl(rs2, shift, rs2);
1205 emit_bic(rs1, rs2, rt);
be516ebe 1206}
1207
3968e69e 1208static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1209{
3968e69e 1210 emit_shr(rs2, shift, rs2);
1211 emit_bic(rs1, rs2, rt);
be516ebe 1212}
1213
d1e4ebd9 1214static void emit_loadlp_ofs(u_int ofs, u_int rt)
1215{
1216 output_w32(0x58000000 | imm19_rt(ofs, rt));
1217}
1218
687b4580 1219static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1220{
687b4580 1221 u_int op = 0xb9000000;
d1e4ebd9 1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
687b4580 1224 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1225 is64 = is64 ? 1 : 0;
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1227 ofs = (ofs >> (2+is64));
687b4580 1228 if (!is_st) op |= 0x00400000;
1229 if (is64) op |= 0x40000000;
d1e4ebd9 1230 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1231}
1232
687b4580 1233static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1234{
687b4580 1235 u_int op = 0x29000000;
d1e4ebd9 1236 unused const char *ldst = is_st ? "st" : "ld";
1237 unused char rp = is64 ? 'x' : 'w';
687b4580 1238 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1239 is64 = is64 ? 1 : 0;
1240 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1241 ofs = (ofs >> (2+is64));
1242 assert(-64 <= ofs && ofs <= 63);
1243 ofs &= 0x7f;
1244 if (!is_st) op |= 0x00400000;
1245 if (is64) op |= 0x80000000;
d1e4ebd9 1246 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1247}
1248
1249static void save_load_regs_all(int is_store, u_int reglist)
1250{
1251 int ofs = 0, c = 0;
1252 u_int r, pair[2];
1253 for (r = 0; reglist; r++, reglist >>= 1) {
1254 if (reglist & 1)
1255 pair[c++] = r;
1256 if (c == 2) {
1257 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1258 ofs += 8 * 2;
1259 c = 0;
1260 }
1261 }
1262 if (c) {
1263 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1264 ofs += 8;
1265 }
1266 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1267}
1268
1269// Save registers before function call
1270static void save_regs(u_int reglist)
1271{
1272 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1273 save_load_regs_all(1, reglist);
be516ebe 1274}
1275
1276// Restore registers after function call
1277static void restore_regs(u_int reglist)
1278{
1279 reglist &= CALLER_SAVE_REGS;
687b4580 1280 save_load_regs_all(0, reglist);
be516ebe 1281}
1282
1283/* Stubs/epilogue */
1284
1285static void literal_pool(int n)
1286{
1287 (void)literals;
1288}
1289
// No-op on this backend.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1293
d1e4ebd9 1294// parsed by get_pointer, find_extjump_insn
1295static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1296{
d1e4ebd9 1297 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1298
d1e4ebd9 1299 emit_movz(target & 0xffff, 0);
1300 emit_movk_lsl16(target >> 16, 0);
1301
1302 // addr is in the current recompiled block (max 256k)
1303 // offset shouldn't exceed +/-1MB
1304 emit_adr(addr, 1);
2a014d73 1305 emit_far_jump(linker);
be516ebe 1306}
1307
// Sanity check: src must begin with "movz w0, #imm", the first
// instruction emit_extjump2() produces.
static void check_extjump2(void *src)
{
  const u_int *insn = src;
  assert((insn[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
  (void)insn; // keeps NDEBUG builds warning-free
}
1314
1315// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1316static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1317{
d1e4ebd9 1318 int diff = rt_val - rs_val;
3968e69e 1319 if ((-4096 < diff && diff < 4096)
1320 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1321 emit_addimm(rs, diff, rt);
3968e69e 1322 else if (rt_val == ~rs_val)
1323 emit_not(rs, rt);
d1e4ebd9 1324 else if (is_rotated_mask(rs_val ^ rt_val))
1325 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1326 else
d1e4ebd9 1327 emit_movimm(rt_val, rt);
be516ebe 1328}
1329
d1e4ebd9 1330// return 1 if the above function can do it's job cheaply
687b4580 1331static int is_similar_value(u_int v1, u_int v2)
be516ebe 1332{
687b4580 1333 int diff = v1 - v2;
3968e69e 1334 return (-4096 < diff && diff < 4096)
1335 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1336 || v1 == ~v2
d1e4ebd9 1337 || is_rotated_mask(v1 ^ v2);
1338}
1339
1340// trashes r2
1341static void pass_args64(u_int a0, u_int a1)
1342{
1343 if(a0==1&&a1==0) {
1344 // must swap
1345 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1346 }
1347 else if(a0!=0&&a1==0) {
1348 emit_mov64(a1,1);
1349 if (a0>=0) emit_mov64(a0,0);
1350 }
1351 else {
1352 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1353 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1354 }
be516ebe 1355}
1356
d1e4ebd9 1357static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1358{
1359 switch(type) {
1360 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1361 case LOADBU_STUB:
1362 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1363 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1364 case LOADHU_STUB:
1365 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1366 case LOADW_STUB:
1367 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1368 default: assert(0);
d1e4ebd9 1369 }
1370}
1371
1372#include "pcsxmem.h"
be516ebe 1373//#include "pcsxmem_inline.c"
1374
1375static void do_readstub(int n)
1376{
1377 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1378 set_jump_target(stubs[n].addr, out);
1379 enum stub_type type = stubs[n].type;
1380 int i = stubs[n].a;
1381 int rs = stubs[n].b;
1382 const struct regstat *i_regs = (void *)stubs[n].c;
1383 u_int reglist = stubs[n].e;
1384 const signed char *i_regmap = i_regs->regmap;
1385 int rt;
cf95b4f0 1386 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
d1e4ebd9 1387 rt=get_reg(i_regmap,FTEMP);
1388 }else{
cf95b4f0 1389 rt=get_reg(i_regmap,dops[i].rt1);
d1e4ebd9 1390 }
1391 assert(rs>=0);
1392 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1393 void *restore_jump = NULL, *handler_jump = NULL;
1394 reglist|=(1<<rs);
1395 for (r = 0; r < HOST_CCREG; r++) {
1396 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1397 temp = r;
1398 break;
1399 }
1400 }
cf95b4f0 1401 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1402 reglist&=~(1<<rt);
1403 if(temp==-1) {
1404 save_regs(reglist);
1405 regs_saved=1;
1406 temp=(rs==0)?2:0;
1407 }
1408 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1409 temp2=1;
1410 emit_readdword(&mem_rtab,temp);
1411 emit_shrimm(rs,12,temp2);
1412 emit_readdword_dualindexedx8(temp,temp2,temp2);
1413 emit_adds64(temp2,temp2,temp2);
1414 handler_jump=out;
1415 emit_jc(0);
cf95b4f0 1416 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1417 switch(type) {
1418 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1419 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1420 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1421 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1422 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1423 default: assert(0);
d1e4ebd9 1424 }
1425 }
1426 if(regs_saved) {
1427 restore_jump=out;
1428 emit_jmp(0); // jump to reg restore
1429 }
1430 else
1431 emit_jmp(stubs[n].retaddr); // return address
1432 set_jump_target(handler_jump, out);
1433
1434 if(!regs_saved)
1435 save_regs(reglist);
1436 void *handler=NULL;
1437 if(type==LOADB_STUB||type==LOADBU_STUB)
1438 handler=jump_handler_read8;
1439 if(type==LOADH_STUB||type==LOADHU_STUB)
1440 handler=jump_handler_read16;
1441 if(type==LOADW_STUB)
1442 handler=jump_handler_read32;
1443 assert(handler);
1444 pass_args64(rs,temp2);
1445 int cc=get_reg(i_regmap,CCREG);
1446 if(cc<0)
1447 emit_loadreg(CCREG,2);
bb4f300c 1448 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1449 emit_far_call(handler);
d1e4ebd9 1450 // (no cycle reload after read)
cf95b4f0 1451 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
d1e4ebd9 1452 loadstore_extend(type,0,rt);
1453 }
1454 if(restore_jump)
1455 set_jump_target(restore_jump, out);
1456 restore_regs(reglist);
1457 emit_jmp(stubs[n].retaddr);
be516ebe 1458}
1459
81dbbf4c 1460static void inline_readstub(enum stub_type type, int i, u_int addr,
1461 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1462{
d1e4ebd9 1463 int rs=get_reg(regmap,target);
1464 int rt=get_reg(regmap,target);
1465 if(rs<0) rs=get_reg(regmap,-1);
1466 assert(rs>=0);
1467 u_int is_dynamic=0;
1468 uintptr_t host_addr = 0;
1469 void *handler;
1470 int cc=get_reg(regmap,CCREG);
bb4f300c 1471 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
d1e4ebd9 1472 // return;
1473 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1474 if (handler == NULL) {
cf95b4f0 1475 if(rt<0||dops[i].rt1==0)
d1e4ebd9 1476 return;
1477 if (addr != host_addr) {
1478 if (host_addr >= 0x100000000ull)
1479 abort(); // ROREG not implemented
1480 emit_movimm_from(addr, rs, host_addr, rs);
1481 }
1482 switch(type) {
1483 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1484 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1485 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1486 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1487 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1488 default: assert(0);
1489 }
1490 return;
1491 }
1492 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1493 if(is_dynamic) {
1494 if(type==LOADB_STUB||type==LOADBU_STUB)
1495 handler=jump_handler_read8;
1496 if(type==LOADH_STUB||type==LOADHU_STUB)
1497 handler=jump_handler_read16;
1498 if(type==LOADW_STUB)
1499 handler=jump_handler_read32;
1500 }
1501
1502 // call a memhandler
cf95b4f0 1503 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1504 reglist&=~(1<<rt);
1505 save_regs(reglist);
1506 if(target==0)
1507 emit_movimm(addr,0);
1508 else if(rs!=0)
1509 emit_mov(rs,0);
1510 if(cc<0)
1511 emit_loadreg(CCREG,2);
bb4f300c 1512 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
3968e69e 1513 if(is_dynamic) {
1514 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1515 emit_adrp((void *)l1, 1);
1516 emit_addimm64(1, l1 & 0xfff, 1);
1517 }
d1e4ebd9 1518 else
2a014d73 1519 emit_far_call(do_memhandler_pre);
d1e4ebd9 1520
2a014d73 1521 emit_far_call(handler);
d1e4ebd9 1522
1523 // (no cycle reload after read)
cf95b4f0 1524 if(rt>=0&&dops[i].rt1!=0)
d1e4ebd9 1525 loadstore_extend(type, 0, rt);
1526 restore_regs(reglist);
be516ebe 1527}
1528
1529static void do_writestub(int n)
1530{
1531 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1532 set_jump_target(stubs[n].addr, out);
1533 enum stub_type type=stubs[n].type;
1534 int i=stubs[n].a;
1535 int rs=stubs[n].b;
1536 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1537 u_int reglist=stubs[n].e;
1538 signed char *i_regmap=i_regs->regmap;
1539 int rt,r;
cf95b4f0 1540 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
d1e4ebd9 1541 rt=get_reg(i_regmap,r=FTEMP);
1542 }else{
cf95b4f0 1543 rt=get_reg(i_regmap,r=dops[i].rs2);
d1e4ebd9 1544 }
1545 assert(rs>=0);
1546 assert(rt>=0);
1547 int rtmp,temp=-1,temp2,regs_saved=0;
1548 void *restore_jump = NULL, *handler_jump = NULL;
1549 int reglist2=reglist|(1<<rs)|(1<<rt);
1550 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1551 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1552 temp = rtmp;
1553 break;
1554 }
1555 }
1556 if(temp==-1) {
1557 save_regs(reglist);
1558 regs_saved=1;
1559 for(rtmp=0;rtmp<=3;rtmp++)
1560 if(rtmp!=rs&&rtmp!=rt)
1561 {temp=rtmp;break;}
1562 }
1563 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1564 temp2=3;
1565 else {
1566 host_tempreg_acquire();
1567 temp2=HOST_TEMPREG;
1568 }
1569 emit_readdword(&mem_wtab,temp);
1570 emit_shrimm(rs,12,temp2);
1571 emit_readdword_dualindexedx8(temp,temp2,temp2);
1572 emit_adds64(temp2,temp2,temp2);
1573 handler_jump=out;
1574 emit_jc(0);
1575 switch(type) {
1576 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1577 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1578 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1579 default: assert(0);
1580 }
1581 if(regs_saved) {
1582 restore_jump=out;
1583 emit_jmp(0); // jump to reg restore
1584 }
1585 else
1586 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1587 set_jump_target(handler_jump, out);
1588
1589 // TODO FIXME: regalloc should prefer callee-saved regs
1590 if(!regs_saved)
1591 save_regs(reglist);
1592 void *handler=NULL;
1593 switch(type) {
1594 case STOREB_STUB: handler=jump_handler_write8; break;
1595 case STOREH_STUB: handler=jump_handler_write16; break;
1596 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1597 default: assert(0);
d1e4ebd9 1598 }
1599 assert(handler);
1600 pass_args(rs,rt);
1601 if(temp2!=3) {
1602 emit_mov64(temp2,3);
1603 host_tempreg_release();
1604 }
1605 int cc=get_reg(i_regmap,CCREG);
1606 if(cc<0)
1607 emit_loadreg(CCREG,2);
bb4f300c 1608 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
d1e4ebd9 1609 // returns new cycle_count
2a014d73 1610 emit_far_call(handler);
bb4f300c 1611 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
d1e4ebd9 1612 if(cc<0)
1613 emit_storereg(CCREG,2);
1614 if(restore_jump)
1615 set_jump_target(restore_jump, out);
1616 restore_regs(reglist);
1617 emit_jmp(stubs[n].retaddr);
be516ebe 1618}
1619
81dbbf4c 1620static void inline_writestub(enum stub_type type, int i, u_int addr,
1621 const signed char regmap[], int target, int adj, u_int reglist)
be516ebe 1622{
687b4580 1623 int rs = get_reg(regmap,-1);
1624 int rt = get_reg(regmap,target);
1625 assert(rs >= 0);
1626 assert(rt >= 0);
1627 uintptr_t host_addr = 0;
1628 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1629 if (handler == NULL) {
d1e4ebd9 1630 if (addr != host_addr) {
1631 if (host_addr >= 0x100000000ull)
1632 abort(); // ROREG not implemented
687b4580 1633 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1634 }
1635 switch (type) {
687b4580 1636 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1637 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1638 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1639 default: assert(0);
1640 }
1641 return;
1642 }
1643
1644 // call a memhandler
1645 save_regs(reglist);
687b4580 1646 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1647 loadstore_extend(type, rt, 0);
1648 int cc, cc_use;
1649 cc = cc_use = get_reg(regmap, CCREG);
1650 if (cc < 0)
1651 emit_loadreg(CCREG, (cc_use = 2));
bb4f300c 1652 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
d1e4ebd9 1653
2a014d73 1654 emit_far_call(do_memhandler_pre);
1655 emit_far_call(handler);
1656 emit_far_call(do_memhandler_post);
bb4f300c 1657 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
d1e4ebd9 1658 if (cc < 0)
1659 emit_storereg(CCREG, cc_use);
687b4580 1660 restore_regs(reglist);
be516ebe 1661}
1662
// Compare size bytes of source against copy.
// Returns 0 when they match, nonzero otherwise (memcmp semantics).
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  return memcmp(source, copy, size);
}
1669
1670// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1671static void do_dirty_stub_base(u_int vaddr, u_int source_len)
3968e69e 1672{
3d680478 1673 assert(source_len <= MAXBLOCK*4);
3968e69e 1674 emit_loadlp_ofs(0, 0); // ldr x1, source
1675 emit_loadlp_ofs(0, 1); // ldr x2, copy
3d680478 1676 emit_movz(source_len, 2);
2a014d73 1677 emit_far_call(verify_code_arm64);
3968e69e 1678 void *jmp = out;
1679 emit_cbz(0, 0);
1680 emit_movz(vaddr & 0xffff, 0);
1681 emit_movk_lsl16(vaddr >> 16, 0);
2a014d73 1682 emit_far_call(get_addr);
3968e69e 1683 emit_jmpreg(0);
1684 set_jump_target(jmp, out);
1685}
1686
// Assert that ptr points at the instruction pattern written by
// do_dirty_stub_base(); only the four fixed positions are checked.
static void assert_dirty_stub(const u_int *ptr)
{
  assert(0x58000000 == (ptr[0] & 0xff00001f)); // ldr x0, source
  assert(0x58000001 == (ptr[1] & 0xff00001f)); // ldr x1, copy
  assert(0x52800002 == (ptr[2] & 0xffe0001f)); // movz w2, #source_len
  assert(0xd61f0000 == ptr[8]);                // br x0
}
1694
// Patch the ldr (literal) instruction at loadl so that it loads from lit.
// The instruction must still have a zero offset field; lit must be
// word-aligned relative to loadl and less than 1MB ahead of it.
static void set_loadlp(u_int *loadl, void *lit)
{
  const uintptr_t byte_ofs = (u_char *)lit - (u_char *)loadl;
  assert((*loadl & ~0x1f) == 0x58000000);
  assert((byte_ofs & 3) == 0);
  assert(byte_ofs < 0x100000);
  const uintptr_t word_ofs = byte_ofs >> 2;
  *loadl |= word_ofs << 5; // imm19 starts at bit 5
}
1703
d1e4ebd9 1704static void do_dirty_stub_emit_literals(u_int *loadlps)
1705{
1706 set_loadlp(&loadlps[0], out);
1707 output_w64((uintptr_t)source);
1708 set_loadlp(&loadlps[1], out);
1709 output_w64((uintptr_t)copy);
be516ebe 1710}
1711
3d680478 1712static void *do_dirty_stub(int i, u_int source_len)
be516ebe 1713{
1714 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1715 u_int *loadlps = (void *)out;
3d680478 1716 do_dirty_stub_base(start + i*4, source_len);
d1e4ebd9 1717 void *entry = out;
be516ebe 1718 load_regs_entry(i);
d1e4ebd9 1719 if (entry == out)
1720 entry = instr_addr[i];
1721 emit_jmp(instr_addr[i]);
1722 do_dirty_stub_emit_literals(loadlps);
1723 return entry;
be516ebe 1724}
1725
3d680478 1726static void do_dirty_stub_ds(u_int source_len)
be516ebe 1727{
d1e4ebd9 1728 u_int *loadlps = (void *)out;
3d680478 1729 do_dirty_stub_base(start + 1, source_len);
3968e69e 1730 void *lit_jumpover = out;
d1e4ebd9 1731 emit_jmp(out + 8*2);
1732 do_dirty_stub_emit_literals(loadlps);
3968e69e 1733 set_jump_target(lit_jumpover, out);
be516ebe 1734}
1735
// Return the 64-bit literal referenced by the "ldr (literal)" instruction
// at i. The imm19 field (bits 23:5) is a signed offset in 32-bit words
// relative to the instruction itself.
static uint64_t get_from_ldr_literal(const u_int *i)
{
  uint64_t literal;
  int ofs;
  assert((i[0] & 0xff000000) == 0x58000000);
  ofs = (i[0] >> 5) & 0x7ffff;
  if (ofs & 0x40000)
    ofs -= 0x80000; // sign-extend without implementation-defined shifts
  // literals are only guaranteed to be 4-byte aligned, so copy them out
  memcpy(&literal, i + ofs, sizeof(literal));
  return literal;
}
be516ebe 1744
// Return the 16-bit immediate of the "movz" instruction at i.
static uint64_t get_from_movz(const u_int *i)
{
  const u_int insn = i[0];
  assert((insn & 0x7fe00000) == 0x52800000);
  return (insn >> 5) & 0xffff; // imm16 sits in bits 20:5
}
be516ebe 1750
3968e69e 1751// Find the "clean" entry point from a "dirty" entry point
1752// by skipping past the call to verify_code
1753static void *get_clean_addr(u_int *addr)
be516ebe 1754{
3968e69e 1755 assert_dirty_stub(addr);
1756 return addr + 9;
be516ebe 1757}
be516ebe 1758
3968e69e 1759static int verify_dirty(const u_int *ptr)
be516ebe 1760{
3968e69e 1761 const void *source, *copy;
1762 u_int len;
1763 assert_dirty_stub(ptr);
1764 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1765 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
3d680478 1766 len = get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1767 return !memcmp(source, copy, len);
1768}
1769
1770static int isclean(void *addr)
1771{
1772 const u_int *ptr = addr;
1773 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1774 assert_dirty_stub(ptr);
1775 return 0;
1776 }
1777 return 1;
1778}
1779
1780// get source that block at addr was compiled from (host pointers)
1781static void get_bounds(void *addr, u_char **start, u_char **end)
1782{
1783 const u_int *ptr = addr;
1784 assert_dirty_stub(ptr);
1785 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
3d680478 1786 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
3968e69e 1787}
1788
1789/* Special assem */
1790
81dbbf4c 1791static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
3968e69e 1792{
1793 save_load_regs_all(1, reglist);
32631e6a 1794 cop2_do_stall_check(op, i, i_regs, 0);
3968e69e 1795#ifdef PCNT
1796 emit_movimm(op, 0);
2a014d73 1797 emit_far_call(pcnt_gte_start);
3968e69e 1798#endif
1799 // pointer to cop2 regs
1800 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1801}
1802
1803static void c2op_epilogue(u_int op,u_int reglist)
1804{
1805#ifdef PCNT
1806 emit_movimm(op, 0);
2a014d73 1807 emit_far_call(pcnt_gte_end);
3968e69e 1808#endif
1809 save_load_regs_all(0, reglist);
be516ebe 1810}
1811
81dbbf4c 1812static void c2op_assemble(int i, const struct regstat *i_regs)
be516ebe 1813{
3968e69e 1814 u_int c2op=source[i]&0x3f;
1815 u_int hr,reglist_full=0,reglist;
1816 int need_flags,need_ir;
1817 for(hr=0;hr<HOST_REGS;hr++) {
1818 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1819 }
1820 reglist=reglist_full&CALLER_SAVE_REGS;
1821
1822 if (gte_handlers[c2op]!=NULL) {
1823 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1824 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1825 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1826 source[i],gte_unneeded[i+1],need_flags,need_ir);
d62c125a 1827 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
3968e69e 1828 need_flags=0;
1829 //int shift = (source[i] >> 19) & 1;
1830 //int lm = (source[i] >> 10) & 1;
1831 switch(c2op) {
1832 default:
1833 (void)need_ir;
81dbbf4c 1834 c2op_prologue(c2op, i, i_regs, reglist);
3968e69e 1835 emit_movimm(source[i],1); // opcode
1836 emit_writeword(1,&psxRegs.code);
2a014d73 1837 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3968e69e 1838 break;
1839 }
1840 c2op_epilogue(c2op,reglist);
1841 }
1842}
1843
1844static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1845{
1846 //value = value & 0x7ffff000;
1847 //if (value & 0x7f87e000) value |= 0x80000000;
1848 emit_andimm(sl, 0x7fffe000, temp);
1849 emit_testimm(temp, 0xff87ffff);
1850 emit_andimm(sl, 0x7ffff000, temp);
1851 host_tempreg_acquire();
1852 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1853 emit_cmovne_reg(HOST_TEMPREG, temp);
1854 host_tempreg_release();
1855 assert(0); // testing needed
1856}
1857
1858static void do_mfc2_31_one(u_int copr,signed char temp)
1859{
1860 emit_readshword(&reg_cop2d[copr],temp);
1861 emit_bicsar_imm(temp,31,temp);
1862 emit_cmpimm(temp,0xf80);
1863 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1864 emit_andimm(temp,0xf80,temp);
1865}
1866
1867static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1868{
1869 if (temp < 0) {
1870 host_tempreg_acquire();
1871 temp = HOST_TEMPREG;
1872 }
1873 do_mfc2_31_one(9,temp);
1874 emit_shrimm(temp,7,tl);
1875 do_mfc2_31_one(10,temp);
1876 emit_orrshr_imm(temp,2,tl);
1877 do_mfc2_31_one(11,temp);
1878 emit_orrshl_imm(temp,3,tl);
1879 emit_writeword(tl,&reg_cop2d[29]);
1880
1881 if (temp == HOST_TEMPREG)
1882 host_tempreg_release();
be516ebe 1883}
1884
1885static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1886{
3968e69e 1887 // case 0x18: MULT
1888 // case 0x19: MULTU
1889 // case 0x1A: DIV
1890 // case 0x1B: DIVU
cf95b4f0 1891 if(dops[i].rs1&&dops[i].rs2)
3968e69e 1892 {
cf95b4f0 1893 switch(dops[i].opcode2)
3968e69e 1894 {
1895 case 0x18: // MULT
1896 case 0x19: // MULTU
1897 {
cf95b4f0 1898 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1899 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1900 signed char hi=get_reg(i_regs->regmap,HIREG);
1901 signed char lo=get_reg(i_regs->regmap,LOREG);
1902 assert(m1>=0);
1903 assert(m2>=0);
1904 assert(hi>=0);
1905 assert(lo>=0);
1906
cf95b4f0 1907 if(dops[i].opcode2==0x18) // MULT
3968e69e 1908 emit_smull(m1,m2,hi);
1909 else // MULTU
1910 emit_umull(m1,m2,hi);
1911
1912 emit_mov(hi,lo);
1913 emit_shrimm64(hi,32,hi);
1914 break;
1915 }
1916 case 0x1A: // DIV
1917 case 0x1B: // DIVU
1918 {
cf95b4f0 1919 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1920 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
3968e69e 1921 signed char quotient=get_reg(i_regs->regmap,LOREG);
1922 signed char remainder=get_reg(i_regs->regmap,HIREG);
1923 assert(numerator>=0);
1924 assert(denominator>=0);
1925 assert(quotient>=0);
1926 assert(remainder>=0);
1927
cf95b4f0 1928 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1929 emit_sdiv(numerator,denominator,quotient);
1930 else // DIVU
1931 emit_udiv(numerator,denominator,quotient);
1932 emit_msub(quotient,denominator,numerator,remainder);
1933
1934 // div 0 quotient (remainder is already correct)
1935 host_tempreg_acquire();
cf95b4f0 1936 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1937 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1938 else
1939 emit_movimm(~0,HOST_TEMPREG);
1940 emit_test(denominator,denominator);
1941 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1942 host_tempreg_release();
1943 break;
1944 }
1945 default:
1946 assert(0);
1947 }
1948 }
1949 else
1950 {
1951 signed char hr=get_reg(i_regs->regmap,HIREG);
1952 signed char lr=get_reg(i_regs->regmap,LOREG);
cf95b4f0 1953 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
3968e69e 1954 {
cf95b4f0 1955 if (dops[i].rs1) {
1956 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
3968e69e 1957 assert(numerator >= 0);
1958 if (hr >= 0)
1959 emit_mov(numerator,hr);
1960 if (lr >= 0) {
cf95b4f0 1961 if (dops[i].opcode2 == 0x1A) // DIV
3968e69e 1962 emit_sub_asrimm(0,numerator,31,lr);
1963 else
1964 emit_movimm(~0,lr);
1965 }
1966 }
1967 else {
1968 if (hr >= 0) emit_zeroreg(hr);
1969 if (lr >= 0) emit_movimm(~0,lr);
1970 }
1971 }
1972 else
1973 {
1974 // Multiply by zero is zero.
1975 if (hr >= 0) emit_zeroreg(hr);
1976 if (lr >= 0) emit_zeroreg(lr);
1977 }
1978 }
be516ebe 1979}
1980#define multdiv_assemble multdiv_assemble_arm64
1981
d1e4ebd9 1982static void do_jump_vaddr(u_int rs)
1983{
1984 if (rs != 0)
1985 emit_mov(rs, 0);
2a014d73 1986 emit_far_call(get_addr_ht);
d1e4ebd9 1987 emit_jmpreg(0);
1988}
1989
be516ebe 1990static void do_preload_rhash(u_int r) {
1991 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1992 // register. On ARM the hash can be done with a single instruction (below)
1993}
1994
1995static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1996 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1997}
1998
1999static void do_rhash(u_int rs,u_int rh) {
2000 emit_andimm(rs, 0xf8, rh);
2001}
2002
d1e4ebd9 2003static void do_miniht_load(int ht, u_int rh) {
2004 emit_add64(ht, rh, ht);
2005 emit_ldst(0, 0, rh, ht, 0);
be516ebe 2006}
2007
d1e4ebd9 2008static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2009 emit_cmp(rh, rs);
2010 void *jaddr = out;
2011 emit_jeq(0);
2012 do_jump_vaddr(rs);
2013
2014 set_jump_target(jaddr, out);
2015 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2016 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2017 emit_jmpreg(ht);
be516ebe 2018}
2019
d1e4ebd9 2020// parsed by set_jump_target?
be516ebe 2021static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2022 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2023 emit_movk(return_address&0xffff,rt);
2024 add_to_linker(out,return_address,1);
2025 emit_adr(out,temp);
2026 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2027 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2028}
2029
/*
 * Make the code in [start, end) visible to instruction fetch.
 *
 * GCC's __clear_cache is deliberately not used: it caches the icache/dcache
 * line sizes, and those can differ between cores on big.LITTLE systems.
 * Instead CTR_EL0 is read on every call and the smallest line sizes seen so
 * far (kept in function-local statics) are used as the stride.
 */
static void clear_cache_arm64(char *start, char *end)
{
  static size_t min_iline = 0xffff, min_dline = 0xffff;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // fold this core's sizes into the global minimum and use that
  if (min_iline < isize)
    isize = min_iline;
  min_iline = isize;
  if (min_dline < dsize)
    dsize = min_dline;
  min_dline = dsize;

  /* With CTR_EL0.IDC set, cleaning the data cache to the Point of
     Unification is not needed for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < (uint64_t)end) {
      // "civac" rather than "cvau": suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* With CTR_EL0.DIC set, invalidating the instruction cache to the Point
     of Unification is not needed for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < (uint64_t)end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2070
2071// CPU-architecture-specific initialization
2a014d73 2072static void arch_init(void)
2073{
2074 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2075 struct tramp_insns *ops = ndrc->tramp.ops;
2076 size_t i;
2077 assert(!(diff & 3));
2078 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2079 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2080 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2081 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2082 }
2083 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
be516ebe 2084}
2085
2086// vim:shiftwidth=2:expandtab