drc: update according to the interpreter (3)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26/* Linker */
27static void set_jump_target(void *addr, void *target)
28{
29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
30 intptr_t offset = (u_char *)target - (u_char *)addr;
31
32 if ((*ptr&0xFC000000) == 0x14000000) { // b
33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
35 }
36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
40 // should only happen when jumping to an already compiled block (see add_jump_out)
41 // a workaround would be to do a trampoline jump via a stub at the end of the block
42 assert(-1048576 <= offset && offset < 1048576);
43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
44 }
45 else if((*ptr&0x9f000000)==0x10000000) { // adr
46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
49 }
50 else
51 abort(); // should not happen
52}
53
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// find the instruction that jumps to it.
// The third 32-bit word of the stub must be an adr; its pc-relative
// immediate is decoded and applied to the address of that adr.
// Returns a pointer to the instruction the adr refers to.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // work on an unsigned copy: shifting the signed word left is undefined
  // behaviour whenever bits are shifted into or past the sign bit
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // imm21 = immhi (bits 23..5) : immlo (bits 30..29)
  uint32_t imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from 21 bits
  int offset = (int)(imm21 ^ 0x100000) - 0x100000;
  return ptr + offset / 4;
}
63
#if 0
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  int is_b = (*branch&0xfc000000) == 0x14000000;
  int is_cond = (*branch&0xff000000) == 0x54000000  // b.cond
             || (*branch&0x7e000000) == 0x34000000; // cbz/cbnz

  if (is_b)
    return branch + ((signed int)(*branch<<6)>>6);
  if (is_cond)
    return branch + ((signed int)(*branch<<8)>>13);
  assert(0);
  return NULL;
}
#endif
81
82// Allocate a specific ARM register.
83static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
84{
85 int n;
86 int dirty=0;
87
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
90 {
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
93 cur->regmap[n]=-1;
94 }
95 }
96
97 cur->regmap[hr]=reg;
98 cur->dirty&=~(1<<hr);
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
101}
102
103// Alloc cycle count into dedicated register
104static void alloc_cc(struct regstat *cur,int i)
105{
106 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
107}
108
109/* Special alloc */
110
111
112/* Assembler */
113
114static unused const char *regname[32] = {
115 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
116 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
117 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
118 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
119};
120
121static unused const char *regname64[32] = {
122 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
123 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
124 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
125 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
126};
127
// Condition codes; the numeric value is what gets placed into the
// condition field of the emitted instructions and indexes condname[].
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
132
133static unused const char *condname[16] = {
134 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
135 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
136};
137
138static void output_w32(u_int word)
139{
140 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
141 out += 4;
142}
143
// Pack rn into bits 9..5 and rd into bits 4..0.
// Both register numbers must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  return fields;
}
150
// Pack rm into bits 20..16, rn into bits 9..5 and rd into bits 4..0.
// Register number 31 is accepted for all three fields.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
158
159static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
160{
161 assert(ra < 32);
162 return rm_rn_rd(rm, rn, rd) | (ra << 10);
163}
164
// Pack a 7-bit immediate (bits 21..15), rt2 (bits 14..10), rn (bits 9..5)
// and rt (bits 4..0). rt and rt2 must be below 31; rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  u_int fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
173
174static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
175{
176 assert(imm6 <= 63);
177 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
178}
179
// Pack a 16-bit immediate (bits 20..5) and rd (bits 4..0).
// rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);
  u_int fields = rd;
  fields |= imm16 << 5;
  return fields;
}
186
// Pack a 12-bit immediate (bits 21..10), rn (bits 9..5) and rd (bits 4..0).
// Register number 31 is accepted for rn and rd.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
194
// Pack a 9-bit immediate (bits 20..12), rn (bits 9..5) and the transfer
// register (bits 4..0). Both register numbers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
202
// Pack a 19-bit immediate (bits 23..5) and rt (bits 4..0).
// rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);
  u_int fields = rt;
  fields |= imm19 << 5;
  return fields;
}
209
// Pack the n bit (bit 22), immr (bits 21..16), imms (bits 15..10),
// rn (bits 9..5) and rd (bits 4..0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  u_int fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
219
220static u_int genjmp(const u_char *addr)
221{
222 intptr_t offset = addr - out;
223 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
224 if (offset < -134217728 || offset > 134217727) {
225 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
226 abort();
227 return 0;
228 }
229 return ((u_int)offset >> 2) & 0x03ffffff;
230}
231
232static u_int genjmpcc(const u_char *addr)
233{
234 intptr_t offset = addr - out;
235 if ((uintptr_t)addr < 3) return 0;
236 if (offset < -1048576 || offset > 1048572) {
237 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
238 abort();
239 return 0;
240 }
241 return ((u_int)offset >> 2) & 0x7ffff;
242}
243
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ...), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
248
249// This function returns true if the argument contains a
250// non-empty sequence of ones (possibly rotated) with the remainder zero.
251static uint32_t is_rotated_mask(u_int value)
252{
253 if (value == 0 || value == ~0)
254 return 0;
255 if (is_mask((value - 1) | value))
256 return 1;
257 return is_mask((~value - 1) | ~value);
258}
259
260static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
261{
262 int lzeros, tzeros, ones;
263 assert(value != 0);
264 if (is_mask((value - 1) | value)) {
265 lzeros = __builtin_clz(value);
266 tzeros = __builtin_ctz(value);
267 ones = 32 - lzeros - tzeros;
268 *immr = (32 - tzeros) & 31;
269 *imms = ones - 1;
270 return;
271 }
272 value = ~value;
273 if (is_mask((value - 1) | value)) {
274 lzeros = __builtin_clz(value);
275 tzeros = __builtin_ctz(value);
276 ones = 32 - lzeros - tzeros;
277 *immr = lzeros;
278 *imms = 31 - ones;
279 return;
280 }
281 abort();
282}
283
284static void emit_mov(u_int rs, u_int rt)
285{
286 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
287 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
288}
289
290static void emit_mov64(u_int rs, u_int rt)
291{
292 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
293 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
294}
295
296static void emit_add(u_int rs1, u_int rs2, u_int rt)
297{
298 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
299 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
300}
301
302static void emit_adds(u_int rs1, u_int rs2, u_int rt)
303{
304 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
305 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
306}
307
308static void emit_add64(u_int rs1, u_int rs2, u_int rt)
309{
310 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
311 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
312}
313
314static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
315{
316 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
317 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
318}
319#define emit_adds_ptr emit_adds64
320
321static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
322{
323 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
324 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
325}
326
327static void emit_neg(u_int rs, u_int rt)
328{
329 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
331}
332
333static void emit_negs(u_int rs, u_int rt)
334{
335 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
337}
338
339static void emit_sub(u_int rs1, u_int rs2, u_int rt)
340{
341 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
342 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
343}
344
345static void emit_subs(u_int rs1, u_int rs2, u_int rt)
346{
347 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
348 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
349}
350
351static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
352{
353 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
354 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
355}
356
357static void emit_movz(u_int imm, u_int rt)
358{
359 assem_debug("movz %s,#%#x\n", regname[rt], imm);
360 output_w32(0x52800000 | imm16_rd(imm, rt));
361}
362
363static void emit_movz_lsl16(u_int imm, u_int rt)
364{
365 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
366 output_w32(0x52a00000 | imm16_rd(imm, rt));
367}
368
369static void emit_movn(u_int imm, u_int rt)
370{
371 assem_debug("movn %s,#%#x\n", regname[rt], imm);
372 output_w32(0x12800000 | imm16_rd(imm, rt));
373}
374
375static void emit_movn_lsl16(u_int imm,u_int rt)
376{
377 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
378 output_w32(0x12a00000 | imm16_rd(imm, rt));
379}
380
381static void emit_movk(u_int imm,u_int rt)
382{
383 assem_debug("movk %s,#%#x\n", regname[rt], imm);
384 output_w32(0x72800000 | imm16_rd(imm, rt));
385}
386
387static void emit_movk_lsl16(u_int imm,u_int rt)
388{
389 assert(imm<65536);
390 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
391 output_w32(0x72a00000 | imm16_rd(imm, rt));
392}
393
394static void emit_zeroreg(u_int rt)
395{
396 emit_movz(0, rt);
397}
398
399static void emit_movimm(u_int imm, u_int rt)
400{
401 if (imm < 65536)
402 emit_movz(imm, rt);
403 else if ((~imm) < 65536)
404 emit_movn(~imm, rt);
405 else if ((imm&0xffff) == 0)
406 emit_movz_lsl16(imm >> 16, rt);
407 else if (((~imm)&0xffff) == 0)
408 emit_movn_lsl16(~imm >> 16, rt);
409 else if (is_rotated_mask(imm)) {
410 u_int immr, imms;
411 gen_logical_imm(imm, &immr, &imms);
412 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
413 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
414 }
415 else {
416 emit_movz(imm & 0xffff, rt);
417 emit_movk_lsl16(imm >> 16, rt);
418 }
419}
420
421static void emit_movimm64(uint64_t imm, u_int rt)
422{
423 u_int shift, op, imm16, insns = 0;
424 for (shift = 0; shift < 4; shift++) {
425 imm16 = (imm >> shift * 16) & 0xffff;
426 if (!imm16)
427 continue;
428 op = insns ? 0xf2800000 : 0xd2800000;
429 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
430 if (shift)
431 assem_debug(",lsl #%u", shift * 16);
432 assem_debug("\n");
433 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
434 insns++;
435 }
436 if (!insns) {
437 assem_debug("movz %s,#0\n", regname64[rt]);
438 output_w32(0xd2800000 | imm16_rd(0, rt));
439 }
440}
441
442static void emit_readword(void *addr, u_int rt)
443{
444 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
445 if (!(offset & 3) && offset <= 16380) {
446 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
447 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
448 }
449 else
450 abort();
451}
452
453static void emit_readdword(void *addr, u_int rt)
454{
455 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
456 if (!(offset & 7) && offset <= 32760) {
457 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
458 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
459 }
460 else
461 abort();
462}
463#define emit_readptr emit_readdword
464
465static void emit_readshword(void *addr, u_int rt)
466{
467 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
468 if (!(offset & 1) && offset <= 8190) {
469 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
470 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
471 }
472 else
473 assert(0);
474}
475
476static void emit_loadreg(u_int r, u_int hr)
477{
478 int is64 = 0;
479 if (r == 0)
480 emit_zeroreg(hr);
481 else {
482 void *addr;
483 switch (r) {
484 //case HIREG: addr = &hi; break;
485 //case LOREG: addr = &lo; break;
486 case CCREG: addr = &cycle_count; break;
487 case INVCP: addr = &invc_ptr; is64 = 1; break;
488 case ROREG: addr = &ram_offset; is64 = 1; break;
489 default:
490 assert(r < 34);
491 addr = &psxRegs.GPR.r[r];
492 break;
493 }
494 if (is64)
495 emit_readdword(addr, hr);
496 else
497 emit_readword(addr, hr);
498 }
499}
500
501static void emit_writeword(u_int rt, void *addr)
502{
503 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
504 if (!(offset & 3) && offset <= 16380) {
505 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
506 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
507 }
508 else
509 assert(0);
510}
511
512static void emit_writedword(u_int rt, void *addr)
513{
514 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
515 if (!(offset & 7) && offset <= 32760) {
516 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
517 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
518 }
519 else
520 abort();
521}
522
523static void emit_storereg(u_int r, u_int hr)
524{
525 assert(r < 64);
526 void *addr = &psxRegs.GPR.r[r];
527 switch (r) {
528 //case HIREG: addr = &hi; break;
529 //case LOREG: addr = &lo; break;
530 case CCREG: addr = &cycle_count; break;
531 default: assert(r < 34); break;
532 }
533 emit_writeword(hr, addr);
534}
535
536static void emit_test(u_int rs, u_int rt)
537{
538 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
539 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
540}
541
542static void emit_testimm(u_int rs, u_int imm)
543{
544 u_int immr, imms;
545 assem_debug("tst %s,#%#x\n", regname[rs], imm);
546 assert(is_rotated_mask(imm)); // good enough for PCSX
547 gen_logical_imm(imm, &immr, &imms);
548 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
549}
550
551static void emit_not(u_int rs,u_int rt)
552{
553 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
554 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
555}
556
557static void emit_and(u_int rs1,u_int rs2,u_int rt)
558{
559 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
560 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
561}
562
563static void emit_or(u_int rs1,u_int rs2,u_int rt)
564{
565 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
566 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
567}
568
569static void emit_bic(u_int rs1,u_int rs2,u_int rt)
570{
571 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
572 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
573}
574
575static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
576{
577 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
578 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
579}
580
581static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
582{
583 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
584 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
585}
586
587static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
588{
589 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
590 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
591}
592
593static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
594{
595 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
596 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
597}
598
599static void emit_xor(u_int rs1,u_int rs2,u_int rt)
600{
601 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
602 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
603}
604
605static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
606{
607 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
608 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
609}
610
611static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
612{
613 unused const char *st = s ? "s" : "";
614 s = s ? 0x20000000 : 0;
615 is64 = is64 ? 0x80000000 : 0;
616 if (imm < 4096) {
617 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
618 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
619 }
620 else if (-imm < 4096) {
621 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
622 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
623 }
624 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
625 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
626 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
627 if (imm & 0xfff) {
628 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
629 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
630 }
631 }
632 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
633 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
634 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
635 if (-imm & 0xfff) {
636 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
637 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
638 }
639 }
640 else {
641 u_int tmp = rt;
642 assert(!is64);
643 if (rs == rt) {
644 host_tempreg_acquire();
645 tmp = HOST_TEMPREG;
646 }
647 emit_movimm(imm, tmp);
648 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
649 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
650 if (tmp == HOST_TEMPREG)
651 host_tempreg_release();
652 }
653}
654
655static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
656{
657 if (imm == 0) {
658 emit_mov(rs, rt);
659 return;
660 }
661 emit_addimm_s(0, 0, rs, imm, rt);
662}
663
664static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
665{
666 emit_addimm_s(0, 1, rs, imm, rt);
667}
668
669static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
670{
671 emit_addimm64(rs, imm, rt);
672}
673
674static void emit_addimm_and_set_flags(int imm, u_int rt)
675{
676 emit_addimm_s(1, 0, rt, imm, rt);
677}
678
679static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
680{
681 emit_addimm_s(1, 0, rs, imm, rt);
682}
683
684static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
685{
686 const char *names[] = { "and", "orr", "eor", "ands" };
687 const char *name = names[op];
688 u_int immr, imms;
689 op = op << 29;
690 if (is_rotated_mask(imm)) {
691 gen_logical_imm(imm, &immr, &imms);
692 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
693 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
694 }
695 else {
696 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
697 host_tempreg_acquire();
698 emit_movimm(imm, HOST_TEMPREG);
699 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
700 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
701 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
702 host_tempreg_release();
703 }
704 (void)name;
705}
706
707static void emit_andimm(u_int rs, u_int imm, u_int rt)
708{
709 if (imm == 0)
710 emit_zeroreg(rt);
711 else
712 emit_logicop_imm(0, rs, imm, rt);
713}
714
715static void emit_orimm(u_int rs, u_int imm, u_int rt)
716{
717 if (imm == 0) {
718 if (rs != rt)
719 emit_mov(rs, rt);
720 }
721 else
722 emit_logicop_imm(1, rs, imm, rt);
723}
724
725static void emit_xorimm(u_int rs, u_int imm, u_int rt)
726{
727 if (imm == 0) {
728 if (rs != rt)
729 emit_mov(rs, rt);
730 }
731 else
732 emit_logicop_imm(2, rs, imm, rt);
733}
734
735static void emit_sbfm(u_int rs,u_int imm,u_int rt)
736{
737 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
738 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
739}
740
741static void emit_ubfm(u_int rs,u_int imm,u_int rt)
742{
743 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
744 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
745}
746
747static void emit_shlimm(u_int rs,u_int imm,u_int rt)
748{
749 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
750 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
751}
752
753static void emit_shrimm(u_int rs,u_int imm,u_int rt)
754{
755 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
756 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
757}
758
759static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
760{
761 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
762 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
763}
764
765static void emit_sarimm(u_int rs,u_int imm,u_int rt)
766{
767 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
768 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
769}
770
771static void emit_rorimm(u_int rs,u_int imm,u_int rt)
772{
773 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
775}
776
777static void emit_signextend16(u_int rs, u_int rt)
778{
779 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
780 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
781}
782
783static void emit_shl(u_int rs,u_int rshift,u_int rt)
784{
785 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
786 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
787}
788
789static void emit_shr(u_int rs,u_int rshift,u_int rt)
790{
791 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
792 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
793}
794
795static void emit_sar(u_int rs,u_int rshift,u_int rt)
796{
797 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
798 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
799}
800
801static void emit_cmpimm(u_int rs, u_int imm)
802{
803 if (imm < 4096) {
804 assem_debug("cmp %s,%#x\n", regname[rs], imm);
805 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
806 }
807 else if (-imm < 4096) {
808 assem_debug("cmn %s,%#x\n", regname[rs], imm);
809 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
810 }
811 else if (imm < 16777216 && !(imm & 0xfff)) {
812 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
813 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
814 }
815 else {
816 host_tempreg_acquire();
817 emit_movimm(imm, HOST_TEMPREG);
818 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
819 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
820 host_tempreg_release();
821 }
822}
823
824static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
825{
826 assert(imm == 0 || imm == 1);
827 assert(cond0 < 0x10);
828 assert(cond1 < 0x10);
829 if (imm) {
830 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
831 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
832 } else {
833 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
834 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
835 }
836}
837
838static void emit_cmovne_imm(u_int imm,u_int rt)
839{
840 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
841}
842
843static void emit_cmovl_imm(u_int imm,u_int rt)
844{
845 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
846}
847
848static void emit_cmovb_imm(int imm,u_int rt)
849{
850 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
851}
852
853static void emit_cmoveq_reg(u_int rs,u_int rt)
854{
855 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
856 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
857}
858
859static void emit_cmovne_reg(u_int rs,u_int rt)
860{
861 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
862 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
863}
864
865static void emit_cmovl_reg(u_int rs,u_int rt)
866{
867 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
868 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
869}
870
871static void emit_cmovb_reg(u_int rs,u_int rt)
872{
873 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
874 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
875}
876
877static void emit_cmovs_reg(u_int rs,u_int rt)
878{
879 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
880 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
881}
882
883static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
884{
885 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
886 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
887}
888
889static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
890{
891 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
892 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
893}
894
895static void emit_slti32(u_int rs,int imm,u_int rt)
896{
897 if(rs!=rt) emit_zeroreg(rt);
898 emit_cmpimm(rs,imm);
899 if(rs==rt) emit_movimm(0,rt);
900 emit_cmovl_imm(1,rt);
901}
902
903static void emit_sltiu32(u_int rs,int imm,u_int rt)
904{
905 if(rs!=rt) emit_zeroreg(rt);
906 emit_cmpimm(rs,imm);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovb_imm(1,rt);
909}
910
911static void emit_cmp(u_int rs,u_int rt)
912{
913 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
914 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
915}
916
917static void emit_cmpcs(u_int rs,u_int rt)
918{
919 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
920 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
921}
922
923static void emit_set_gz32(u_int rs, u_int rt)
924{
925 //assem_debug("set_gz32\n");
926 emit_cmpimm(rs,1);
927 emit_movimm(1,rt);
928 emit_cmovl_imm(0,rt);
929}
930
931static void emit_set_nz32(u_int rs, u_int rt)
932{
933 //assem_debug("set_nz32\n");
934 if(rs!=rt) emit_mov(rs,rt);
935 emit_test(rs,rs);
936 emit_cmovne_imm(1,rt);
937}
938
939static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
940{
941 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
942 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
943 emit_cmp(rs1,rs2);
944 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
945 emit_cmovl_imm(1,rt);
946}
947
948static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
949{
950 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
951 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
952 emit_cmp(rs1,rs2);
953 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
954 emit_cmovb_imm(1,rt);
955}
956
957static int can_jump_or_call(const void *a)
958{
959 intptr_t diff = (u_char *)a - out;
960 return (-134217728 <= diff && diff <= 134217727);
961}
962
963static void emit_call(const void *a)
964{
965 intptr_t diff = (u_char *)a - out;
966 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
967 assert(!(diff & 3));
968 if (-134217728 <= diff && diff <= 134217727)
969 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
970 else
971 abort();
972}
973
974static void emit_jmp(const void *a)
975{
976 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
977 u_int offset = genjmp(a);
978 output_w32(0x14000000 | offset);
979}
980
981static void emit_jne(const void *a)
982{
983 assem_debug("bne %p\n", a);
984 u_int offset = genjmpcc(a);
985 output_w32(0x54000000 | (offset << 5) | COND_NE);
986}
987
988static void emit_jeq(const void *a)
989{
990 assem_debug("beq %p\n", a);
991 u_int offset = genjmpcc(a);
992 output_w32(0x54000000 | (offset << 5) | COND_EQ);
993}
994
995static void emit_js(const void *a)
996{
997 assem_debug("bmi %p\n", a);
998 u_int offset = genjmpcc(a);
999 output_w32(0x54000000 | (offset << 5) | COND_MI);
1000}
1001
1002static void emit_jns(const void *a)
1003{
1004 assem_debug("bpl %p\n", a);
1005 u_int offset = genjmpcc(a);
1006 output_w32(0x54000000 | (offset << 5) | COND_PL);
1007}
1008
1009static void emit_jl(const void *a)
1010{
1011 assem_debug("blt %p\n", a);
1012 u_int offset = genjmpcc(a);
1013 output_w32(0x54000000 | (offset << 5) | COND_LT);
1014}
1015
1016static void emit_jge(const void *a)
1017{
1018 assem_debug("bge %p\n", a);
1019 u_int offset = genjmpcc(a);
1020 output_w32(0x54000000 | (offset << 5) | COND_GE);
1021}
1022
1023static void emit_jo(const void *a)
1024{
1025 assem_debug("bvs %p\n", a);
1026 u_int offset = genjmpcc(a);
1027 output_w32(0x54000000 | (offset << 5) | COND_VS);
1028}
1029
1030static void emit_jno(const void *a)
1031{
1032 assem_debug("bvc %p\n", a);
1033 u_int offset = genjmpcc(a);
1034 output_w32(0x54000000 | (offset << 5) | COND_VC);
1035}
1036
1037static void emit_jc(const void *a)
1038{
1039 assem_debug("bcs %p\n", a);
1040 u_int offset = genjmpcc(a);
1041 output_w32(0x54000000 | (offset << 5) | COND_CS);
1042}
1043
1044static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
1045{
1046 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
1047 u_int offset = genjmpcc(a);
1048 is64 = is64 ? 0x80000000 : 0;
1049 isnz = isnz ? 0x01000000 : 0;
1050 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1051}
1052
1053static void *emit_cbz(u_int r, const void *a)
1054{
1055 void *ret = out;
1056 emit_cb(0, 0, a, r);
1057 return ret;
1058}
1059
1060static void emit_jmpreg(u_int r)
1061{
1062 assem_debug("br %s\n", regname64[r]);
1063 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1064}
1065
1066static void emit_retreg(u_int r)
1067{
1068 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1069 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1070}
1071
1072static void emit_ret(void)
1073{
1074 emit_retreg(LR);
1075}
1076
1077static void emit_adr(void *addr, u_int rt)
1078{
1079 intptr_t offset = (u_char *)addr - out;
1080 assert(-1048576 <= offset && offset < 1048576);
1081 assert(rt < 31);
1082 assem_debug("adr x%d,#%#lx\n", rt, offset);
1083 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1084}
1085
1086static void emit_adrp(void *addr, u_int rt)
1087{
1088 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1089 assert(-4294967296l <= offset && offset < 4294967296l);
1090 assert(rt < 31);
1091 offset >>= 12;
1092 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1093 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1094}
1095
1096static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1097{
1098 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1101}
1102
1103static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1104{
1105 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1106 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1107}
1108
1109static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1110{
1111 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1112 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1113}
1114
1115static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1116{
1117 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1118 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1119}
1120
1121static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1122{
1123 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1124 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1125}
1126#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1127
1128static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1129{
1130 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1131 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1132}
1133
1134static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1135{
1136 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1137 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1138}
1139
1140static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1141{
1142 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1143 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1144}
1145
1146static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1147{
1148 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1149 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1150}
1151
1152static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1153{
1154 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1155 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1156}
1157
1158static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1159{
1160 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1161 assert(-256 <= offset && offset < 256);
1162 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1163}
1164
1165static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1166{
1167 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1168 assert(-256 <= offset && offset < 256);
1169 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1170}
1171
1172static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1173{
1174 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1175 assert(-256 <= offset && offset < 256);
1176 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1177}
1178
1179static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1180{
1181 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1182 assert(-256 <= offset && offset < 256);
1183 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1184}
1185
1186static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1187{
1188 if (!(offset & 3) && (u_int)offset <= 16380) {
1189 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1190 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1191 }
1192 else if (-256 <= offset && offset < 256) {
1193 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1194 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1195 }
1196 else
1197 assert(0);
1198}
1199
1200static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1201{
1202 if (!(offset & 1) && (u_int)offset <= 8190) {
1203 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1204 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1205 }
1206 else if (-256 <= offset && offset < 256) {
1207 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1208 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1209 }
1210 else
1211 assert(0);
1212}
1213
1214static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1215{
1216 if ((u_int)offset < 4096) {
1217 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1218 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1219 }
1220 else if (-256 <= offset && offset < 256) {
1221 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1222 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1223 }
1224 else
1225 assert(0);
1226}
1227
1228static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1229{
1230 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1231 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1232}
1233
1234static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1235{
1236 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1237 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1238}
1239
1240static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1241{
1242 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1243 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1244}
1245
1246static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1247{
1248 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1249 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1250}
1251
1252static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1253{
1254 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1255 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1256}
1257
1258static void emit_clz(u_int rs, u_int rt)
1259{
1260 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1261 output_w32(0x5ac01000 | rn_rd(rs, rt));
1262}
1263
1264// special case for checking invalid_code
1265static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1266{
1267 emit_shrimm(r, 12, rt);
1268 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1269 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1270}
1271
1272// special for loadlr_assemble, rs2 is destroyed
1273static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1274{
1275 emit_shl(rs2, shift, rs2);
1276 emit_bic(rs1, rs2, rt);
1277}
1278
1279static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1280{
1281 emit_shr(rs2, shift, rs2);
1282 emit_bic(rs1, rs2, rt);
1283}
1284
1285static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1286{
1287 u_int op = 0xb9000000;
1288 unused const char *ldst = is_st ? "st" : "ld";
1289 unused char rp = is64 ? 'x' : 'w';
1290 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1291 is64 = is64 ? 1 : 0;
1292 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1293 ofs = (ofs >> (2+is64));
1294 if (!is_st) op |= 0x00400000;
1295 if (is64) op |= 0x40000000;
1296 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1297}
1298
1299static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1300{
1301 u_int op = 0x29000000;
1302 unused const char *ldst = is_st ? "st" : "ld";
1303 unused char rp = is64 ? 'x' : 'w';
1304 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1305 is64 = is64 ? 1 : 0;
1306 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1307 ofs = (ofs >> (2+is64));
1308 assert(-64 <= ofs && ofs <= 63);
1309 ofs &= 0x7f;
1310 if (!is_st) op |= 0x00400000;
1311 if (is64) op |= 0x80000000;
1312 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1313}
1314
1315static void save_load_regs_all(int is_store, u_int reglist)
1316{
1317 int ofs = 0, c = 0;
1318 u_int r, pair[2];
1319 for (r = 0; reglist; r++, reglist >>= 1) {
1320 if (reglist & 1)
1321 pair[c++] = r;
1322 if (c == 2) {
1323 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1324 ofs += 8 * 2;
1325 c = 0;
1326 }
1327 }
1328 if (c) {
1329 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1330 ofs += 8;
1331 }
1332 assert(ofs <= SSP_CALLER_REGS);
1333}
1334
1335// Save registers before function call
1336static void save_regs(u_int reglist)
1337{
1338 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1339 save_load_regs_all(1, reglist);
1340}
1341
1342// Restore registers after function call
1343static void restore_regs(u_int reglist)
1344{
1345 reglist &= CALLER_SAVE_REGS;
1346 save_load_regs_all(0, reglist);
1347}
1348
1349/* Stubs/epilogue */
1350
1351static void literal_pool(int n)
1352{
1353 (void)literals;
1354}
1355
// Intentionally empty: nothing is emitted on this target.
static void literal_pool_jumpover(int n)
{
  /* nothing to do */
}
1359
1360// parsed by get_pointer, find_extjump_insn
1361static void emit_extjump(u_char *addr, u_int target)
1362{
1363 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1364
1365 emit_movz(target & 0xffff, 0);
1366 emit_movk_lsl16(target >> 16, 0);
1367
1368 // addr is in the current recompiled block (max 256k)
1369 // offset shouldn't exceed +/-1MB
1370 emit_adr(addr, 1);
1371 emit_far_jump(dyna_linker);
1372}
1373
1374static void check_extjump2(void *src)
1375{
1376 u_int *ptr = src;
1377 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1378 (void)ptr;
1379}
1380
1381// put rt_val into rt, potentially making use of rs with value rs_val
1382static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1383{
1384 int diff = rt_val - rs_val;
1385 if ((-4096 < diff && diff < 4096)
1386 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1387 emit_addimm(rs, diff, rt);
1388 else if (rt_val == ~rs_val)
1389 emit_not(rs, rt);
1390 else if (is_rotated_mask(rs_val ^ rt_val))
1391 emit_xorimm(rs, rs_val ^ rt_val, rt);
1392 else
1393 emit_movimm(rt_val, rt);
1394}
1395
1396// return 1 if the above function can do it's job cheaply
1397static int is_similar_value(u_int v1, u_int v2)
1398{
1399 int diff = v1 - v2;
1400 return (-4096 < diff && diff < 4096)
1401 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1402 || v1 == ~v2
1403 || is_rotated_mask(v1 ^ v2);
1404}
1405
1406static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1407{
1408 if (rt_val < 0x100000000ull) {
1409 emit_movimm_from(rs_val, rs, rt_val, rt);
1410 return;
1411 }
1412 // just move the whole thing. At least on Linux all addresses
1413 // seem to be 48bit, so 3 insns - not great not terrible
1414 emit_movimm64(rt_val, rt);
1415}
1416
1417// trashes x2
1418static void pass_args64(u_int a0, u_int a1)
1419{
1420 if(a0==1&&a1==0) {
1421 // must swap
1422 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1423 }
1424 else if(a0!=0&&a1==0) {
1425 emit_mov64(a1,1);
1426 if (a0>=0) emit_mov64(a0,0);
1427 }
1428 else {
1429 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1430 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1431 }
1432}
1433
1434static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1435{
1436 switch(type) {
1437 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1438 case LOADBU_STUB:
1439 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1440 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1441 case LOADHU_STUB:
1442 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1443 case LOADW_STUB:
1444 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1445 default: assert(0);
1446 }
1447}
1448
1449#include "pcsxmem.h"
1450//#include "pcsxmem_inline.c"
1451
1452static void do_readstub(int n)
1453{
1454 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1455 set_jump_target(stubs[n].addr, out);
1456 enum stub_type type = stubs[n].type;
1457 int i = stubs[n].a;
1458 int rs = stubs[n].b;
1459 const struct regstat *i_regs = (void *)stubs[n].c;
1460 u_int reglist = stubs[n].e;
1461 const signed char *i_regmap = i_regs->regmap;
1462 int rt;
1463 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1464 rt=get_reg(i_regmap,FTEMP);
1465 }else{
1466 rt=get_reg(i_regmap,dops[i].rt1);
1467 }
1468 assert(rs>=0);
1469 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1470 void *restore_jump = NULL, *handler_jump = NULL;
1471 reglist|=(1<<rs);
1472 for (r = 0; r < HOST_CCREG; r++) {
1473 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1474 temp = r;
1475 break;
1476 }
1477 }
1478 if(rt>=0&&dops[i].rt1!=0)
1479 reglist&=~(1<<rt);
1480 if(temp==-1) {
1481 save_regs(reglist);
1482 regs_saved=1;
1483 temp=(rs==0)?2:0;
1484 }
1485 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1486 temp2=1;
1487 emit_readdword(&mem_rtab,temp);
1488 emit_shrimm(rs,12,temp2);
1489 emit_readdword_dualindexedx8(temp,temp2,temp2);
1490 emit_adds64(temp2,temp2,temp2);
1491 handler_jump=out;
1492 emit_jc(0);
1493 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1494 switch(type) {
1495 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1496 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1497 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1498 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1499 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1500 default: assert(0);
1501 }
1502 }
1503 if(regs_saved) {
1504 restore_jump=out;
1505 emit_jmp(0); // jump to reg restore
1506 }
1507 else
1508 emit_jmp(stubs[n].retaddr); // return address
1509 set_jump_target(handler_jump, out);
1510
1511 if(!regs_saved)
1512 save_regs(reglist);
1513 void *handler=NULL;
1514 if(type==LOADB_STUB||type==LOADBU_STUB)
1515 handler=jump_handler_read8;
1516 if(type==LOADH_STUB||type==LOADHU_STUB)
1517 handler=jump_handler_read16;
1518 if(type==LOADW_STUB)
1519 handler=jump_handler_read32;
1520 assert(handler);
1521 pass_args64(rs,temp2);
1522 int cc=get_reg(i_regmap,CCREG);
1523 if(cc<0)
1524 emit_loadreg(CCREG,2);
1525 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1526 emit_far_call(handler);
1527 // (no cycle reload after read)
1528 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1529 loadstore_extend(type,0,rt);
1530 }
1531 if(restore_jump)
1532 set_jump_target(restore_jump, out);
1533 restore_regs(reglist);
1534 emit_jmp(stubs[n].retaddr);
1535}
1536
1537static void inline_readstub(enum stub_type type, int i, u_int addr,
1538 const signed char regmap[], int target, int adj, u_int reglist)
1539{
1540 int ra = cinfo[i].addr;
1541 int rt = get_reg(regmap, target);
1542 assert(ra >= 0);
1543 u_int is_dynamic=0;
1544 uintptr_t host_addr = 0;
1545 void *handler;
1546 int cc=get_reg(regmap,CCREG);
1547 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
1548 // return;
1549 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1550 if (handler == NULL) {
1551 if(rt<0||dops[i].rt1==0)
1552 return;
1553 if (addr != host_addr)
1554 emit_movimm_from64(addr, ra, host_addr, ra);
1555 switch(type) {
1556 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1557 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1558 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1559 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1560 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
1561 default: assert(0);
1562 }
1563 return;
1564 }
1565 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1566 if (is_dynamic) {
1567 if(type==LOADB_STUB||type==LOADBU_STUB)
1568 handler=jump_handler_read8;
1569 if(type==LOADH_STUB||type==LOADHU_STUB)
1570 handler=jump_handler_read16;
1571 if(type==LOADW_STUB)
1572 handler=jump_handler_read32;
1573 }
1574
1575 // call a memhandler
1576 if(rt>=0&&dops[i].rt1!=0)
1577 reglist&=~(1<<rt);
1578 save_regs(reglist);
1579 if(target==0)
1580 emit_movimm(addr,0);
1581 else if(ra!=0)
1582 emit_mov(ra,0);
1583 if(cc<0)
1584 emit_loadreg(CCREG,2);
1585 emit_addimm(cc<0?2:cc,adj,2);
1586 if(is_dynamic) {
1587 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1588 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1589 if (-4294967296l <= offset && offset < 4294967296l) {
1590 emit_adrp((void *)l1, 1);
1591 emit_addimm64(1, l1 & 0xfff, 1);
1592 }
1593 else
1594 emit_movimm64(l1, 1);
1595 }
1596 else
1597 emit_far_call(do_memhandler_pre);
1598
1599 emit_far_call(handler);
1600
1601 // (no cycle reload after read)
1602 if(rt>=0&&dops[i].rt1!=0)
1603 loadstore_extend(type, 0, rt);
1604 restore_regs(reglist);
1605}
1606
1607static void do_writestub(int n)
1608{
1609 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1610 set_jump_target(stubs[n].addr, out);
1611 enum stub_type type=stubs[n].type;
1612 int i=stubs[n].a;
1613 int rs=stubs[n].b;
1614 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1615 u_int reglist=stubs[n].e;
1616 signed char *i_regmap=i_regs->regmap;
1617 int rt,r;
1618 if(dops[i].itype==C2LS) {
1619 rt=get_reg(i_regmap,r=FTEMP);
1620 }else{
1621 rt=get_reg(i_regmap,r=dops[i].rs2);
1622 }
1623 assert(rs>=0);
1624 assert(rt>=0);
1625 int rtmp,temp=-1,temp2,regs_saved=0;
1626 void *restore_jump = NULL, *handler_jump = NULL;
1627 int reglist2=reglist|(1<<rs)|(1<<rt);
1628 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1629 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1630 temp = rtmp;
1631 break;
1632 }
1633 }
1634 if(temp==-1) {
1635 save_regs(reglist);
1636 regs_saved=1;
1637 for(rtmp=0;rtmp<=3;rtmp++)
1638 if(rtmp!=rs&&rtmp!=rt)
1639 {temp=rtmp;break;}
1640 }
1641 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1642 temp2=3;
1643 else {
1644 host_tempreg_acquire();
1645 temp2=HOST_TEMPREG;
1646 }
1647 emit_readdword(&mem_wtab,temp);
1648 emit_shrimm(rs,12,temp2);
1649 emit_readdword_dualindexedx8(temp,temp2,temp2);
1650 emit_adds64(temp2,temp2,temp2);
1651 handler_jump=out;
1652 emit_jc(0);
1653 switch(type) {
1654 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1655 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1656 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1657 default: assert(0);
1658 }
1659 if(regs_saved) {
1660 restore_jump=out;
1661 emit_jmp(0); // jump to reg restore
1662 }
1663 else
1664 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1665 set_jump_target(handler_jump, out);
1666
1667 if(!regs_saved)
1668 save_regs(reglist);
1669 void *handler=NULL;
1670 switch(type) {
1671 case STOREB_STUB: handler=jump_handler_write8; break;
1672 case STOREH_STUB: handler=jump_handler_write16; break;
1673 case STOREW_STUB: handler=jump_handler_write32; break;
1674 default: assert(0);
1675 }
1676 assert(handler);
1677 pass_args(rs,rt);
1678 if(temp2!=3) {
1679 emit_mov64(temp2,3);
1680 host_tempreg_release();
1681 }
1682 int cc=get_reg(i_regmap,CCREG);
1683 if(cc<0)
1684 emit_loadreg(CCREG,2);
1685 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1686 // returns new cycle_count
1687 emit_far_call(handler);
1688 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1689 if(cc<0)
1690 emit_storereg(CCREG,2);
1691 if(restore_jump)
1692 set_jump_target(restore_jump, out);
1693 restore_regs(reglist);
1694 emit_jmp(stubs[n].retaddr);
1695}
1696
1697static void inline_writestub(enum stub_type type, int i, u_int addr,
1698 const signed char regmap[], int target, int adj, u_int reglist)
1699{
1700 int ra = cinfo[i].addr;
1701 int rt = get_reg(regmap,target);
1702 assert(ra >= 0);
1703 assert(rt >= 0);
1704 uintptr_t host_addr = 0;
1705 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1706 if (handler == NULL) {
1707 if (addr != host_addr)
1708 emit_movimm_from64(addr, ra, host_addr, ra);
1709 switch (type) {
1710 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1711 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1712 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
1713 default: assert(0);
1714 }
1715 return;
1716 }
1717
1718 // call a memhandler
1719 save_regs(reglist);
1720 emit_writeword(ra, &address); // some handlers still need it
1721 loadstore_extend(type, rt, 0);
1722 int cc, cc_use;
1723 cc = cc_use = get_reg(regmap, CCREG);
1724 if (cc < 0)
1725 emit_loadreg(CCREG, (cc_use = 2));
1726 emit_addimm(cc_use, adj, 2);
1727
1728 emit_far_call(do_memhandler_pre);
1729 emit_far_call(handler);
1730 emit_far_call(do_memhandler_post);
1731 emit_addimm(0, -adj, cc_use);
1732 if (cc < 0)
1733 emit_storereg(CCREG, cc_use);
1734 restore_regs(reglist);
1735}
1736
1737/* Special assem */
1738
1739static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1740{
1741 save_load_regs_all(1, reglist);
1742 cop2_do_stall_check(op, i, i_regs, 0);
1743#ifdef PCNT
1744 emit_movimm(op, 0);
1745 emit_far_call(pcnt_gte_start);
1746#endif
1747 // pointer to cop2 regs
1748 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1749}
1750
1751static void c2op_epilogue(u_int op,u_int reglist)
1752{
1753#ifdef PCNT
1754 emit_movimm(op, 0);
1755 emit_far_call(pcnt_gte_end);
1756#endif
1757 save_load_regs_all(0, reglist);
1758}
1759
1760static void c2op_assemble(int i, const struct regstat *i_regs)
1761{
1762 u_int c2op=source[i]&0x3f;
1763 u_int hr,reglist_full=0,reglist;
1764 int need_flags,need_ir;
1765 for(hr=0;hr<HOST_REGS;hr++) {
1766 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1767 }
1768 reglist=reglist_full&CALLER_SAVE_REGS;
1769
1770 if (gte_handlers[c2op]!=NULL) {
1771 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1772 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1773 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1774 source[i],gte_unneeded[i+1],need_flags,need_ir);
1775 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1776 need_flags=0;
1777 //int shift = (source[i] >> 19) & 1;
1778 //int lm = (source[i] >> 10) & 1;
1779 switch(c2op) {
1780 default:
1781 (void)need_ir;
1782 c2op_prologue(c2op, i, i_regs, reglist);
1783 emit_movimm(source[i],1); // opcode
1784 emit_writeword(1,&psxRegs.code);
1785 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1786 break;
1787 }
1788 c2op_epilogue(c2op,reglist);
1789 }
1790}
1791
1792static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1793{
1794 //value = value & 0x7ffff000;
1795 //if (value & 0x7f87e000) value |= 0x80000000;
1796 emit_andimm(sl, 0x7fffe000, temp);
1797 emit_testimm(temp, 0xff87ffff);
1798 emit_andimm(sl, 0x7ffff000, temp);
1799 host_tempreg_acquire();
1800 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1801 emit_cmovne_reg(HOST_TEMPREG, temp);
1802 host_tempreg_release();
1803 assert(0); // testing needed
1804}
1805
1806static void do_mfc2_31_one(u_int copr,signed char temp)
1807{
1808 emit_readshword(&reg_cop2d[copr],temp);
1809 emit_bicsar_imm(temp,31,temp);
1810 emit_cmpimm(temp,0xf80);
1811 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1812 emit_andimm(temp,0xf80,temp);
1813}
1814
1815static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1816{
1817 if (temp < 0) {
1818 host_tempreg_acquire();
1819 temp = HOST_TEMPREG;
1820 }
1821 do_mfc2_31_one(9,temp);
1822 emit_shrimm(temp,7,tl);
1823 do_mfc2_31_one(10,temp);
1824 emit_orrshr_imm(temp,2,tl);
1825 do_mfc2_31_one(11,temp);
1826 emit_orrshl_imm(temp,3,tl);
1827 emit_writeword(tl,&reg_cop2d[29]);
1828
1829 if (temp == HOST_TEMPREG)
1830 host_tempreg_release();
1831}
1832
1833static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1834{
1835 // case 0x18: MULT
1836 // case 0x19: MULTU
1837 // case 0x1A: DIV
1838 // case 0x1B: DIVU
1839 if(dops[i].rs1&&dops[i].rs2)
1840 {
1841 switch(dops[i].opcode2)
1842 {
1843 case 0x18: // MULT
1844 case 0x19: // MULTU
1845 {
1846 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1847 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1848 signed char hi=get_reg(i_regs->regmap,HIREG);
1849 signed char lo=get_reg(i_regs->regmap,LOREG);
1850 assert(m1>=0);
1851 assert(m2>=0);
1852 assert(hi>=0);
1853 assert(lo>=0);
1854
1855 if(dops[i].opcode2==0x18) // MULT
1856 emit_smull(m1,m2,hi);
1857 else // MULTU
1858 emit_umull(m1,m2,hi);
1859
1860 emit_mov(hi,lo);
1861 emit_shrimm64(hi,32,hi);
1862 break;
1863 }
1864 case 0x1A: // DIV
1865 case 0x1B: // DIVU
1866 {
1867 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1868 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1869 signed char quotient=get_reg(i_regs->regmap,LOREG);
1870 signed char remainder=get_reg(i_regs->regmap,HIREG);
1871 assert(numerator>=0);
1872 assert(denominator>=0);
1873 assert(quotient>=0);
1874 assert(remainder>=0);
1875
1876 if (dops[i].opcode2 == 0x1A) // DIV
1877 emit_sdiv(numerator,denominator,quotient);
1878 else // DIVU
1879 emit_udiv(numerator,denominator,quotient);
1880 emit_msub(quotient,denominator,numerator,remainder);
1881
1882 // div 0 quotient (remainder is already correct)
1883 host_tempreg_acquire();
1884 if (dops[i].opcode2 == 0x1A) { // DIV
1885 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1886 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1887 }
1888 else
1889 emit_movimm(~0,HOST_TEMPREG);
1890 emit_test(denominator,denominator);
1891 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1892 host_tempreg_release();
1893 break;
1894 }
1895 default:
1896 assert(0);
1897 }
1898 }
1899 else
1900 {
1901 signed char hr=get_reg(i_regs->regmap,HIREG);
1902 signed char lr=get_reg(i_regs->regmap,LOREG);
1903 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1904 {
1905 if (dops[i].rs1) {
1906 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1907 assert(numerator >= 0);
1908 if (hr >= 0)
1909 emit_mov(numerator,hr);
1910 if (lr >= 0) {
1911 if (dops[i].opcode2 == 0x1A) { // DIV
1912 emit_add_lsrimm(WZR,numerator,31,lr);
1913 emit_orn_asrimm(lr,numerator,31,lr);
1914 }
1915 else
1916 emit_movimm(~0,lr);
1917 }
1918 }
1919 else {
1920 if (hr >= 0) emit_zeroreg(hr);
1921 if (lr >= 0) emit_movimm(~0,lr);
1922 }
1923 }
1924 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1925 {
1926 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1927 assert(denominator >= 0);
1928 if (hr >= 0) emit_zeroreg(hr);
1929 if (lr >= 0) {
1930 emit_zeroreg(lr);
1931 emit_test(denominator, denominator);
1932 emit_csinvne_reg(lr, lr, lr);
1933 }
1934 }
1935 else
1936 {
1937 // Multiply by zero is zero.
1938 if (hr >= 0) emit_zeroreg(hr);
1939 if (lr >= 0) emit_zeroreg(lr);
1940 }
1941 }
1942}
1943#define multdiv_assemble multdiv_assemble_arm64
1944
1945static void do_jump_vaddr(u_int rs)
1946{
1947 if (rs != 0)
1948 emit_mov(rs, 0);
1949 emit_far_call(ndrc_get_addr_ht);
1950 emit_jmpreg(0);
1951}
1952
1953static void do_preload_rhash(u_int r) {
1954 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1955 // register. On ARM the hash can be done with a single instruction (below)
1956}
1957
1958static void do_preload_rhtbl(u_int ht) {
1959 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1960}
1961
1962static void do_rhash(u_int rs,u_int rh) {
1963 emit_andimm(rs, 0xf8, rh);
1964}
1965
1966static void do_miniht_load(int ht, u_int rh) {
1967 emit_add64(ht, rh, ht);
1968 emit_ldst(0, 0, rh, ht, 0);
1969}
1970
1971static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1972 emit_cmp(rh, rs);
1973 void *jaddr = out;
1974 emit_jeq(0);
1975 do_jump_vaddr(rs);
1976
1977 set_jump_target(jaddr, out);
1978 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1979 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1980 emit_jmpreg(ht);
1981}
1982
1983// parsed by set_jump_target?
1984static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1985 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1986 emit_movk(return_address&0xffff,rt);
1987 add_to_linker(out,return_address,1);
1988 emit_adr(out,temp);
1989 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1990 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1991}
1992
1993static unused void clear_cache_arm64(char *start, char *end)
1994{
1995 // Don't rely on GCC's __clear_cache implementation, as it caches
1996 // icache/dcache cache line sizes, that can vary between cores on
1997 // big.LITTLE architectures.
1998 uint64_t addr, ctr_el0;
1999 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2000 size_t isize, dsize;
2001
2002 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2003 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2004 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2005
2006 // use the global minimum cache line size
2007 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2008 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2009
2010 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2011 not required for instruction to data coherence. */
2012 if ((ctr_el0 & (1 << 28)) == 0x0) {
2013 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2014 for (; addr < (uint64_t)end; addr += dsize)
2015 // use "civac" instead of "cvau", as this is the suggested workaround for
2016 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2017 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2018 }
2019 __asm__ volatile("dsb ish" : : : "memory");
2020
2021 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2022 Unification is not required for instruction to data coherence. */
2023 if ((ctr_el0 & (1 << 29)) == 0x0) {
2024 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2025 for (; addr < (uint64_t)end; addr += isize)
2026 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2027
2028 __asm__ volatile("dsb ish" : : : "memory");
2029 }
2030
2031 __asm__ volatile("isb" : : : "memory");
2032}
2033
2034// CPU-architecture-specific initialization
2035static void arch_init(void)
2036{
2037 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2038 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2039 size_t i;
2040 assert(!(diff & 3));
2041 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2042 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2043 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2044 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2045 }
2046 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2047}
2048
2049// vim:shiftwidth=2:expandtab