spu: rvb volume is signed
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26/* Linker */
27static void set_jump_target(void *addr, void *target)
28{
29 u_int *ptr = NDRC_WRITE_OFFSET(addr);
30 intptr_t offset = (u_char *)target - (u_char *)addr;
31
32 if ((*ptr&0xFC000000) == 0x14000000) { // b
33 assert(offset>=-134217728LL&&offset<134217728LL);
34 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
35 }
36 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
37 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
38 // Conditional branch are limited to +/- 1MB
39 // block max size is 256k so branching beyond the +/- 1MB limit
40 // should only happen when jumping to an already compiled block (see add_jump_out)
41 // a workaround would be to do a trampoline jump via a stub at the end of the block
42 assert(-1048576 <= offset && offset < 1048576);
43 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
44 }
45 else if((*ptr&0x9f000000)==0x10000000) { // adr
46 // generated by do_miniht_insert
47 assert(offset>=-1048576LL&&offset<1048576LL);
48 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
49 }
50 else
51 abort(); // should not happen
52}
53
// Given a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr (asserted); the address that
// adr refers to is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // decode through an unsigned copy: shifting a signed int left past its
  // sign bit (as "*ptr<<8" did) is undefined behaviour
  uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // immhi occupies bits 5..23: move its top bit to bit 31, then shift back
  // arithmetically to sign-extend it
  int32_t immhi = (int32_t)(insn << 8) >> 13;
  // immhi:immlo; multiply instead of shifting the (possibly negative) immhi
  int offset = immhi * 4 + (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
63
#if 0
// Find where an external branch is linked to, using the address of its stub:
// take the address that the stub loads (dyna_linker arg1), treat it as a
// pointer to a branch insn and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int insn = *branch;
  const int is_b = (insn & 0xfc000000) == 0x14000000;
  const int is_bcond = (insn & 0xff000000) == 0x54000000;
  const int is_cbz = (insn & 0x7e000000) == 0x34000000; // cbz/cbnz

  if (is_b)
    return branch + ((signed int)(insn << 6) >> 6);
  if (is_bcond || is_cbz)
    return branch + ((signed int)(insn << 8) >> 13);
  assert(0);
  return NULL;
}
#endif
81
82// Allocate a specific ARM register.
83static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
84{
85 int n;
86 int dirty=0;
87
88 // see if it's already allocated (and dealloc it)
89 for(n=0;n<HOST_REGS;n++)
90 {
91 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
92 dirty=(cur->dirty>>n)&1;
93 cur->regmap[n]=-1;
94 }
95 }
96
97 cur->regmap[hr]=reg;
98 cur->dirty&=~(1<<hr);
99 cur->dirty|=dirty<<hr;
100 cur->isconst&=~(1<<hr);
101}
102
103// Alloc cycle count into dedicated register
104static void alloc_cc(struct regstat *cur, int i)
105{
106 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
107}
108
109static void alloc_cc_optional(struct regstat *cur, int i)
110{
111 if (cur->regmap[HOST_CCREG] < 0) {
112 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
113 cur->noevict &= ~(1u << HOST_CCREG);
114 }
115}
116
117/* Special alloc */
118
119
120/* Assembler */
121
122static unused const char *regname[32] = {
123 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
124 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
125 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
126 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
127};
128
129static unused const char *regname64[32] = {
130 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
131 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
132 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
133 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
134};
135
// Condition codes, in the order of the 4-bit condition field values
// (the emitters place them into the instruction word directly).
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
140
141static unused const char *condname[16] = {
142 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
143 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
144};
145
146static void output_w32(u_int word)
147{
148 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
149 out += 4;
150}
151
// Pack the Rn (bits 5..9) and Rd (bits 0..4) fields.
// Both register numbers must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  return fields;
}
158
// Pack the Rm (bits 16..20), Rn (bits 5..9) and Rd (bits 0..4) fields.
// Register number 31 is accepted for every field.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
166
167static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
168{
169 assert(ra < 32);
170 return rm_rn_rd(rm, rn, rd) | (ra << 10);
171}
172
// Pack imm7 (bits 15..21), Rt2 (bits 10..14), Rn (bits 5..9) and
// Rt (bits 0..4). Rt and Rt2 must be below 31; Rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
181
182static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
183{
184 assert(imm6 <= 63);
185 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
186}
187
// Pack a 16-bit immediate (bits 5..20) and Rd (bits 0..4).
// Rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
194
// Pack a 12-bit immediate (bits 10..21), Rn (bits 5..9) and Rd (bits 0..4).
// Register number 31 is accepted for both register fields.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
202
// Pack a 9-bit immediate (bits 12..20), Rn (bits 5..9) and Rt (bits 0..4).
// Both register numbers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
210
// Pack a 19-bit immediate (bits 5..23) and Rt (bits 0..4).
// Rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
217
// Pack the N bit (bit 22), immr (bits 16..21), imms (bits 10..15),
// Rn (bits 5..9) and Rd (bits 0..4) of a bitfield/logical-immediate insn.
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
227
228static u_int genjmp(const u_char *addr)
229{
230 intptr_t offset = addr - out;
231 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
232 if (offset < -134217728 || offset > 134217727) {
233 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
234 abort();
235 return 0;
236 }
237 return ((u_int)offset >> 2) & 0x03ffffff;
238}
239
240static u_int genjmpcc(const u_char *addr)
241{
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0;
244 if (offset < -1048576 || offset > 1048572) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
247 return 0;
248 }
249 return ((u_int)offset >> 2) & 0x7ffff;
250}
251
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (0x1, 0x3, 0x7, ...), 0 otherwise.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low mask carries through all its ones
  return (value & (value + 1)) == 0;
}
256
257// This function returns true if the argument contains a
258// non-empty sequence of ones (possibly rotated) with the remainder zero.
259static uint32_t is_rotated_mask(u_int value)
260{
261 if (value == 0 || value == ~0)
262 return 0;
263 if (is_mask((value - 1) | value))
264 return 1;
265 return is_mask((~value - 1) | ~value);
266}
267
268static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
269{
270 int lzeros, tzeros, ones;
271 assert(value != 0);
272 if (is_mask((value - 1) | value)) {
273 lzeros = __builtin_clz(value);
274 tzeros = __builtin_ctz(value);
275 ones = 32 - lzeros - tzeros;
276 *immr = (32 - tzeros) & 31;
277 *imms = ones - 1;
278 return;
279 }
280 value = ~value;
281 if (is_mask((value - 1) | value)) {
282 lzeros = __builtin_clz(value);
283 tzeros = __builtin_ctz(value);
284 ones = 32 - lzeros - tzeros;
285 *immr = lzeros;
286 *imms = 31 - ones;
287 return;
288 }
289 abort();
290}
291
292static void emit_mov(u_int rs, u_int rt)
293{
294 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
295 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
296}
297
298static void emit_mov64(u_int rs, u_int rt)
299{
300 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
301 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
302}
303
304static void emit_add(u_int rs1, u_int rs2, u_int rt)
305{
306 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
307 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
308}
309
310static void emit_adds(u_int rs1, u_int rs2, u_int rt)
311{
312 assem_debug("adds %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
313 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
314}
315
316static void emit_add64(u_int rs1, u_int rs2, u_int rt)
317{
318 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
319 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
320}
321
322static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
323{
324 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
325 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
326}
327#define emit_adds_ptr emit_adds64
328
329static void emit_add_lsrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
330{
331 assem_debug("add %s,%s,%s,lsr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
332 output_w32(0x0b400000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
333}
334
335static void emit_neg(u_int rs, u_int rt)
336{
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
339}
340
341static void emit_negs(u_int rs, u_int rt)
342{
343 assem_debug("negs %s,%s\n",regname[rt],regname[rs]);
344 output_w32(0x6b000000 | rm_rn_rd(rs, WZR, rt));
345}
346
347static void emit_sub(u_int rs1, u_int rs2, u_int rt)
348{
349 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
350 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
351}
352
353static void emit_subs(u_int rs1, u_int rs2, u_int rt)
354{
355 assem_debug("subs %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
356 output_w32(0x6b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
357}
358
359static unused void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
360{
361 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
362 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
363}
364
365static void emit_movz(u_int imm, u_int rt)
366{
367 assem_debug("movz %s,#%#x\n", regname[rt], imm);
368 output_w32(0x52800000 | imm16_rd(imm, rt));
369}
370
371static void emit_movz_lsl16(u_int imm, u_int rt)
372{
373 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x52a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_movn(u_int imm, u_int rt)
378{
379 assem_debug("movn %s,#%#x\n", regname[rt], imm);
380 output_w32(0x12800000 | imm16_rd(imm, rt));
381}
382
383static void emit_movn_lsl16(u_int imm,u_int rt)
384{
385 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
386 output_w32(0x12a00000 | imm16_rd(imm, rt));
387}
388
389static void emit_movk(u_int imm,u_int rt)
390{
391 assem_debug("movk %s,#%#x\n", regname[rt], imm);
392 output_w32(0x72800000 | imm16_rd(imm, rt));
393}
394
395static void emit_movk_lsl16(u_int imm,u_int rt)
396{
397 assert(imm<65536);
398 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
399 output_w32(0x72a00000 | imm16_rd(imm, rt));
400}
401
402static void emit_zeroreg(u_int rt)
403{
404 emit_movz(0, rt);
405}
406
407static void emit_movimm(u_int imm, u_int rt)
408{
409 if (imm < 65536)
410 emit_movz(imm, rt);
411 else if ((~imm) < 65536)
412 emit_movn(~imm, rt);
413 else if ((imm&0xffff) == 0)
414 emit_movz_lsl16(imm >> 16, rt);
415 else if (((~imm)&0xffff) == 0)
416 emit_movn_lsl16(~imm >> 16, rt);
417 else if (is_rotated_mask(imm)) {
418 u_int immr, imms;
419 gen_logical_imm(imm, &immr, &imms);
420 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
421 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
422 }
423 else {
424 emit_movz(imm & 0xffff, rt);
425 emit_movk_lsl16(imm >> 16, rt);
426 }
427}
428
429static void emit_movimm64(uint64_t imm, u_int rt)
430{
431 u_int shift, op, imm16, insns = 0;
432 for (shift = 0; shift < 4; shift++) {
433 imm16 = (imm >> shift * 16) & 0xffff;
434 if (!imm16)
435 continue;
436 op = insns ? 0xf2800000 : 0xd2800000;
437 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
438 if (shift)
439 assem_debug(",lsl #%u", shift * 16);
440 assem_debug("\n");
441 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
442 insns++;
443 }
444 if (!insns) {
445 assem_debug("movz %s,#0\n", regname64[rt]);
446 output_w32(0xd2800000 | imm16_rd(0, rt));
447 }
448}
449
450static void emit_readword(void *addr, u_int rt)
451{
452 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
453 if (!(offset & 3) && offset <= 16380) {
454 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
455 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
456 }
457 else
458 abort();
459}
460
461static void emit_readdword(void *addr, u_int rt)
462{
463 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
464 if (!(offset & 7) && offset <= 32760) {
465 assem_debug("ldr %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
466 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
467 }
468 else
469 abort();
470}
471#define emit_readptr emit_readdword
472
473static void emit_readshword(void *addr, u_int rt)
474{
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 1) && offset <= 8190) {
477 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
478 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
479 }
480 else
481 assert(0);
482}
483
484static void emit_loadreg(u_int r, u_int hr)
485{
486 int is64 = 0;
487 if (r == 0)
488 emit_zeroreg(hr);
489 else {
490 void *addr;
491 switch (r) {
492 //case HIREG: addr = &hi; break;
493 //case LOREG: addr = &lo; break;
494 case CCREG: addr = &cycle_count; break;
495 case INVCP: addr = &invc_ptr; is64 = 1; break;
496 case ROREG: addr = &ram_offset; is64 = 1; break;
497 default:
498 assert(r < 34);
499 addr = &psxRegs.GPR.r[r];
500 break;
501 }
502 if (is64)
503 emit_readdword(addr, hr);
504 else
505 emit_readword(addr, hr);
506 }
507}
508
509static void emit_writeword(u_int rt, void *addr)
510{
511 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
512 if (!(offset & 3) && offset <= 16380) {
513 assem_debug("str %s,[x%d+%#lx]%s\n", regname[rt], FP, offset, fpofs_name(offset));
514 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
515 }
516 else
517 assert(0);
518}
519
520static void emit_writedword(u_int rt, void *addr)
521{
522 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
523 if (!(offset & 7) && offset <= 32760) {
524 assem_debug("str %s,[x%d+%#lx]%s\n", regname64[rt], FP, offset, fpofs_name(offset));
525 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
526 }
527 else
528 abort();
529}
530
531static void emit_storereg(u_int r, u_int hr)
532{
533 assert(r < 64);
534 void *addr = &psxRegs.GPR.r[r];
535 switch (r) {
536 //case HIREG: addr = &hi; break;
537 //case LOREG: addr = &lo; break;
538 case CCREG: addr = &cycle_count; break;
539 default: assert(r < 34); break;
540 }
541 emit_writeword(hr, addr);
542}
543
544static void emit_test(u_int rs, u_int rt)
545{
546 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
547 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
548}
549
550static void emit_testimm(u_int rs, u_int imm)
551{
552 u_int immr, imms;
553 assem_debug("tst %s,#%#x\n", regname[rs], imm);
554 assert(is_rotated_mask(imm)); // good enough for PCSX
555 gen_logical_imm(imm, &immr, &imms);
556 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
557}
558
559static void emit_not(u_int rs,u_int rt)
560{
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
563}
564
565static void emit_and(u_int rs1,u_int rs2,u_int rt)
566{
567 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
568 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
569}
570
571static void emit_or(u_int rs1,u_int rs2,u_int rt)
572{
573 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
574 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
575}
576
577static void emit_bic(u_int rs1,u_int rs2,u_int rt)
578{
579 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
580 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
581}
582
583static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
584{
585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
587}
588
589static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
590{
591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
593}
594
595static void emit_orn_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
596{
597 assem_debug("orn %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
598 output_w32(0x2aa00000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
599}
600
601static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
602{
603 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
604 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
605}
606
607static void emit_xor(u_int rs1,u_int rs2,u_int rt)
608{
609 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
610 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
611}
612
613static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
614{
615 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
616 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
617}
618
619static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
620{
621 unused const char *st = s ? "s" : "";
622 s = s ? 0x20000000 : 0;
623 is64 = is64 ? 0x80000000 : 0;
624 if (imm < 4096) {
625 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
626 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
627 }
628 else if (-imm < 4096) {
629 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
630 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
631 }
632 else if (imm < 16777216 && (!(imm & 0xfff) || !s)) {
633 assem_debug("add%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], imm&0xfff000);
634 output_w32(0x11400000 | is64 | s | imm12_rn_rd(imm >> 12, rs, rt));
635 if (imm & 0xfff) {
636 assem_debug("add %s,%s,#%#lx\n", regname[rt], regname[rt], imm&0xfff);
637 output_w32(0x11000000 | is64 | imm12_rn_rd(imm & 0xfff, rt, rt));
638 }
639 }
640 else if (-imm < 16777216 && (!(-imm & 0xfff) || !s)) {
641 assem_debug("sub%s %s,%s,#%#lx\n", st, regname[rt], regname[rs], -imm&0xfff000);
642 output_w32(0x51400000 | is64 | s | imm12_rn_rd(-imm >> 12, rs, rt));
643 if (-imm & 0xfff) {
644 assem_debug("sub %s,%s,#%#lx\n", regname[rt], regname[rt], -imm&0xfff);
645 output_w32(0x51000000 | is64 | imm12_rn_rd(-imm & 0xfff, rt, rt));
646 }
647 }
648 else {
649 u_int tmp = rt;
650 assert(!is64);
651 if (rs == rt) {
652 host_tempreg_acquire();
653 tmp = HOST_TEMPREG;
654 }
655 emit_movimm(imm, tmp);
656 assem_debug("add%s %s,%s,%s\n", st, regname[rt], regname[rs], regname[tmp]);
657 output_w32(0x0b000000 | s | rm_rn_rd(rs, tmp, rt));
658 if (tmp == HOST_TEMPREG)
659 host_tempreg_release();
660 }
661}
662
663static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
664{
665 if (imm == 0) {
666 emit_mov(rs, rt);
667 return;
668 }
669 emit_addimm_s(0, 0, rs, imm, rt);
670}
671
672static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
673{
674 emit_addimm_s(0, 1, rs, imm, rt);
675}
676
677static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
678{
679 emit_addimm64(rs, imm, rt);
680}
681
682static void emit_addimm_and_set_flags(int imm, u_int rt)
683{
684 emit_addimm_s(1, 0, rt, imm, rt);
685}
686
687static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
688{
689 emit_addimm_s(1, 0, rs, imm, rt);
690}
691
692static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
693{
694 const char *names[] = { "and", "orr", "eor", "ands" };
695 const char *name = names[op];
696 u_int immr, imms;
697 op = op << 29;
698 if (is_rotated_mask(imm)) {
699 gen_logical_imm(imm, &immr, &imms);
700 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
701 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
702 }
703 else {
704 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
705 host_tempreg_acquire();
706 emit_movimm(imm, HOST_TEMPREG);
707 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
708 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
709 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
710 host_tempreg_release();
711 }
712 (void)name;
713}
714
715static void emit_andimm(u_int rs, u_int imm, u_int rt)
716{
717 if (imm == 0)
718 emit_zeroreg(rt);
719 else
720 emit_logicop_imm(0, rs, imm, rt);
721}
722
723static void emit_orimm(u_int rs, u_int imm, u_int rt)
724{
725 if (imm == 0) {
726 if (rs != rt)
727 emit_mov(rs, rt);
728 }
729 else
730 emit_logicop_imm(1, rs, imm, rt);
731}
732
733static void emit_xorimm(u_int rs, u_int imm, u_int rt)
734{
735 if (imm == 0) {
736 if (rs != rt)
737 emit_mov(rs, rt);
738 }
739 else
740 emit_logicop_imm(2, rs, imm, rt);
741}
742
743static void emit_sbfm(u_int rs,u_int imm,u_int rt)
744{
745 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
746 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
747}
748
749static void emit_ubfm(u_int rs,u_int imm,u_int rt)
750{
751 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
752 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
753}
754
755static void emit_shlimm(u_int rs,u_int imm,u_int rt)
756{
757 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
758 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
759}
760
761static void emit_shrimm(u_int rs,u_int imm,u_int rt)
762{
763 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
764 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
765}
766
767static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
768{
769 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
770 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
771}
772
773static void emit_sarimm(u_int rs,u_int imm,u_int rt)
774{
775 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
776 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
777}
778
779static void emit_rorimm(u_int rs,u_int imm,u_int rt)
780{
781 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
783}
784
785static void emit_signextend16(u_int rs, u_int rt)
786{
787 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
788 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
789}
790
791static void emit_shl(u_int rs,u_int rshift,u_int rt)
792{
793 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
794 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
795}
796
797static void emit_shr(u_int rs,u_int rshift,u_int rt)
798{
799 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
800 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
801}
802
803static void emit_sar(u_int rs,u_int rshift,u_int rt)
804{
805 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
806 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
807}
808
809static void emit_cmpimm(u_int rs, u_int imm)
810{
811 if (imm < 4096) {
812 assem_debug("cmp %s,%#x\n", regname[rs], imm);
813 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
814 }
815 else if (-imm < 4096) {
816 assem_debug("cmn %s,%#x\n", regname[rs], imm);
817 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
818 }
819 else if (imm < 16777216 && !(imm & 0xfff)) {
820 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
821 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
822 }
823 else {
824 host_tempreg_acquire();
825 emit_movimm(imm, HOST_TEMPREG);
826 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
827 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
828 host_tempreg_release();
829 }
830}
831
832static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
833{
834 assert(imm == 0 || imm == 1);
835 assert(cond0 < 0x10);
836 assert(cond1 < 0x10);
837 if (imm) {
838 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
839 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
840 } else {
841 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
842 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
843 }
844}
845
846static void emit_cmovne_imm(u_int imm,u_int rt)
847{
848 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
849}
850
851static void emit_cmovl_imm(u_int imm,u_int rt)
852{
853 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
854}
855
856static void emit_cmovb_imm(int imm,u_int rt)
857{
858 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
859}
860
861static void emit_cmoveq_reg(u_int rs,u_int rt)
862{
863 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
864 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
865}
866
867static void emit_cmovne_reg(u_int rs,u_int rt)
868{
869 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
870 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
871}
872
873static void emit_cmovl_reg(u_int rs,u_int rt)
874{
875 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
876 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
877}
878
879static void emit_cmovb_reg(u_int rs,u_int rt)
880{
881 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
882 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
883}
884
885static void emit_cmovs_reg(u_int rs,u_int rt)
886{
887 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
888 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
889}
890
891static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
892{
893 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
894 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
895}
896
897static void emit_csinvne_reg(u_int rs1,u_int rs2,u_int rt)
898{
899 assem_debug("csinv %s,%s,%s,ne\n",regname[rt],regname[rs1],regname[rs2]);
900 output_w32(0x5a800000 | (COND_NE << 12) | rm_rn_rd(rs2, rs1, rt));
901}
902
903static void emit_slti32(u_int rs,int imm,u_int rt)
904{
905 if(rs!=rt) emit_zeroreg(rt);
906 emit_cmpimm(rs,imm);
907 if(rs==rt) emit_movimm(0,rt);
908 emit_cmovl_imm(1,rt);
909}
910
911static void emit_sltiu32(u_int rs,int imm,u_int rt)
912{
913 if(rs!=rt) emit_zeroreg(rt);
914 emit_cmpimm(rs,imm);
915 if(rs==rt) emit_movimm(0,rt);
916 emit_cmovb_imm(1,rt);
917}
918
919static void emit_cmp(u_int rs,u_int rt)
920{
921 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
922 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
923}
924
925static void emit_cmpcs(u_int rs,u_int rt)
926{
927 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
928 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
929}
930
931static void emit_set_gz32(u_int rs, u_int rt)
932{
933 //assem_debug("set_gz32\n");
934 emit_cmpimm(rs,1);
935 emit_movimm(1,rt);
936 emit_cmovl_imm(0,rt);
937}
938
939static void emit_set_nz32(u_int rs, u_int rt)
940{
941 //assem_debug("set_nz32\n");
942 if(rs!=rt) emit_mov(rs,rt);
943 emit_test(rs,rs);
944 emit_cmovne_imm(1,rt);
945}
946
947static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
948{
949 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
950 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
951 emit_cmp(rs1,rs2);
952 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
953 emit_cmovl_imm(1,rt);
954}
955
956static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
957{
958 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
959 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
960 emit_cmp(rs1,rs2);
961 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
962 emit_cmovb_imm(1,rt);
963}
964
965static int can_jump_or_call(const void *a)
966{
967 intptr_t diff = (u_char *)a - out;
968 return (-134217728 <= diff && diff <= 134217727);
969}
970
971static void emit_call(const void *a)
972{
973 intptr_t diff = (u_char *)a - out;
974 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
975 assert(!(diff & 3));
976 if (-134217728 <= diff && diff <= 134217727)
977 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
978 else
979 abort();
980}
981
982static void emit_jmp(const void *a)
983{
984 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
985 u_int offset = genjmp(a);
986 output_w32(0x14000000 | offset);
987}
988
989static void emit_jne(const void *a)
990{
991 assem_debug("bne %p\n", a);
992 u_int offset = genjmpcc(a);
993 output_w32(0x54000000 | (offset << 5) | COND_NE);
994}
995
996static void emit_jeq(const void *a)
997{
998 assem_debug("beq %p\n", a);
999 u_int offset = genjmpcc(a);
1000 output_w32(0x54000000 | (offset << 5) | COND_EQ);
1001}
1002
1003static void emit_js(const void *a)
1004{
1005 assem_debug("bmi %p\n", a);
1006 u_int offset = genjmpcc(a);
1007 output_w32(0x54000000 | (offset << 5) | COND_MI);
1008}
1009
1010static void emit_jns(const void *a)
1011{
1012 assem_debug("bpl %p\n", a);
1013 u_int offset = genjmpcc(a);
1014 output_w32(0x54000000 | (offset << 5) | COND_PL);
1015}
1016
1017static void emit_jl(const void *a)
1018{
1019 assem_debug("blt %p\n", a);
1020 u_int offset = genjmpcc(a);
1021 output_w32(0x54000000 | (offset << 5) | COND_LT);
1022}
1023
1024static void emit_jge(const void *a)
1025{
1026 assem_debug("bge %p\n", a);
1027 u_int offset = genjmpcc(a);
1028 output_w32(0x54000000 | (offset << 5) | COND_GE);
1029}
1030
1031static void emit_jo(const void *a)
1032{
1033 assem_debug("bvs %p\n", a);
1034 u_int offset = genjmpcc(a);
1035 output_w32(0x54000000 | (offset << 5) | COND_VS);
1036}
1037
1038static void emit_jno(const void *a)
1039{
1040 assem_debug("bvc %p\n", a);
1041 u_int offset = genjmpcc(a);
1042 output_w32(0x54000000 | (offset << 5) | COND_VC);
1043}
1044
1045static void emit_jc(const void *a)
1046{
1047 assem_debug("bcs %p\n", a);
1048 u_int offset = genjmpcc(a);
1049 output_w32(0x54000000 | (offset << 5) | COND_CS);
1050}
1051
1052static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
1053{
1054 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
1055 u_int offset = genjmpcc(a);
1056 is64 = is64 ? 0x80000000 : 0;
1057 isnz = isnz ? 0x01000000 : 0;
1058 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
1059}
1060
1061static void *emit_cbz(u_int r, const void *a)
1062{
1063 void *ret = out;
1064 emit_cb(0, 0, a, r);
1065 return ret;
1066}
1067
1068static void emit_jmpreg(u_int r)
1069{
1070 assem_debug("br %s\n", regname64[r]);
1071 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1072}
1073
1074static void emit_retreg(u_int r)
1075{
1076 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1077 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1078}
1079
1080static void emit_ret(void)
1081{
1082 emit_retreg(LR);
1083}
1084
1085static void emit_adr(void *addr, u_int rt)
1086{
1087 intptr_t offset = (u_char *)addr - out;
1088 assert(-1048576 <= offset && offset < 1048576);
1089 assert(rt < 31);
1090 assem_debug("adr x%d,#%#lx\n", rt, offset);
1091 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1092}
1093
1094static void emit_adrp(void *addr, u_int rt)
1095{
1096 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1097 assert(-4294967296l <= offset && offset < 4294967296l);
1098 assert(rt < 31);
1099 offset >>= 12;
1100 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1101 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1102}
1103
1104static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1105{
1106 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1107 assert(-256 <= offset && offset < 256);
1108 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1109}
1110
1111static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1112{
1113 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1114 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1115}
1116
1117static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1118{
1119 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1120 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1121}
1122
1123static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1124{
1125 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1126 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1127}
1128
1129static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1130{
1131 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1132 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1133}
1134#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1135
1136static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1137{
1138 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1139 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1140}
1141
1142static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1143{
1144 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1145 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1146}
1147
1148static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1149{
1150 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1151 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1152}
1153
1154static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1155{
1156 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1157 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1158}
1159
1160static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1161{
1162 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1163 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1164}
1165
1166static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1167{
1168 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1169 assert(-256 <= offset && offset < 256);
1170 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1171}
1172
1173static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1174{
1175 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1176 assert(-256 <= offset && offset < 256);
1177 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1178}
1179
1180static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1181{
1182 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1183 assert(-256 <= offset && offset < 256);
1184 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1185}
1186
1187static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1188{
1189 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1190 assert(-256 <= offset && offset < 256);
1191 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1192}
1193
1194static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1195{
1196 if (!(offset & 3) && (u_int)offset <= 16380) {
1197 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1198 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1199 }
1200 else if (-256 <= offset && offset < 256) {
1201 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1202 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1203 }
1204 else
1205 assert(0);
1206}
1207
1208static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1209{
1210 if (!(offset & 1) && (u_int)offset <= 8190) {
1211 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1212 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1213 }
1214 else if (-256 <= offset && offset < 256) {
1215 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1216 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1217 }
1218 else
1219 assert(0);
1220}
1221
1222static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1223{
1224 if ((u_int)offset < 4096) {
1225 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1226 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1227 }
1228 else if (-256 <= offset && offset < 256) {
1229 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1230 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1231 }
1232 else
1233 assert(0);
1234}
1235
1236static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1237{
1238 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1239 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1240}
1241
1242static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1243{
1244 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1245 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1246}
1247
1248static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1249{
1250 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1251 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1252}
1253
1254static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1255{
1256 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1257 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1258}
1259
1260static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1261{
1262 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1263 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1264}
1265
1266static void emit_clz(u_int rs, u_int rt)
1267{
1268 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1269 output_w32(0x5ac01000 | rn_rd(rs, rt));
1270}
1271
1272// special case for checking invalid_code
1273static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1274{
1275 emit_shrimm(r, 12, rt);
1276 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1277 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1278}
1279
1280// special for loadlr_assemble, rs2 is destroyed
1281static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1282{
1283 emit_shl(rs2, shift, rs2);
1284 emit_bic(rs1, rs2, rt);
1285}
1286
1287static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1288{
1289 emit_shr(rs2, shift, rs2);
1290 emit_bic(rs1, rs2, rt);
1291}
1292
1293static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1294{
1295 u_int op = 0xb9000000;
1296 unused const char *ldst = is_st ? "st" : "ld";
1297 unused char rp = is64 ? 'x' : 'w';
1298 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1299 is64 = is64 ? 1 : 0;
1300 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1301 ofs = (ofs >> (2+is64));
1302 if (!is_st) op |= 0x00400000;
1303 if (is64) op |= 0x40000000;
1304 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1305}
1306
1307static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1308{
1309 u_int op = 0x29000000;
1310 unused const char *ldst = is_st ? "st" : "ld";
1311 unused char rp = is64 ? 'x' : 'w';
1312 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1313 is64 = is64 ? 1 : 0;
1314 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1315 ofs = (ofs >> (2+is64));
1316 assert(-64 <= ofs && ofs <= 63);
1317 ofs &= 0x7f;
1318 if (!is_st) op |= 0x00400000;
1319 if (is64) op |= 0x80000000;
1320 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1321}
1322
1323static void save_load_regs_all(int is_store, u_int reglist)
1324{
1325 int ofs = 0, c = 0;
1326 u_int r, pair[2];
1327 for (r = 0; reglist; r++, reglist >>= 1) {
1328 if (reglist & 1)
1329 pair[c++] = r;
1330 if (c == 2) {
1331 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1332 ofs += 8 * 2;
1333 c = 0;
1334 }
1335 }
1336 if (c) {
1337 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1338 ofs += 8;
1339 }
1340 assert(ofs <= SSP_CALLER_REGS);
1341}
1342
1343// Save registers before function call
1344static void save_regs(u_int reglist)
1345{
1346 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1347 save_load_regs_all(1, reglist);
1348}
1349
1350// Restore registers after function call
1351static void restore_regs(u_int reglist)
1352{
1353 reglist &= CALLER_SAVE_REGS;
1354 save_load_regs_all(0, reglist);
1355}
1356
1357/* Stubs/epilogue */
1358
1359static void literal_pool(int n)
1360{
1361 (void)literals;
1362}
1363
/* No-op here: emits nothing and ignores n. */
static void literal_pool_jumpover(int n)
{
  /* intentionally empty */
}
1367
1368// parsed by get_pointer, find_extjump_insn
1369static void emit_extjump(u_char *addr, u_int target)
1370{
1371 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1372
1373 emit_movz(target & 0xffff, 0);
1374 emit_movk_lsl16(target >> 16, 0);
1375
1376 // addr is in the current recompiled block (max 256k)
1377 // offset shouldn't exceed +/-1MB
1378 emit_adr(addr, 1);
1379 emit_far_jump(dyna_linker);
1380}
1381
1382static void check_extjump2(void *src)
1383{
1384 u_int *ptr = src;
1385 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1386 (void)ptr;
1387}
1388
1389// put rt_val into rt, potentially making use of rs with value rs_val
1390static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1391{
1392 int diff = rt_val - rs_val;
1393 if ((-4096 < diff && diff < 4096)
1394 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1395 emit_addimm(rs, diff, rt);
1396 else if (rt_val == ~rs_val)
1397 emit_not(rs, rt);
1398 else if (is_rotated_mask(rs_val ^ rt_val))
1399 emit_xorimm(rs, rs_val ^ rt_val, rt);
1400 else
1401 emit_movimm(rt_val, rt);
1402}
1403
1404// return 1 if the above function can do it's job cheaply
1405static int is_similar_value(u_int v1, u_int v2)
1406{
1407 int diff = v1 - v2;
1408 return (-4096 < diff && diff < 4096)
1409 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1410 || v1 == ~v2
1411 || is_rotated_mask(v1 ^ v2);
1412}
1413
1414static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1415{
1416 if (rt_val < 0x100000000ull) {
1417 emit_movimm_from(rs_val, rs, rt_val, rt);
1418 return;
1419 }
1420 // just move the whole thing. At least on Linux all addresses
1421 // seem to be 48bit, so 3 insns - not great not terrible
1422 emit_movimm64(rt_val, rt);
1423}
1424
1425// trashes x2
1426static void pass_args64(u_int a0, u_int a1)
1427{
1428 if(a0==1&&a1==0) {
1429 // must swap
1430 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1431 }
1432 else if(a0!=0&&a1==0) {
1433 emit_mov64(a1,1);
1434 if (a0>=0) emit_mov64(a0,0);
1435 }
1436 else {
1437 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1438 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1439 }
1440}
1441
1442static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1443{
1444 switch(type) {
1445 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1446 case LOADBU_STUB:
1447 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1448 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1449 case LOADHU_STUB:
1450 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1451 case LOADW_STUB:
1452 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1453 default: assert(0);
1454 }
1455}
1456
1457#include "pcsxmem.h"
1458//#include "pcsxmem_inline.c"
1459
1460static void do_readstub(int n)
1461{
1462 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1463 set_jump_target(stubs[n].addr, out);
1464 enum stub_type type = stubs[n].type;
1465 int i = stubs[n].a;
1466 int rs = stubs[n].b;
1467 const struct regstat *i_regs = (void *)stubs[n].c;
1468 u_int reglist = stubs[n].e;
1469 const signed char *i_regmap = i_regs->regmap;
1470 int rt;
1471 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1472 rt=get_reg(i_regmap,FTEMP);
1473 }else{
1474 rt=get_reg(i_regmap,dops[i].rt1);
1475 }
1476 assert(rs>=0);
1477 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1478 void *restore_jump = NULL, *handler_jump = NULL;
1479 reglist|=(1<<rs);
1480 for (r = 0; r < HOST_CCREG; r++) {
1481 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1482 temp = r;
1483 break;
1484 }
1485 }
1486 if(rt>=0&&dops[i].rt1!=0)
1487 reglist&=~(1<<rt);
1488 if(temp==-1) {
1489 save_regs(reglist);
1490 regs_saved=1;
1491 temp=(rs==0)?2:0;
1492 }
1493 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1494 temp2=1;
1495 emit_readdword(&mem_rtab,temp);
1496 emit_shrimm(rs,12,temp2);
1497 emit_readdword_dualindexedx8(temp,temp2,temp2);
1498 emit_adds64(temp2,temp2,temp2);
1499 handler_jump=out;
1500 emit_jc(0);
1501 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1502 switch(type) {
1503 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1504 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1505 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1506 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1507 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1508 default: assert(0);
1509 }
1510 }
1511 if(regs_saved) {
1512 restore_jump=out;
1513 emit_jmp(0); // jump to reg restore
1514 }
1515 else
1516 emit_jmp(stubs[n].retaddr); // return address
1517 set_jump_target(handler_jump, out);
1518
1519 if(!regs_saved)
1520 save_regs(reglist);
1521 void *handler=NULL;
1522 if(type==LOADB_STUB||type==LOADBU_STUB)
1523 handler=jump_handler_read8;
1524 if(type==LOADH_STUB||type==LOADHU_STUB)
1525 handler=jump_handler_read16;
1526 if(type==LOADW_STUB)
1527 handler=jump_handler_read32;
1528 assert(handler);
1529 pass_args64(rs,temp2);
1530 int cc=get_reg(i_regmap,CCREG);
1531 if(cc<0)
1532 emit_loadreg(CCREG,2);
1533 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1534 emit_far_call(handler);
1535 // (no cycle reload after read)
1536 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1537 loadstore_extend(type,0,rt);
1538 }
1539 if(restore_jump)
1540 set_jump_target(restore_jump, out);
1541 restore_regs(reglist);
1542 emit_jmp(stubs[n].retaddr);
1543}
1544
1545static void inline_readstub(enum stub_type type, int i, u_int addr,
1546 const signed char regmap[], int target, int adj, u_int reglist)
1547{
1548 int ra = cinfo[i].addr;
1549 int rt = get_reg(regmap, target);
1550 assert(ra >= 0);
1551 u_int is_dynamic=0;
1552 uintptr_t host_addr = 0;
1553 void *handler;
1554 int cc=get_reg(regmap,CCREG);
1555 //if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
1556 // return;
1557 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1558 if (handler == NULL) {
1559 if(rt<0||dops[i].rt1==0)
1560 return;
1561 if (addr != host_addr)
1562 emit_movimm_from64(addr, ra, host_addr, ra);
1563 switch(type) {
1564 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1565 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1566 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1567 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1568 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
1569 default: assert(0);
1570 }
1571 return;
1572 }
1573 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1574 if (is_dynamic) {
1575 if(type==LOADB_STUB||type==LOADBU_STUB)
1576 handler=jump_handler_read8;
1577 if(type==LOADH_STUB||type==LOADHU_STUB)
1578 handler=jump_handler_read16;
1579 if(type==LOADW_STUB)
1580 handler=jump_handler_read32;
1581 }
1582
1583 // call a memhandler
1584 if(rt>=0&&dops[i].rt1!=0)
1585 reglist&=~(1<<rt);
1586 save_regs(reglist);
1587 if(target==0)
1588 emit_movimm(addr,0);
1589 else if(ra!=0)
1590 emit_mov(ra,0);
1591 if(cc<0)
1592 emit_loadreg(CCREG,2);
1593 emit_addimm(cc<0?2:cc,adj,2);
1594 if(is_dynamic) {
1595 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1596 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1597 if (-4294967296l <= offset && offset < 4294967296l) {
1598 emit_adrp((void *)l1, 1);
1599 emit_addimm64(1, l1 & 0xfff, 1);
1600 }
1601 else
1602 emit_movimm64(l1, 1);
1603 }
1604 else
1605 emit_far_call(do_memhandler_pre);
1606
1607 emit_far_call(handler);
1608
1609 // (no cycle reload after read)
1610 if(rt>=0&&dops[i].rt1!=0)
1611 loadstore_extend(type, 0, rt);
1612 restore_regs(reglist);
1613}
1614
1615static void do_writestub(int n)
1616{
1617 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1618 set_jump_target(stubs[n].addr, out);
1619 enum stub_type type=stubs[n].type;
1620 int i=stubs[n].a;
1621 int rs=stubs[n].b;
1622 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1623 u_int reglist=stubs[n].e;
1624 signed char *i_regmap=i_regs->regmap;
1625 int rt,r;
1626 if(dops[i].itype==C2LS) {
1627 rt=get_reg(i_regmap,r=FTEMP);
1628 }else{
1629 rt=get_reg(i_regmap,r=dops[i].rs2);
1630 }
1631 assert(rs>=0);
1632 assert(rt>=0);
1633 int rtmp,temp=-1,temp2,regs_saved=0;
1634 void *restore_jump = NULL, *handler_jump = NULL;
1635 int reglist2=reglist|(1<<rs)|(1<<rt);
1636 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1637 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1638 temp = rtmp;
1639 break;
1640 }
1641 }
1642 if(temp==-1) {
1643 save_regs(reglist);
1644 regs_saved=1;
1645 for(rtmp=0;rtmp<=3;rtmp++)
1646 if(rtmp!=rs&&rtmp!=rt)
1647 {temp=rtmp;break;}
1648 }
1649 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1650 temp2=3;
1651 else {
1652 host_tempreg_acquire();
1653 temp2=HOST_TEMPREG;
1654 }
1655 emit_readdword(&mem_wtab,temp);
1656 emit_shrimm(rs,12,temp2);
1657 emit_readdword_dualindexedx8(temp,temp2,temp2);
1658 emit_adds64(temp2,temp2,temp2);
1659 handler_jump=out;
1660 emit_jc(0);
1661 switch(type) {
1662 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1663 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1664 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1665 default: assert(0);
1666 }
1667 if(regs_saved) {
1668 restore_jump=out;
1669 emit_jmp(0); // jump to reg restore
1670 }
1671 else
1672 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1673 set_jump_target(handler_jump, out);
1674
1675 if(!regs_saved)
1676 save_regs(reglist);
1677 void *handler=NULL;
1678 switch(type) {
1679 case STOREB_STUB: handler=jump_handler_write8; break;
1680 case STOREH_STUB: handler=jump_handler_write16; break;
1681 case STOREW_STUB: handler=jump_handler_write32; break;
1682 default: assert(0);
1683 }
1684 assert(handler);
1685 pass_args(rs,rt);
1686 if(temp2!=3) {
1687 emit_mov64(temp2,3);
1688 host_tempreg_release();
1689 }
1690 int cc=get_reg(i_regmap,CCREG);
1691 if(cc<0)
1692 emit_loadreg(CCREG,2);
1693 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1694 // returns new cycle_count
1695 emit_far_call(handler);
1696 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1697 if(cc<0)
1698 emit_storereg(CCREG,2);
1699 if(restore_jump)
1700 set_jump_target(restore_jump, out);
1701 restore_regs(reglist);
1702 emit_jmp(stubs[n].retaddr);
1703}
1704
1705static void inline_writestub(enum stub_type type, int i, u_int addr,
1706 const signed char regmap[], int target, int adj, u_int reglist)
1707{
1708 int ra = cinfo[i].addr;
1709 int rt = get_reg(regmap,target);
1710 assert(ra >= 0);
1711 assert(rt >= 0);
1712 uintptr_t host_addr = 0;
1713 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1714 if (handler == NULL) {
1715 if (addr != host_addr)
1716 emit_movimm_from64(addr, ra, host_addr, ra);
1717 switch (type) {
1718 case STOREB_STUB: emit_writebyte_indexed(rt, 0, ra); break;
1719 case STOREH_STUB: emit_writehword_indexed(rt, 0, ra); break;
1720 case STOREW_STUB: emit_writeword_indexed(rt, 0, ra); break;
1721 default: assert(0);
1722 }
1723 return;
1724 }
1725
1726 // call a memhandler
1727 save_regs(reglist);
1728 emit_writeword(ra, &address); // some handlers still need it
1729 loadstore_extend(type, rt, 0);
1730 int cc, cc_use;
1731 cc = cc_use = get_reg(regmap, CCREG);
1732 if (cc < 0)
1733 emit_loadreg(CCREG, (cc_use = 2));
1734 emit_addimm(cc_use, adj, 2);
1735
1736 emit_far_call(do_memhandler_pre);
1737 emit_far_call(handler);
1738 emit_far_call(do_memhandler_post);
1739 emit_addimm(0, -adj, cc_use);
1740 if (cc < 0)
1741 emit_storereg(CCREG, cc_use);
1742 restore_regs(reglist);
1743}
1744
1745/* Special assem */
1746
1747static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1748{
1749 save_load_regs_all(1, reglist);
1750 cop2_do_stall_check(op, i, i_regs, 0);
1751#ifdef PCNT
1752 emit_movimm(op, 0);
1753 emit_far_call(pcnt_gte_start);
1754#endif
1755 // pointer to cop2 regs
1756 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1757}
1758
1759static void c2op_epilogue(u_int op,u_int reglist)
1760{
1761#ifdef PCNT
1762 emit_movimm(op, 0);
1763 emit_far_call(pcnt_gte_end);
1764#endif
1765 save_load_regs_all(0, reglist);
1766}
1767
1768static void c2op_assemble(int i, const struct regstat *i_regs)
1769{
1770 u_int c2op=source[i]&0x3f;
1771 u_int hr,reglist_full=0,reglist;
1772 int need_flags,need_ir;
1773 for(hr=0;hr<HOST_REGS;hr++) {
1774 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1775 }
1776 reglist=reglist_full&CALLER_SAVE_REGS;
1777
1778 if (gte_handlers[c2op]!=NULL) {
1779 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1780 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1781 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1782 source[i],gte_unneeded[i+1],need_flags,need_ir);
1783 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1784 need_flags=0;
1785 //int shift = (source[i] >> 19) & 1;
1786 //int lm = (source[i] >> 10) & 1;
1787 switch(c2op) {
1788 default:
1789 (void)need_ir;
1790 c2op_prologue(c2op, i, i_regs, reglist);
1791 emit_movimm(source[i],1); // opcode
1792 emit_writeword(1,&psxRegs.code);
1793 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1794 break;
1795 }
1796 c2op_epilogue(c2op,reglist);
1797 }
1798}
1799
1800static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1801{
1802 //value = value & 0x7ffff000;
1803 //if (value & 0x7f87e000) value |= 0x80000000;
1804 emit_andimm(sl, 0x7fffe000, temp);
1805 emit_testimm(temp, 0xff87ffff);
1806 emit_andimm(sl, 0x7ffff000, temp);
1807 host_tempreg_acquire();
1808 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1809 emit_cmovne_reg(HOST_TEMPREG, temp);
1810 host_tempreg_release();
1811 assert(0); // testing needed
1812}
1813
1814static void do_mfc2_31_one(u_int copr,signed char temp)
1815{
1816 emit_readshword(&reg_cop2d[copr],temp);
1817 emit_bicsar_imm(temp,31,temp);
1818 emit_cmpimm(temp,0xf80);
1819 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1820 emit_andimm(temp,0xf80,temp);
1821}
1822
1823static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1824{
1825 if (temp < 0) {
1826 host_tempreg_acquire();
1827 temp = HOST_TEMPREG;
1828 }
1829 do_mfc2_31_one(9,temp);
1830 emit_shrimm(temp,7,tl);
1831 do_mfc2_31_one(10,temp);
1832 emit_orrshr_imm(temp,2,tl);
1833 do_mfc2_31_one(11,temp);
1834 emit_orrshl_imm(temp,3,tl);
1835 emit_writeword(tl,&reg_cop2d[29]);
1836
1837 if (temp == HOST_TEMPREG)
1838 host_tempreg_release();
1839}
1840
1841static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1842{
1843 // case 0x18: MULT
1844 // case 0x19: MULTU
1845 // case 0x1A: DIV
1846 // case 0x1B: DIVU
1847 if(dops[i].rs1&&dops[i].rs2)
1848 {
1849 switch(dops[i].opcode2)
1850 {
1851 case 0x18: // MULT
1852 case 0x19: // MULTU
1853 {
1854 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1855 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1856 signed char hi=get_reg(i_regs->regmap,HIREG);
1857 signed char lo=get_reg(i_regs->regmap,LOREG);
1858 assert(m1>=0);
1859 assert(m2>=0);
1860 assert(hi>=0);
1861 assert(lo>=0);
1862
1863 if(dops[i].opcode2==0x18) // MULT
1864 emit_smull(m1,m2,hi);
1865 else // MULTU
1866 emit_umull(m1,m2,hi);
1867
1868 emit_mov(hi,lo);
1869 emit_shrimm64(hi,32,hi);
1870 break;
1871 }
1872 case 0x1A: // DIV
1873 case 0x1B: // DIVU
1874 {
1875 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1876 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1877 signed char quotient=get_reg(i_regs->regmap,LOREG);
1878 signed char remainder=get_reg(i_regs->regmap,HIREG);
1879 assert(numerator>=0);
1880 assert(denominator>=0);
1881 assert(quotient>=0);
1882 assert(remainder>=0);
1883
1884 if (dops[i].opcode2 == 0x1A) // DIV
1885 emit_sdiv(numerator,denominator,quotient);
1886 else // DIVU
1887 emit_udiv(numerator,denominator,quotient);
1888 emit_msub(quotient,denominator,numerator,remainder);
1889
1890 // div 0 quotient (remainder is already correct)
1891 host_tempreg_acquire();
1892 if (dops[i].opcode2 == 0x1A) { // DIV
1893 emit_add_lsrimm(WZR,numerator,31,HOST_TEMPREG);
1894 emit_orn_asrimm(HOST_TEMPREG,numerator,31,HOST_TEMPREG);
1895 }
1896 else
1897 emit_movimm(~0,HOST_TEMPREG);
1898 emit_test(denominator,denominator);
1899 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1900 host_tempreg_release();
1901 break;
1902 }
1903 default:
1904 assert(0);
1905 }
1906 }
1907 else
1908 {
1909 signed char hr=get_reg(i_regs->regmap,HIREG);
1910 signed char lr=get_reg(i_regs->regmap,LOREG);
1911 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1912 {
1913 if (dops[i].rs1) {
1914 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1915 assert(numerator >= 0);
1916 if (hr >= 0)
1917 emit_mov(numerator,hr);
1918 if (lr >= 0) {
1919 if (dops[i].opcode2 == 0x1A) { // DIV
1920 emit_add_lsrimm(WZR,numerator,31,lr);
1921 emit_orn_asrimm(lr,numerator,31,lr);
1922 }
1923 else
1924 emit_movimm(~0,lr);
1925 }
1926 }
1927 else {
1928 if (hr >= 0) emit_zeroreg(hr);
1929 if (lr >= 0) emit_movimm(~0,lr);
1930 }
1931 }
1932 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
1933 {
1934 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
1935 assert(denominator >= 0);
1936 if (hr >= 0) emit_zeroreg(hr);
1937 if (lr >= 0) {
1938 emit_zeroreg(lr);
1939 emit_test(denominator, denominator);
1940 emit_csinvne_reg(lr, lr, lr);
1941 }
1942 }
1943 else
1944 {
1945 // Multiply by zero is zero.
1946 if (hr >= 0) emit_zeroreg(hr);
1947 if (lr >= 0) emit_zeroreg(lr);
1948 }
1949 }
1950}
1951#define multdiv_assemble multdiv_assemble_arm64
1952
1953// wb_dirtys making use of stp when possible
1954static void wb_dirtys(const signed char i_regmap[], u_int i_dirty)
1955{
1956 signed char mregs[34+1];
1957 int r, hr;
1958 memset(mregs, -1, sizeof(mregs));
1959 for (hr = 0; hr < HOST_REGS; hr++) {
1960 r = i_regmap[hr];
1961 if (hr == EXCLUDE_REG || r <= 0 || r == CCREG)
1962 continue;
1963 if (!((i_dirty >> hr) & 1))
1964 continue;
1965 assert(r < 34u);
1966 mregs[r] = hr;
1967 }
1968 for (r = 1; r < 34; r++) {
1969 if (mregs[r] < 0)
1970 continue;
1971 if (mregs[r+1] >= 0) {
1972 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
1973 emit_ldstp(1, 0, mregs[r], mregs[r+1], FP, offset);
1974 r++;
1975 }
1976 else
1977 emit_storereg(r, mregs[r]);
1978 }
1979}
1980#define wb_dirtys wb_dirtys
1981
1982static void load_all_regs(const signed char i_regmap[])
1983{
1984 signed char mregs[34+1];
1985 int r, hr;
1986 memset(mregs, -1, sizeof(mregs));
1987 for (hr = 0; hr < HOST_REGS; hr++) {
1988 r = i_regmap[hr];
1989 if (hr == EXCLUDE_REG || r < 0 || r == CCREG)
1990 continue;
1991 if ((u_int)r < 34u)
1992 mregs[r] = hr;
1993 else if (r < TEMPREG)
1994 emit_loadreg(r, hr);
1995 }
1996 if (mregs[0] >= 0)
1997 emit_zeroreg(mregs[0]); // we could use arm64's ZR instead of reg alloc
1998 for (r = 1; r < 34; r++) {
1999 if (mregs[r] < 0)
2000 continue;
2001 if (mregs[r+1] >= 0) {
2002 uintptr_t offset = (u_char *)&psxRegs.GPR.r[r] - (u_char *)&dynarec_local;
2003 emit_ldstp(0, 0, mregs[r], mregs[r+1], FP, offset);
2004 r++;
2005 }
2006 else
2007 emit_loadreg(r, mregs[r]);
2008 }
2009}
2010#define load_all_regs load_all_regs
2011
2012static void do_jump_vaddr(u_int rs)
2013{
2014 if (rs != 0)
2015 emit_mov(rs, 0);
2016 emit_far_call(ndrc_get_addr_ht);
2017 emit_jmpreg(0);
2018}
2019
2020static void do_preload_rhash(u_int r) {
2021 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2022 // register. On ARM the hash can be done with a single instruction (below)
2023}
2024
2025static void do_preload_rhtbl(u_int ht) {
2026 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
2027}
2028
2029static void do_rhash(u_int rs,u_int rh) {
2030 emit_andimm(rs, 0xf8, rh);
2031}
2032
2033static void do_miniht_load(int ht, u_int rh) {
2034 emit_add64(ht, rh, ht);
2035 emit_ldst(0, 0, rh, ht, 0);
2036}
2037
2038static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2039 emit_cmp(rh, rs);
2040 void *jaddr = out;
2041 emit_jeq(0);
2042 do_jump_vaddr(rs);
2043
2044 set_jump_target(jaddr, out);
2045 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2046 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2047 emit_jmpreg(ht);
2048}
2049
2050// parsed by set_jump_target?
2051static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2052 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2053 emit_movk(return_address&0xffff,rt);
2054 add_to_linker(out,return_address,1);
2055 emit_adr(out,temp);
2056 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2057 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2058}
2059
2060static unused void clear_cache_arm64(char *start, char *end)
2061{
2062 // Don't rely on GCC's __clear_cache implementation, as it caches
2063 // icache/dcache cache line sizes, that can vary between cores on
2064 // big.LITTLE architectures.
2065 uint64_t addr, ctr_el0;
2066 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
2067 size_t isize, dsize;
2068
2069 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
2070 isize = 4 << ((ctr_el0 >> 0) & 0xf);
2071 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
2072
2073 // use the global minimum cache line size
2074 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
2075 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
2076
2077 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
2078 not required for instruction to data coherence. */
2079 if ((ctr_el0 & (1 << 28)) == 0x0) {
2080 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
2081 for (; addr < (uint64_t)end; addr += dsize)
2082 // use "civac" instead of "cvau", as this is the suggested workaround for
2083 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
2084 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
2085 }
2086 __asm__ volatile("dsb ish" : : : "memory");
2087
2088 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
2089 Unification is not required for instruction to data coherence. */
2090 if ((ctr_el0 & (1 << 29)) == 0x0) {
2091 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
2092 for (; addr < (uint64_t)end; addr += isize)
2093 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
2094
2095 __asm__ volatile("dsb ish" : : : "memory");
2096 }
2097
2098 __asm__ volatile("isb" : : : "memory");
2099}
2100
2101// CPU-architecture-specific initialization
2102static void arch_init(void)
2103{
2104 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2105 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
2106 size_t i;
2107 assert(!(diff & 3));
2108 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2109 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2110 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2111 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2112 }
2113 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2114}
2115
2116// vim:shiftwidth=2:expandtab