drc: rework smc checks again
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26void do_memhandler_pre();
27void do_memhandler_post();
28
29/* Linker */
30static void set_jump_target(void *addr, void *target)
31{
32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
33 intptr_t offset = (u_char *)target - (u_char *)addr;
34
35 if ((*ptr&0xFC000000) == 0x14000000) { // b
36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
38 }
39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
43 // should only happen when jumping to an already compiled block (see add_jump_out)
44 // a workaround would be to do a trampoline jump via a stub at the end of the block
45 assert(-1048576 <= offset && offset < 1048576);
46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
47 }
48 else if((*ptr&0x9f000000)==0x10000000) { // adr
49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
52 }
53 else
54 abort(); // should not happen
55}
56
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third instruction word of the stub must be an adr. Its signed
// immediate (high part in bits 23:5, low two bits in bits 30:29) is decoded
// and the result is returned relative to that adr instruction.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  int offset;

  assert((*ptr&0x9f000000) == 0x10000000); // adr

  // Decode with unsigned arithmetic plus an explicit sign extension:
  // left-shifting a signed value into the sign bit is undefined behavior.
  offset = (int)((insn >> 5) & 0x7ffff);
  if (offset & 0x40000)
    offset -= 0x80000;
  offset = offset * 4 + (int)((insn >> 29) & 0x3);

  return ptr + offset / 4;
}
66
67#if 0
68// find where external branch is linked to using addr of its stub:
69// get address that the stub loads (dyna_linker arg1),
70// treat it as a pointer to branch insn,
71// return addr where that branch jumps to
72static void *get_pointer(void *stub)
73{
74 int *i_ptr = find_extjump_insn(stub);
75 if ((*i_ptr&0xfc000000) == 0x14000000) // b
76 return i_ptr + ((signed int)(*i_ptr<<6)>>6);
77 if ((*i_ptr&0xff000000) == 0x54000000 // b.cond
78 || (*i_ptr&0x7e000000) == 0x34000000) // cbz/cbnz
79 return i_ptr + ((signed int)(*i_ptr<<8)>>13);
80 assert(0);
81 return NULL;
82}
83#endif
84
85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition code values; the emitters place them directly into the
// condition field of the generated instructions.
enum {
  COND_EQ = 0,  COND_NE = 1,  COND_CS = 2,  COND_CC = 3,
  COND_MI = 4,  COND_PL = 5,  COND_VS = 6,  COND_VC = 7,
  COND_HI = 8,  COND_LS = 9,  COND_GE = 10, COND_LT = 11,
  COND_GT = 12, COND_LE = 13, COND_AW = 14, COND_NV = 15
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
139};
140
141static void output_w32(u_int word)
142{
143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
144 out += 4;
145}
146
// Pack rn into bits 9:5 and rd into bits 4:0; both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  return fields;
}
153
// Pack rm into bits 20:16, rn into bits 9:5 and rd into bits 4:0.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
161
162static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
163{
164 assert(ra < 32);
165 return rm_rn_rd(rm, rn, rd) | (ra << 10);
166}
167
// Pack imm7 (bits 21:15), rt2 (bits 14:10), rn (bits 9:5) and rt (bits 4:0).
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
176
177static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
178{
179 assert(imm6 <= 63);
180 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
181}
182
// Pack a 16-bit immediate into bits 20:5 and rd into bits 4:0.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
189
// Pack a 12-bit immediate into bits 21:10, rn into bits 9:5, rd into bits 4:0.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
197
// Pack a 9-bit immediate into bits 20:12, rn into bits 9:5 and the
// transfer register into bits 4:0.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
205
// Pack a 19-bit immediate into bits 23:5 and rt into bits 4:0.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
212
// Pack n (bit 22), immr (bits 21:16), imms (bits 15:10), rn (bits 9:5)
// and rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
222
223static u_int genjmp(const u_char *addr)
224{
225 intptr_t offset = addr - out;
226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
227 if (offset < -134217728 || offset > 134217727) {
228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 abort();
230 return 0;
231 }
232 return ((u_int)offset >> 2) & 0x03ffffff;
233}
234
235static u_int genjmpcc(const u_char *addr)
236{
237 intptr_t offset = addr - out;
238 if ((uintptr_t)addr < 3) return 0;
239 if (offset < -1048576 || offset > 1048572) {
240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 abort();
242 return 0;
243 }
244 return ((u_int)offset >> 2) & 0x7ffff;
245}
246
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two or wraps to 0), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
251
252// This function returns true if the argument contains a
253// non-empty sequence of ones (possibly rotated) with the remainder zero.
254static uint32_t is_rotated_mask(u_int value)
255{
256 if (value == 0 || value == ~0)
257 return 0;
258 if (is_mask((value - 1) | value))
259 return 1;
260 return is_mask((~value - 1) | ~value);
261}
262
263static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
264{
265 int lzeros, tzeros, ones;
266 assert(value != 0);
267 if (is_mask((value - 1) | value)) {
268 lzeros = __builtin_clz(value);
269 tzeros = __builtin_ctz(value);
270 ones = 32 - lzeros - tzeros;
271 *immr = (32 - tzeros) & 31;
272 *imms = ones - 1;
273 return;
274 }
275 value = ~value;
276 if (is_mask((value - 1) | value)) {
277 lzeros = __builtin_clz(value);
278 tzeros = __builtin_ctz(value);
279 ones = 32 - lzeros - tzeros;
280 *immr = lzeros;
281 *imms = 31 - ones;
282 return;
283 }
284 abort();
285}
286
287static void emit_mov(u_int rs, u_int rt)
288{
289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
291}
292
293static void emit_mov64(u_int rs, u_int rt)
294{
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
297}
298
299static void emit_add(u_int rs1, u_int rs2, u_int rt)
300{
301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
303}
304
305static void emit_add64(u_int rs1, u_int rs2, u_int rt)
306{
307 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
308 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
309}
310
311static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
312{
313 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
314 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
315}
316#define emit_adds_ptr emit_adds64
317
318static void emit_neg(u_int rs, u_int rt)
319{
320 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
321 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
322}
323
324static void emit_sub(u_int rs1, u_int rs2, u_int rt)
325{
326 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
327 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
328}
329
330static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
331{
332 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
333 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
334}
335
336static void emit_movz(u_int imm, u_int rt)
337{
338 assem_debug("movz %s,#%#x\n", regname[rt], imm);
339 output_w32(0x52800000 | imm16_rd(imm, rt));
340}
341
342static void emit_movz_lsl16(u_int imm, u_int rt)
343{
344 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
345 output_w32(0x52a00000 | imm16_rd(imm, rt));
346}
347
348static void emit_movn(u_int imm, u_int rt)
349{
350 assem_debug("movn %s,#%#x\n", regname[rt], imm);
351 output_w32(0x12800000 | imm16_rd(imm, rt));
352}
353
354static void emit_movn_lsl16(u_int imm,u_int rt)
355{
356 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
357 output_w32(0x12a00000 | imm16_rd(imm, rt));
358}
359
360static void emit_movk(u_int imm,u_int rt)
361{
362 assem_debug("movk %s,#%#x\n", regname[rt], imm);
363 output_w32(0x72800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movk_lsl16(u_int imm,u_int rt)
367{
368 assert(imm<65536);
369 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
370 output_w32(0x72a00000 | imm16_rd(imm, rt));
371}
372
373static void emit_zeroreg(u_int rt)
374{
375 emit_movz(0, rt);
376}
377
378static void emit_movimm(u_int imm, u_int rt)
379{
380 if (imm < 65536)
381 emit_movz(imm, rt);
382 else if ((~imm) < 65536)
383 emit_movn(~imm, rt);
384 else if ((imm&0xffff) == 0)
385 emit_movz_lsl16(imm >> 16, rt);
386 else if (((~imm)&0xffff) == 0)
387 emit_movn_lsl16(~imm >> 16, rt);
388 else if (is_rotated_mask(imm)) {
389 u_int immr, imms;
390 gen_logical_imm(imm, &immr, &imms);
391 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
392 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
393 }
394 else {
395 emit_movz(imm & 0xffff, rt);
396 emit_movk_lsl16(imm >> 16, rt);
397 }
398}
399
400static void emit_movimm64(uint64_t imm, u_int rt)
401{
402 u_int shift, op, imm16, insns = 0;
403 for (shift = 0; shift < 4; shift++) {
404 imm16 = (imm >> shift * 16) & 0xffff;
405 if (!imm16)
406 continue;
407 op = insns ? 0xf2800000 : 0xd2800000;
408 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
409 if (shift)
410 assem_debug(",lsl #%u", shift * 16);
411 assem_debug("\n");
412 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
413 insns++;
414 }
415 if (!insns) {
416 assem_debug("movz %s,#0\n", regname64[rt]);
417 output_w32(0xd2800000 | imm16_rd(0, rt));
418 }
419}
420
421static void emit_readword(void *addr, u_int rt)
422{
423 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
424 if (!(offset & 3) && offset <= 16380) {
425 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
426 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
427 }
428 else
429 abort();
430}
431
432static void emit_readdword(void *addr, u_int rt)
433{
434 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
435 if (!(offset & 7) && offset <= 32760) {
436 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
437 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
438 }
439 else
440 abort();
441}
442#define emit_readptr emit_readdword
443
444static void emit_readshword(void *addr, u_int rt)
445{
446 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
447 if (!(offset & 1) && offset <= 8190) {
448 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
449 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
450 }
451 else
452 assert(0);
453}
454
455static void emit_loadreg(u_int r, u_int hr)
456{
457 int is64 = 0;
458 if (r == 0)
459 emit_zeroreg(hr);
460 else {
461 void *addr;
462 switch (r) {
463 //case HIREG: addr = &hi; break;
464 //case LOREG: addr = &lo; break;
465 case CCREG: addr = &cycle_count; break;
466 case CSREG: addr = &Status; break;
467 case INVCP: addr = &invc_ptr; is64 = 1; break;
468 case ROREG: addr = &ram_offset; is64 = 1; break;
469 default:
470 assert(r < 34);
471 addr = &psxRegs.GPR.r[r];
472 break;
473 }
474 if (is64)
475 emit_readdword(addr, hr);
476 else
477 emit_readword(addr, hr);
478 }
479}
480
481static void emit_writeword(u_int rt, void *addr)
482{
483 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
484 if (!(offset & 3) && offset <= 16380) {
485 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
486 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
487 }
488 else
489 assert(0);
490}
491
492static void emit_writedword(u_int rt, void *addr)
493{
494 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
495 if (!(offset & 7) && offset <= 32760) {
496 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
497 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
498 }
499 else
500 abort();
501}
502
503static void emit_storereg(u_int r, u_int hr)
504{
505 assert(r < 64);
506 void *addr = &psxRegs.GPR.r[r];
507 switch (r) {
508 //case HIREG: addr = &hi; break;
509 //case LOREG: addr = &lo; break;
510 case CCREG: addr = &cycle_count; break;
511 default: assert(r < 34); break;
512 }
513 emit_writeword(hr, addr);
514}
515
516static void emit_test(u_int rs, u_int rt)
517{
518 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
519 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
520}
521
522static void emit_testimm(u_int rs, u_int imm)
523{
524 u_int immr, imms;
525 assem_debug("tst %s,#%#x\n", regname[rs], imm);
526 assert(is_rotated_mask(imm)); // good enough for PCSX
527 gen_logical_imm(imm, &immr, &imms);
528 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
529}
530
531static void emit_not(u_int rs,u_int rt)
532{
533 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
534 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
535}
536
537static void emit_and(u_int rs1,u_int rs2,u_int rt)
538{
539 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
540 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
541}
542
543static void emit_or(u_int rs1,u_int rs2,u_int rt)
544{
545 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
546 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
547}
548
549static void emit_bic(u_int rs1,u_int rs2,u_int rt)
550{
551 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
553}
554
555static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
556{
557 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
558 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
559}
560
561static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
562{
563 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
564 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
565}
566
567static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
568{
569 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
570 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
571}
572
573static void emit_xor(u_int rs1,u_int rs2,u_int rt)
574{
575 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
576 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
577}
578
579static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
580{
581 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
582 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
583}
584
585static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
586{
587 unused const char *st = s ? "s" : "";
588 s = s ? 0x20000000 : 0;
589 is64 = is64 ? 0x80000000 : 0;
590 if (imm < 4096) {
591 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
592 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
593 }
594 else if (-imm < 4096) {
595 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
596 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
597 }
598 else if (imm < 16777216) {
599 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
600 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
603 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
604 }
605 }
606 else if (-imm < 16777216) {
607 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
608 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
609 if ((imm & 0xfff) || s) {
610 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
611 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
612 }
613 }
614 else
615 abort();
616}
617
618static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
619{
620 if (imm == 0) {
621 emit_mov(rs, rt);
622 return;
623 }
624 emit_addimm_s(0, 0, rs, imm, rt);
625}
626
627static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
628{
629 emit_addimm_s(0, 1, rs, imm, rt);
630}
631
632static void emit_addimm_and_set_flags(int imm, u_int rt)
633{
634 emit_addimm_s(1, 0, rt, imm, rt);
635}
636
637static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
638{
639 const char *names[] = { "and", "orr", "eor", "ands" };
640 const char *name = names[op];
641 u_int immr, imms;
642 op = op << 29;
643 if (is_rotated_mask(imm)) {
644 gen_logical_imm(imm, &immr, &imms);
645 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
646 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
647 }
648 else {
649 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
650 host_tempreg_acquire();
651 emit_movimm(imm, HOST_TEMPREG);
652 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
653 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_release();
656 }
657 (void)name;
658}
659
660static void emit_andimm(u_int rs, u_int imm, u_int rt)
661{
662 if (imm == 0)
663 emit_zeroreg(rt);
664 else
665 emit_logicop_imm(0, rs, imm, rt);
666}
667
668static void emit_orimm(u_int rs, u_int imm, u_int rt)
669{
670 if (imm == 0) {
671 if (rs != rt)
672 emit_mov(rs, rt);
673 }
674 else
675 emit_logicop_imm(1, rs, imm, rt);
676}
677
678static void emit_xorimm(u_int rs, u_int imm, u_int rt)
679{
680 if (imm == 0) {
681 if (rs != rt)
682 emit_mov(rs, rt);
683 }
684 else
685 emit_logicop_imm(2, rs, imm, rt);
686}
687
688static void emit_sbfm(u_int rs,u_int imm,u_int rt)
689{
690 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
692}
693
694static void emit_ubfm(u_int rs,u_int imm,u_int rt)
695{
696 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
698}
699
700static void emit_shlimm(u_int rs,u_int imm,u_int rt)
701{
702 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
704}
705
706static void emit_shrimm(u_int rs,u_int imm,u_int rt)
707{
708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
710}
711
712static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
713{
714 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
716}
717
718static void emit_sarimm(u_int rs,u_int imm,u_int rt)
719{
720 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
722}
723
724static void emit_rorimm(u_int rs,u_int imm,u_int rt)
725{
726 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
727 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
728}
729
730static void emit_signextend16(u_int rs, u_int rt)
731{
732 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
733 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
734}
735
736static void emit_shl(u_int rs,u_int rshift,u_int rt)
737{
738 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
740}
741
742static void emit_shr(u_int rs,u_int rshift,u_int rt)
743{
744 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
746}
747
748static void emit_sar(u_int rs,u_int rshift,u_int rt)
749{
750 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
751 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
752}
753
754static void emit_cmpimm(u_int rs, u_int imm)
755{
756 if (imm < 4096) {
757 assem_debug("cmp %s,%#x\n", regname[rs], imm);
758 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
759 }
760 else if (-imm < 4096) {
761 assem_debug("cmn %s,%#x\n", regname[rs], imm);
762 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
763 }
764 else if (imm < 16777216 && !(imm & 0xfff)) {
765 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
766 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
767 }
768 else {
769 host_tempreg_acquire();
770 emit_movimm(imm, HOST_TEMPREG);
771 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
772 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
773 host_tempreg_release();
774 }
775}
776
777static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
778{
779 assert(imm == 0 || imm == 1);
780 assert(cond0 < 0x10);
781 assert(cond1 < 0x10);
782 if (imm) {
783 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
784 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
785 } else {
786 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
787 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
788 }
789}
790
791static void emit_cmovne_imm(u_int imm,u_int rt)
792{
793 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
794}
795
796static void emit_cmovl_imm(u_int imm,u_int rt)
797{
798 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
799}
800
801static void emit_cmovb_imm(int imm,u_int rt)
802{
803 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
804}
805
806static void emit_cmoveq_reg(u_int rs,u_int rt)
807{
808 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
810}
811
812static void emit_cmovne_reg(u_int rs,u_int rt)
813{
814 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
816}
817
818static void emit_cmovl_reg(u_int rs,u_int rt)
819{
820 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
822}
823
824static void emit_cmovb_reg(u_int rs,u_int rt)
825{
826 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
828}
829
830static void emit_cmovs_reg(u_int rs,u_int rt)
831{
832 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
833 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
834}
835
836static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
837{
838 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
839 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
840}
841
842static void emit_slti32(u_int rs,int imm,u_int rt)
843{
844 if(rs!=rt) emit_zeroreg(rt);
845 emit_cmpimm(rs,imm);
846 if(rs==rt) emit_movimm(0,rt);
847 emit_cmovl_imm(1,rt);
848}
849
850static void emit_sltiu32(u_int rs,int imm,u_int rt)
851{
852 if(rs!=rt) emit_zeroreg(rt);
853 emit_cmpimm(rs,imm);
854 if(rs==rt) emit_movimm(0,rt);
855 emit_cmovb_imm(1,rt);
856}
857
858static void emit_cmp(u_int rs,u_int rt)
859{
860 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
861 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
862}
863
864static void emit_cmpcs(u_int rs,u_int rt)
865{
866 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
867 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
868}
869
870static void emit_set_gz32(u_int rs, u_int rt)
871{
872 //assem_debug("set_gz32\n");
873 emit_cmpimm(rs,1);
874 emit_movimm(1,rt);
875 emit_cmovl_imm(0,rt);
876}
877
878static void emit_set_nz32(u_int rs, u_int rt)
879{
880 //assem_debug("set_nz32\n");
881 if(rs!=rt) emit_mov(rs,rt);
882 emit_test(rs,rs);
883 emit_cmovne_imm(1,rt);
884}
885
886static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
887{
888 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
889 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
890 emit_cmp(rs1,rs2);
891 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
892 emit_cmovl_imm(1,rt);
893}
894
895static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
896{
897 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
898 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
899 emit_cmp(rs1,rs2);
900 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
901 emit_cmovb_imm(1,rt);
902}
903
904static int can_jump_or_call(const void *a)
905{
906 intptr_t diff = (u_char *)a - out;
907 return (-134217728 <= diff && diff <= 134217727);
908}
909
910static void emit_call(const void *a)
911{
912 intptr_t diff = (u_char *)a - out;
913 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
914 assert(!(diff & 3));
915 if (-134217728 <= diff && diff <= 134217727)
916 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
917 else
918 abort();
919}
920
921static void emit_jmp(const void *a)
922{
923 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
924 u_int offset = genjmp(a);
925 output_w32(0x14000000 | offset);
926}
927
928static void emit_jne(const void *a)
929{
930 assem_debug("bne %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_NE);
933}
934
935static void emit_jeq(const void *a)
936{
937 assem_debug("beq %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_EQ);
940}
941
942static void emit_js(const void *a)
943{
944 assem_debug("bmi %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_MI);
947}
948
949static void emit_jns(const void *a)
950{
951 assem_debug("bpl %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_PL);
954}
955
956static void emit_jl(const void *a)
957{
958 assem_debug("blt %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_LT);
961}
962
963static void emit_jge(const void *a)
964{
965 assem_debug("bge %p\n", a);
966 u_int offset = genjmpcc(a);
967 output_w32(0x54000000 | (offset << 5) | COND_GE);
968}
969
970static void emit_jno(const void *a)
971{
972 assem_debug("bvc %p\n", a);
973 u_int offset = genjmpcc(a);
974 output_w32(0x54000000 | (offset << 5) | COND_VC);
975}
976
977static void emit_jc(const void *a)
978{
979 assem_debug("bcs %p\n", a);
980 u_int offset = genjmpcc(a);
981 output_w32(0x54000000 | (offset << 5) | COND_CS);
982}
983
984static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
985{
986 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
987 u_int offset = genjmpcc(a);
988 is64 = is64 ? 0x80000000 : 0;
989 isnz = isnz ? 0x01000000 : 0;
990 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
991}
992
993static void *emit_cbz(u_int r, const void *a)
994{
995 void *ret = out;
996 emit_cb(0, 0, a, r);
997 return ret;
998}
999
1000static void emit_jmpreg(u_int r)
1001{
1002 assem_debug("br %s\n", regname64[r]);
1003 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1004}
1005
1006static void emit_retreg(u_int r)
1007{
1008 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1009 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1010}
1011
1012static void emit_ret(void)
1013{
1014 emit_retreg(LR);
1015}
1016
1017static void emit_adr(void *addr, u_int rt)
1018{
1019 intptr_t offset = (u_char *)addr - out;
1020 assert(-1048576 <= offset && offset < 1048576);
1021 assert(rt < 31);
1022 assem_debug("adr x%d,#%#lx\n", rt, offset);
1023 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1024}
1025
1026static void emit_adrp(void *addr, u_int rt)
1027{
1028 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1029 assert(-4294967296l <= offset && offset < 4294967296l);
1030 assert(rt < 31);
1031 offset >>= 12;
1032 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1033 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1034}
1035
1036static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1037{
1038 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1039 assert(-256 <= offset && offset < 256);
1040 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1041}
1042
1043static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1044{
1045 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1047}
1048
1049static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1050{
1051 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1052 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1053}
1054
1055static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056{
1057 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1058 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1059}
1060
1061static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1062{
1063 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1064 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1065}
1066#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1067
1068static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1069{
1070 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1071 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1072}
1073
1074static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1075{
1076 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1077 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1078}
1079
1080static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1081{
1082 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1083 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1084}
1085
1086static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1087{
1088 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1089 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1090}
1091
1092static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1093{
1094 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1095 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1096}
1097
1098static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1099{
1100 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1103}
1104
1105static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1106{
1107 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1110}
1111
1112static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1113{
1114 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1115 assert(-256 <= offset && offset < 256);
1116 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1117}
1118
1119static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1120{
1121 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1122 assert(-256 <= offset && offset < 256);
1123 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1124}
1125
1126static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1127{
1128 if (!(offset & 3) && (u_int)offset <= 16380) {
1129 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1130 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1131 }
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1134 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1135 }
1136 else
1137 assert(0);
1138}
1139
1140static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1141{
1142 if (!(offset & 1) && (u_int)offset <= 8190) {
1143 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1145 }
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1149 }
1150 else
1151 assert(0);
1152}
1153
1154static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1155{
1156 if ((u_int)offset < 4096) {
1157 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1158 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1159 }
1160 else if (-256 <= offset && offset < 256) {
1161 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1162 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1163 }
1164 else
1165 assert(0);
1166}
1167
1168static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1169{
1170 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1172}
1173
1174static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1175{
1176 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1178}
1179
1180static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1181{
1182 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1183 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1184}
1185
1186static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1187{
1188 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1190}
1191
1192static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1193{
1194 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1196}
1197
1198static void emit_clz(u_int rs, u_int rt)
1199{
1200 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1201 output_w32(0x5ac01000 | rn_rd(rs, rt));
1202}
1203
1204// special case for checking invalid_code
1205static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1206{
1207 emit_shrimm(r, 12, rt);
1208 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1209 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1210}
1211
1212// special for loadlr_assemble, rs2 is destroyed
1213static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1214{
1215 emit_shl(rs2, shift, rs2);
1216 emit_bic(rs1, rs2, rt);
1217}
1218
1219static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1220{
1221 emit_shr(rs2, shift, rs2);
1222 emit_bic(rs1, rs2, rt);
1223}
1224
1225static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1226{
1227 u_int op = 0xb9000000;
1228 unused const char *ldst = is_st ? "st" : "ld";
1229 unused char rp = is64 ? 'x' : 'w';
1230 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1231 is64 = is64 ? 1 : 0;
1232 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1233 ofs = (ofs >> (2+is64));
1234 if (!is_st) op |= 0x00400000;
1235 if (is64) op |= 0x40000000;
1236 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1237}
1238
1239static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1240{
1241 u_int op = 0x29000000;
1242 unused const char *ldst = is_st ? "st" : "ld";
1243 unused char rp = is64 ? 'x' : 'w';
1244 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1245 is64 = is64 ? 1 : 0;
1246 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1247 ofs = (ofs >> (2+is64));
1248 assert(-64 <= ofs && ofs <= 63);
1249 ofs &= 0x7f;
1250 if (!is_st) op |= 0x00400000;
1251 if (is64) op |= 0x80000000;
1252 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1253}
1254
1255static void save_load_regs_all(int is_store, u_int reglist)
1256{
1257 int ofs = 0, c = 0;
1258 u_int r, pair[2];
1259 for (r = 0; reglist; r++, reglist >>= 1) {
1260 if (reglist & 1)
1261 pair[c++] = r;
1262 if (c == 2) {
1263 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1264 ofs += 8 * 2;
1265 c = 0;
1266 }
1267 }
1268 if (c) {
1269 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1270 ofs += 8;
1271 }
1272 assert(ofs <= SSP_CALLER_REGS);
1273}
1274
1275// Save registers before function call
1276static void save_regs(u_int reglist)
1277{
1278 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1279 save_load_regs_all(1, reglist);
1280}
1281
1282// Restore registers after function call
1283static void restore_regs(u_int reglist)
1284{
1285 reglist &= CALLER_SAVE_REGS;
1286 save_load_regs_all(0, reglist);
1287}
1288
1289/* Stubs/epilogue */
1290
1291static void literal_pool(int n)
1292{
1293 (void)literals;
1294}
1295
/* Intentionally empty on this target. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1299
1300// parsed by get_pointer, find_extjump_insn
1301static void emit_extjump(u_char *addr, u_int target)
1302{
1303 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1304
1305 emit_movz(target & 0xffff, 0);
1306 emit_movk_lsl16(target >> 16, 0);
1307
1308 // addr is in the current recompiled block (max 256k)
1309 // offset shouldn't exceed +/-1MB
1310 emit_adr(addr, 1);
1311 emit_far_jump(dyna_linker);
1312}
1313
1314static void check_extjump2(void *src)
1315{
1316 u_int *ptr = src;
1317 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1318 (void)ptr;
1319}
1320
1321// put rt_val into rt, potentially making use of rs with value rs_val
1322static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1323{
1324 int diff = rt_val - rs_val;
1325 if ((-4096 < diff && diff < 4096)
1326 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1327 emit_addimm(rs, diff, rt);
1328 else if (rt_val == ~rs_val)
1329 emit_not(rs, rt);
1330 else if (is_rotated_mask(rs_val ^ rt_val))
1331 emit_xorimm(rs, rs_val ^ rt_val, rt);
1332 else
1333 emit_movimm(rt_val, rt);
1334}
1335
1336// return 1 if the above function can do it's job cheaply
1337static int is_similar_value(u_int v1, u_int v2)
1338{
1339 int diff = v1 - v2;
1340 return (-4096 < diff && diff < 4096)
1341 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1342 || v1 == ~v2
1343 || is_rotated_mask(v1 ^ v2);
1344}
1345
1346static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1347{
1348 if (rt_val < 0x100000000ull) {
1349 emit_movimm_from(rs_val, rs, rt_val, rt);
1350 return;
1351 }
1352 // just move the whole thing. At least on Linux all addresses
1353 // seem to be 48bit, so 3 insns - not great not terrible
1354 emit_movimm64(rt_val, rt);
1355}
1356
1357// trashes x2
1358static void pass_args64(u_int a0, u_int a1)
1359{
1360 if(a0==1&&a1==0) {
1361 // must swap
1362 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1363 }
1364 else if(a0!=0&&a1==0) {
1365 emit_mov64(a1,1);
1366 if (a0>=0) emit_mov64(a0,0);
1367 }
1368 else {
1369 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1370 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1371 }
1372}
1373
1374static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1375{
1376 switch(type) {
1377 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1378 case LOADBU_STUB:
1379 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1380 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1381 case LOADHU_STUB:
1382 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1383 case LOADW_STUB:
1384 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1385 default: assert(0);
1386 }
1387}
1388
1389#include "pcsxmem.h"
1390//#include "pcsxmem_inline.c"
1391
1392static void do_readstub(int n)
1393{
1394 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1395 set_jump_target(stubs[n].addr, out);
1396 enum stub_type type = stubs[n].type;
1397 int i = stubs[n].a;
1398 int rs = stubs[n].b;
1399 const struct regstat *i_regs = (void *)stubs[n].c;
1400 u_int reglist = stubs[n].e;
1401 const signed char *i_regmap = i_regs->regmap;
1402 int rt;
1403 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1404 rt=get_reg(i_regmap,FTEMP);
1405 }else{
1406 rt=get_reg(i_regmap,dops[i].rt1);
1407 }
1408 assert(rs>=0);
1409 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1410 void *restore_jump = NULL, *handler_jump = NULL;
1411 reglist|=(1<<rs);
1412 for (r = 0; r < HOST_CCREG; r++) {
1413 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1414 temp = r;
1415 break;
1416 }
1417 }
1418 if(rt>=0&&dops[i].rt1!=0)
1419 reglist&=~(1<<rt);
1420 if(temp==-1) {
1421 save_regs(reglist);
1422 regs_saved=1;
1423 temp=(rs==0)?2:0;
1424 }
1425 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1426 temp2=1;
1427 emit_readdword(&mem_rtab,temp);
1428 emit_shrimm(rs,12,temp2);
1429 emit_readdword_dualindexedx8(temp,temp2,temp2);
1430 emit_adds64(temp2,temp2,temp2);
1431 handler_jump=out;
1432 emit_jc(0);
1433 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1434 switch(type) {
1435 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1436 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1437 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1438 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1439 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1440 default: assert(0);
1441 }
1442 }
1443 if(regs_saved) {
1444 restore_jump=out;
1445 emit_jmp(0); // jump to reg restore
1446 }
1447 else
1448 emit_jmp(stubs[n].retaddr); // return address
1449 set_jump_target(handler_jump, out);
1450
1451 if(!regs_saved)
1452 save_regs(reglist);
1453 void *handler=NULL;
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
1460 assert(handler);
1461 pass_args64(rs,temp2);
1462 int cc=get_reg(i_regmap,CCREG);
1463 if(cc<0)
1464 emit_loadreg(CCREG,2);
1465 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1466 emit_far_call(handler);
1467 // (no cycle reload after read)
1468 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1469 loadstore_extend(type,0,rt);
1470 }
1471 if(restore_jump)
1472 set_jump_target(restore_jump, out);
1473 restore_regs(reglist);
1474 emit_jmp(stubs[n].retaddr);
1475}
1476
1477static void inline_readstub(enum stub_type type, int i, u_int addr,
1478 const signed char regmap[], int target, int adj, u_int reglist)
1479{
1480 int rs=get_reg(regmap,target);
1481 int rt=get_reg(regmap,target);
1482 if(rs<0) rs=get_reg_temp(regmap);
1483 assert(rs>=0);
1484 u_int is_dynamic=0;
1485 uintptr_t host_addr = 0;
1486 void *handler;
1487 int cc=get_reg(regmap,CCREG);
1488 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1489 // return;
1490 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1491 if (handler == NULL) {
1492 if(rt<0||dops[i].rt1==0)
1493 return;
1494 if (addr != host_addr)
1495 emit_movimm_from64(addr, rs, host_addr, rs);
1496 switch(type) {
1497 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1498 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1499 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1500 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1501 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1502 default: assert(0);
1503 }
1504 return;
1505 }
1506 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1507 if (is_dynamic) {
1508 if(type==LOADB_STUB||type==LOADBU_STUB)
1509 handler=jump_handler_read8;
1510 if(type==LOADH_STUB||type==LOADHU_STUB)
1511 handler=jump_handler_read16;
1512 if(type==LOADW_STUB)
1513 handler=jump_handler_read32;
1514 }
1515
1516 // call a memhandler
1517 if(rt>=0&&dops[i].rt1!=0)
1518 reglist&=~(1<<rt);
1519 save_regs(reglist);
1520 if(target==0)
1521 emit_movimm(addr,0);
1522 else if(rs!=0)
1523 emit_mov(rs,0);
1524 if(cc<0)
1525 emit_loadreg(CCREG,2);
1526 emit_addimm(cc<0?2:cc,adj,2);
1527 if(is_dynamic) {
1528 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1529 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1530 if (-4294967296l <= offset && offset < 4294967296l) {
1531 emit_adrp((void *)l1, 1);
1532 emit_addimm64(1, l1 & 0xfff, 1);
1533 }
1534 else
1535 emit_movimm64(l1, 1);
1536 }
1537 else
1538 emit_far_call(do_memhandler_pre);
1539
1540 emit_far_call(handler);
1541
1542 // (no cycle reload after read)
1543 if(rt>=0&&dops[i].rt1!=0)
1544 loadstore_extend(type, 0, rt);
1545 restore_regs(reglist);
1546}
1547
1548static void do_writestub(int n)
1549{
1550 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1551 set_jump_target(stubs[n].addr, out);
1552 enum stub_type type=stubs[n].type;
1553 int i=stubs[n].a;
1554 int rs=stubs[n].b;
1555 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1556 u_int reglist=stubs[n].e;
1557 signed char *i_regmap=i_regs->regmap;
1558 int rt,r;
1559 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1560 rt=get_reg(i_regmap,r=FTEMP);
1561 }else{
1562 rt=get_reg(i_regmap,r=dops[i].rs2);
1563 }
1564 assert(rs>=0);
1565 assert(rt>=0);
1566 int rtmp,temp=-1,temp2,regs_saved=0;
1567 void *restore_jump = NULL, *handler_jump = NULL;
1568 int reglist2=reglist|(1<<rs)|(1<<rt);
1569 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1570 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1571 temp = rtmp;
1572 break;
1573 }
1574 }
1575 if(temp==-1) {
1576 save_regs(reglist);
1577 regs_saved=1;
1578 for(rtmp=0;rtmp<=3;rtmp++)
1579 if(rtmp!=rs&&rtmp!=rt)
1580 {temp=rtmp;break;}
1581 }
1582 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1583 temp2=3;
1584 else {
1585 host_tempreg_acquire();
1586 temp2=HOST_TEMPREG;
1587 }
1588 emit_readdword(&mem_wtab,temp);
1589 emit_shrimm(rs,12,temp2);
1590 emit_readdword_dualindexedx8(temp,temp2,temp2);
1591 emit_adds64(temp2,temp2,temp2);
1592 handler_jump=out;
1593 emit_jc(0);
1594 switch(type) {
1595 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1596 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1597 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1598 default: assert(0);
1599 }
1600 if(regs_saved) {
1601 restore_jump=out;
1602 emit_jmp(0); // jump to reg restore
1603 }
1604 else
1605 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1606 set_jump_target(handler_jump, out);
1607
1608 if(!regs_saved)
1609 save_regs(reglist);
1610 void *handler=NULL;
1611 switch(type) {
1612 case STOREB_STUB: handler=jump_handler_write8; break;
1613 case STOREH_STUB: handler=jump_handler_write16; break;
1614 case STOREW_STUB: handler=jump_handler_write32; break;
1615 default: assert(0);
1616 }
1617 assert(handler);
1618 pass_args(rs,rt);
1619 if(temp2!=3) {
1620 emit_mov64(temp2,3);
1621 host_tempreg_release();
1622 }
1623 int cc=get_reg(i_regmap,CCREG);
1624 if(cc<0)
1625 emit_loadreg(CCREG,2);
1626 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1627 // returns new cycle_count
1628 emit_far_call(handler);
1629 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1630 if(cc<0)
1631 emit_storereg(CCREG,2);
1632 if(restore_jump)
1633 set_jump_target(restore_jump, out);
1634 restore_regs(reglist);
1635 emit_jmp(stubs[n].retaddr);
1636}
1637
1638static void inline_writestub(enum stub_type type, int i, u_int addr,
1639 const signed char regmap[], int target, int adj, u_int reglist)
1640{
1641 int rs = get_reg_temp(regmap);
1642 int rt = get_reg(regmap,target);
1643 assert(rs >= 0);
1644 assert(rt >= 0);
1645 uintptr_t host_addr = 0;
1646 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1647 if (handler == NULL) {
1648 if (addr != host_addr)
1649 emit_movimm_from64(addr, rs, host_addr, rs);
1650 switch (type) {
1651 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1652 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1653 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1654 default: assert(0);
1655 }
1656 return;
1657 }
1658
1659 // call a memhandler
1660 save_regs(reglist);
1661 emit_writeword(rs, &address); // some handlers still need it
1662 loadstore_extend(type, rt, 0);
1663 int cc, cc_use;
1664 cc = cc_use = get_reg(regmap, CCREG);
1665 if (cc < 0)
1666 emit_loadreg(CCREG, (cc_use = 2));
1667 emit_addimm(cc_use, adj, 2);
1668
1669 emit_far_call(do_memhandler_pre);
1670 emit_far_call(handler);
1671 emit_far_call(do_memhandler_post);
1672 emit_addimm(0, -adj, cc_use);
1673 if (cc < 0)
1674 emit_storereg(CCREG, cc_use);
1675 restore_regs(reglist);
1676}
1677
1678/* Special assem */
1679
1680static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1681{
1682 save_load_regs_all(1, reglist);
1683 cop2_do_stall_check(op, i, i_regs, 0);
1684#ifdef PCNT
1685 emit_movimm(op, 0);
1686 emit_far_call(pcnt_gte_start);
1687#endif
1688 // pointer to cop2 regs
1689 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1690}
1691
1692static void c2op_epilogue(u_int op,u_int reglist)
1693{
1694#ifdef PCNT
1695 emit_movimm(op, 0);
1696 emit_far_call(pcnt_gte_end);
1697#endif
1698 save_load_regs_all(0, reglist);
1699}
1700
1701static void c2op_assemble(int i, const struct regstat *i_regs)
1702{
1703 u_int c2op=source[i]&0x3f;
1704 u_int hr,reglist_full=0,reglist;
1705 int need_flags,need_ir;
1706 for(hr=0;hr<HOST_REGS;hr++) {
1707 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1708 }
1709 reglist=reglist_full&CALLER_SAVE_REGS;
1710
1711 if (gte_handlers[c2op]!=NULL) {
1712 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1713 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1714 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1715 source[i],gte_unneeded[i+1],need_flags,need_ir);
1716 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1717 need_flags=0;
1718 //int shift = (source[i] >> 19) & 1;
1719 //int lm = (source[i] >> 10) & 1;
1720 switch(c2op) {
1721 default:
1722 (void)need_ir;
1723 c2op_prologue(c2op, i, i_regs, reglist);
1724 emit_movimm(source[i],1); // opcode
1725 emit_writeword(1,&psxRegs.code);
1726 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1727 break;
1728 }
1729 c2op_epilogue(c2op,reglist);
1730 }
1731}
1732
1733static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1734{
1735 //value = value & 0x7ffff000;
1736 //if (value & 0x7f87e000) value |= 0x80000000;
1737 emit_andimm(sl, 0x7fffe000, temp);
1738 emit_testimm(temp, 0xff87ffff);
1739 emit_andimm(sl, 0x7ffff000, temp);
1740 host_tempreg_acquire();
1741 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1742 emit_cmovne_reg(HOST_TEMPREG, temp);
1743 host_tempreg_release();
1744 assert(0); // testing needed
1745}
1746
1747static void do_mfc2_31_one(u_int copr,signed char temp)
1748{
1749 emit_readshword(&reg_cop2d[copr],temp);
1750 emit_bicsar_imm(temp,31,temp);
1751 emit_cmpimm(temp,0xf80);
1752 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1753 emit_andimm(temp,0xf80,temp);
1754}
1755
1756static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1757{
1758 if (temp < 0) {
1759 host_tempreg_acquire();
1760 temp = HOST_TEMPREG;
1761 }
1762 do_mfc2_31_one(9,temp);
1763 emit_shrimm(temp,7,tl);
1764 do_mfc2_31_one(10,temp);
1765 emit_orrshr_imm(temp,2,tl);
1766 do_mfc2_31_one(11,temp);
1767 emit_orrshl_imm(temp,3,tl);
1768 emit_writeword(tl,&reg_cop2d[29]);
1769
1770 if (temp == HOST_TEMPREG)
1771 host_tempreg_release();
1772}
1773
1774static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1775{
1776 // case 0x18: MULT
1777 // case 0x19: MULTU
1778 // case 0x1A: DIV
1779 // case 0x1B: DIVU
1780 if(dops[i].rs1&&dops[i].rs2)
1781 {
1782 switch(dops[i].opcode2)
1783 {
1784 case 0x18: // MULT
1785 case 0x19: // MULTU
1786 {
1787 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1788 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1789 signed char hi=get_reg(i_regs->regmap,HIREG);
1790 signed char lo=get_reg(i_regs->regmap,LOREG);
1791 assert(m1>=0);
1792 assert(m2>=0);
1793 assert(hi>=0);
1794 assert(lo>=0);
1795
1796 if(dops[i].opcode2==0x18) // MULT
1797 emit_smull(m1,m2,hi);
1798 else // MULTU
1799 emit_umull(m1,m2,hi);
1800
1801 emit_mov(hi,lo);
1802 emit_shrimm64(hi,32,hi);
1803 break;
1804 }
1805 case 0x1A: // DIV
1806 case 0x1B: // DIVU
1807 {
1808 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1809 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1810 signed char quotient=get_reg(i_regs->regmap,LOREG);
1811 signed char remainder=get_reg(i_regs->regmap,HIREG);
1812 assert(numerator>=0);
1813 assert(denominator>=0);
1814 assert(quotient>=0);
1815 assert(remainder>=0);
1816
1817 if (dops[i].opcode2 == 0x1A) // DIV
1818 emit_sdiv(numerator,denominator,quotient);
1819 else // DIVU
1820 emit_udiv(numerator,denominator,quotient);
1821 emit_msub(quotient,denominator,numerator,remainder);
1822
1823 // div 0 quotient (remainder is already correct)
1824 host_tempreg_acquire();
1825 if (dops[i].opcode2 == 0x1A) // DIV
1826 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1827 else
1828 emit_movimm(~0,HOST_TEMPREG);
1829 emit_test(denominator,denominator);
1830 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1831 host_tempreg_release();
1832 break;
1833 }
1834 default:
1835 assert(0);
1836 }
1837 }
1838 else
1839 {
1840 signed char hr=get_reg(i_regs->regmap,HIREG);
1841 signed char lr=get_reg(i_regs->regmap,LOREG);
1842 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1843 {
1844 if (dops[i].rs1) {
1845 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1846 assert(numerator >= 0);
1847 if (hr >= 0)
1848 emit_mov(numerator,hr);
1849 if (lr >= 0) {
1850 if (dops[i].opcode2 == 0x1A) // DIV
1851 emit_sub_asrimm(0,numerator,31,lr);
1852 else
1853 emit_movimm(~0,lr);
1854 }
1855 }
1856 else {
1857 if (hr >= 0) emit_zeroreg(hr);
1858 if (lr >= 0) emit_movimm(~0,lr);
1859 }
1860 }
1861 else
1862 {
1863 // Multiply by zero is zero.
1864 if (hr >= 0) emit_zeroreg(hr);
1865 if (lr >= 0) emit_zeroreg(lr);
1866 }
1867 }
1868}
1869#define multdiv_assemble multdiv_assemble_arm64
1870
1871static void do_jump_vaddr(u_int rs)
1872{
1873 if (rs != 0)
1874 emit_mov(rs, 0);
1875 emit_far_call(ndrc_get_addr_ht);
1876 emit_jmpreg(0);
1877}
1878
1879static void do_preload_rhash(u_int r) {
1880 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1881 // register. On ARM the hash can be done with a single instruction (below)
1882}
1883
1884static void do_preload_rhtbl(u_int ht) {
1885 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1886}
1887
1888static void do_rhash(u_int rs,u_int rh) {
1889 emit_andimm(rs, 0xf8, rh);
1890}
1891
1892static void do_miniht_load(int ht, u_int rh) {
1893 emit_add64(ht, rh, ht);
1894 emit_ldst(0, 0, rh, ht, 0);
1895}
1896
1897static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1898 emit_cmp(rh, rs);
1899 void *jaddr = out;
1900 emit_jeq(0);
1901 do_jump_vaddr(rs);
1902
1903 set_jump_target(jaddr, out);
1904 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1905 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1906 emit_jmpreg(ht);
1907}
1908
1909// parsed by set_jump_target?
1910static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1911 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1912 emit_movk(return_address&0xffff,rt);
1913 add_to_linker(out,return_address,1);
1914 emit_adr(out,temp);
1915 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1916 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1917}
1918
1919static unused void clear_cache_arm64(char *start, char *end)
1920{
1921 // Don't rely on GCC's __clear_cache implementation, as it caches
1922 // icache/dcache cache line sizes, that can vary between cores on
1923 // big.LITTLE architectures.
1924 uint64_t addr, ctr_el0;
1925 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1926 size_t isize, dsize;
1927
1928 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
1929 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1930 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1931
1932 // use the global minimum cache line size
1933 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1934 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1935
1936 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1937 not required for instruction to data coherence. */
1938 if ((ctr_el0 & (1 << 28)) == 0x0) {
1939 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1940 for (; addr < (uint64_t)end; addr += dsize)
1941 // use "civac" instead of "cvau", as this is the suggested workaround for
1942 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1943 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
1944 }
1945 __asm__ volatile("dsb ish" : : : "memory");
1946
1947 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1948 Unification is not required for instruction to data coherence. */
1949 if ((ctr_el0 & (1 << 29)) == 0x0) {
1950 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1951 for (; addr < (uint64_t)end; addr += isize)
1952 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1953
1954 __asm__ volatile("dsb ish" : : : "memory");
1955 }
1956
1957 __asm__ volatile("isb" : : : "memory");
1958}
1959
1960// CPU-architecture-specific initialization
1961static void arch_init(void)
1962{
1963 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1964 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
1965 size_t i;
1966 assert(!(diff & 3));
1967 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1968 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1969 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1970 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1971 }
1972 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1973}
1974
1975// vim:shiftwidth=2:expandtab