try to fix win32 build
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
// Memory-handler entry/exit helpers implemented outside this file.
// Declared with (void) so the declarations are real prototypes.
void do_memhandler_pre(void);
void do_memhandler_post(void);
28
29/* Linker */
30static void set_jump_target(void *addr, void *target)
31{
32 u_int *ptr = NDRC_WRITE_OFFSET(addr);
33 intptr_t offset = (u_char *)target - (u_char *)addr;
34
35 if ((*ptr&0xFC000000) == 0x14000000) { // b
36 assert(offset>=-134217728LL&&offset<134217728LL);
37 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
38 }
39 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
40 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
41 // Conditional branch are limited to +/- 1MB
42 // block max size is 256k so branching beyond the +/- 1MB limit
43 // should only happen when jumping to an already compiled block (see add_jump_out)
44 // a workaround would be to do a trampoline jump via a stub at the end of the block
45 assert(-1048576 <= offset && offset < 1048576);
46 *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
47 }
48 else if((*ptr&0x9f000000)==0x10000000) { // adr
49 // generated by do_miniht_insert
50 assert(offset>=-1048576LL&&offset<1048576LL);
51 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
52 }
53 else
54 abort(); // should not happen
55}
56
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr; its pc-relative target
// (relative to that adr) is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // Work on an unsigned copy: left-shifting the signed instruction word
  // would overflow, which is undefined behaviour.
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // immhi is bits 23:5; shift it to the top, then sign-extend back down
  int32_t immhi = (int32_t)(insn << 8) >> 13;
  int offset = immhi * 4 + (int)((insn >> 29) & 0x3); // add immlo
  return ptr + offset / 4;
}
66
#if 0
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to the branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  int insn = *branch;

  if ((insn&0xfc000000) == 0x14000000) // b
    return branch + ((signed int)(insn<<6)>>6);

  int is_bcond = (insn&0xff000000) == 0x54000000; // b.cond
  int is_cbz   = (insn&0x7e000000) == 0x34000000; // cbz/cbnz
  if (is_bcond || is_cbz)
    return branch + ((signed int)(insn<<8)>>13);

  assert(0);
  return NULL;
}
#endif
84
85// Allocate a specific ARM register.
86static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
87{
88 int n;
89 int dirty=0;
90
91 // see if it's already allocated (and dealloc it)
92 for(n=0;n<HOST_REGS;n++)
93 {
94 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
95 dirty=(cur->dirty>>n)&1;
96 cur->regmap[n]=-1;
97 }
98 }
99
100 cur->regmap[hr]=reg;
101 cur->dirty&=~(1<<hr);
102 cur->dirty|=dirty<<hr;
103 cur->isconst&=~(1<<hr);
104}
105
106// Alloc cycle count into dedicated register
107static void alloc_cc(struct regstat *cur,int i)
108{
109 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
110}
111
112/* Special alloc */
113
114
115/* Assembler */
116
117static unused const char *regname[32] = {
118 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
119 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
120 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
121 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
122};
123
124static unused const char *regname64[32] = {
125 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
126 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
127 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
128 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
129};
130
// Condition codes; the numeric value is what gets placed into the
// condition field of the emitted instructions.
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
135
136static unused const char *condname[16] = {
137 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
138 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
139};
140
141static void output_w32(u_int word)
142{
143 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
144 out += 4;
145}
146
// Pack rn into bits 9:5 and rd into bits 4:0; register 31 is rejected.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);

  fields  = rd;
  fields |= rn << 5;
  return fields;
}
153
// Pack rm into bits 20:16, rn into bits 9:5 and rd into bits 4:0.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);

  fields  = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
161
162static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
163{
164 assert(ra < 32);
165 return rm_rn_rd(rm, rn, rd) | (ra << 10);
166}
167
// Pack imm7 (bits 21:15), rt2 (bits 14:10), rn (bits 9:5) and
// rt (bits 4:0). rt and rt2 may not be register 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);

  fields  = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
176
177static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
178{
179 assert(imm6 <= 63);
180 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
181}
182
// Pack a 16-bit immediate into bits 20:5 and rd into bits 4:0.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);

  fields  = rd;
  fields |= imm16 << 5;
  return fields;
}
189
// Pack a 12-bit immediate into bits 21:10, rn into bits 9:5 and
// rd into bits 4:0.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);

  fields  = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
197
// Pack a 9-bit immediate into bits 20:12, rn into bits 9:5 and the
// transfer register into bits 4:0. Register 31 is rejected for both.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);

  fields  = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
205
// Pack a 19-bit immediate into bits 23:5 and rt into bits 4:0.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);

  fields  = rt;
  fields |= imm19 << 5;
  return fields;
}
212
// Pack the bitfield/logical-immediate fields: n (bit 22), immr
// (bits 21:16), imms (bits 15:10), rn (bits 9:5) and rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);

  fields  = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
222
223static u_int genjmp(const u_char *addr)
224{
225 intptr_t offset = addr - out;
226 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
227 if (offset < -134217728 || offset > 134217727) {
228 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
229 abort();
230 return 0;
231 }
232 return ((u_int)offset >> 2) & 0x03ffffff;
233}
234
235static u_int genjmpcc(const u_char *addr)
236{
237 intptr_t offset = addr - out;
238 if ((uintptr_t)addr < 3) return 0;
239 if (offset < -1048576 || offset > 1048572) {
240 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
241 abort();
242 return 0;
243 }
244 return ((u_int)offset >> 2) & 0x7ffff;
245}
246
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two or wraps to zero), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
251
252// This function returns true if the argument contains a
253// non-empty sequence of ones (possibly rotated) with the remainder zero.
254static uint32_t is_rotated_mask(u_int value)
255{
256 if (value == 0 || value == ~0)
257 return 0;
258 if (is_mask((value - 1) | value))
259 return 1;
260 return is_mask((~value - 1) | ~value);
261}
262
// Compute the immr/imms fields for a 32-bit logical immediate.
// value must be non-zero and consist of a single (possibly rotated) run
// of ones; anything else ends in abort().
static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
{
  u_int filled;
  int lead, trail, run;

  assert(value != 0);

  // Case 1: the ones form a run that does not wrap; filling the trailing
  // zeros then gives a mask of the form 2^k - 1.
  filled = (value - 1) | value;
  if (filled != 0 && (filled & (filled + 1)) == 0) {
    lead = __builtin_clz(value);
    trail = __builtin_ctz(value);
    run = 32 - lead - trail;
    *immr = (32 - trail) & 31;
    *imms = run - 1;
    return;
  }

  // Case 2: the ones wrap around, so the zeros form the non-wrapping run.
  value = ~value;
  filled = (value - 1) | value;
  if (filled != 0 && (filled & (filled + 1)) == 0) {
    lead = __builtin_clz(value);
    trail = __builtin_ctz(value);
    run = 32 - lead - trail; // number of zero bits in the original value
    *immr = lead;
    *imms = 31 - run;
    return;
  }

  abort();
}
286
287static void emit_mov(u_int rs, u_int rt)
288{
289 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
290 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
291}
292
293static void emit_mov64(u_int rs, u_int rt)
294{
295 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
296 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
297}
298
299static void emit_add(u_int rs1, u_int rs2, u_int rt)
300{
301 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
302 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
303}
304
305static void emit_add64(u_int rs1, u_int rs2, u_int rt)
306{
307 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
308 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
309}
310
311static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
312{
313 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
314 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
315}
316#define emit_adds_ptr emit_adds64
317
318static void emit_neg(u_int rs, u_int rt)
319{
320 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
321 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
322}
323
324static void emit_sub(u_int rs1, u_int rs2, u_int rt)
325{
326 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
327 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
328}
329
330static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
331{
332 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
333 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
334}
335
336static void emit_movz(u_int imm, u_int rt)
337{
338 assem_debug("movz %s,#%#x\n", regname[rt], imm);
339 output_w32(0x52800000 | imm16_rd(imm, rt));
340}
341
342static void emit_movz_lsl16(u_int imm, u_int rt)
343{
344 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
345 output_w32(0x52a00000 | imm16_rd(imm, rt));
346}
347
348static void emit_movn(u_int imm, u_int rt)
349{
350 assem_debug("movn %s,#%#x\n", regname[rt], imm);
351 output_w32(0x12800000 | imm16_rd(imm, rt));
352}
353
354static void emit_movn_lsl16(u_int imm,u_int rt)
355{
356 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
357 output_w32(0x12a00000 | imm16_rd(imm, rt));
358}
359
360static void emit_movk(u_int imm,u_int rt)
361{
362 assem_debug("movk %s,#%#x\n", regname[rt], imm);
363 output_w32(0x72800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movk_lsl16(u_int imm,u_int rt)
367{
368 assert(imm<65536);
369 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
370 output_w32(0x72a00000 | imm16_rd(imm, rt));
371}
372
373static void emit_zeroreg(u_int rt)
374{
375 emit_movz(0, rt);
376}
377
378static void emit_movimm(u_int imm, u_int rt)
379{
380 if (imm < 65536)
381 emit_movz(imm, rt);
382 else if ((~imm) < 65536)
383 emit_movn(~imm, rt);
384 else if ((imm&0xffff) == 0)
385 emit_movz_lsl16(imm >> 16, rt);
386 else if (((~imm)&0xffff) == 0)
387 emit_movn_lsl16(~imm >> 16, rt);
388 else if (is_rotated_mask(imm)) {
389 u_int immr, imms;
390 gen_logical_imm(imm, &immr, &imms);
391 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
392 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
393 }
394 else {
395 emit_movz(imm & 0xffff, rt);
396 emit_movk_lsl16(imm >> 16, rt);
397 }
398}
399
400static void emit_movimm64(uint64_t imm, u_int rt)
401{
402 u_int shift, op, imm16, insns = 0;
403 for (shift = 0; shift < 4; shift++) {
404 imm16 = (imm >> shift * 16) & 0xffff;
405 if (!imm16)
406 continue;
407 op = insns ? 0xf2800000 : 0xd2800000;
408 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
409 if (shift)
410 assem_debug(",lsl #%u", shift * 16);
411 assem_debug("\n");
412 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
413 insns++;
414 }
415 if (!insns) {
416 assem_debug("movz %s,#0\n", regname64[rt]);
417 output_w32(0xd2800000 | imm16_rd(0, rt));
418 }
419}
420
421static void emit_readword(void *addr, u_int rt)
422{
423 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
424 if (!(offset & 3) && offset <= 16380) {
425 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
426 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
427 }
428 else
429 abort();
430}
431
432static void emit_readdword(void *addr, u_int rt)
433{
434 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
435 if (!(offset & 7) && offset <= 32760) {
436 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
437 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
438 }
439 else
440 abort();
441}
442#define emit_readptr emit_readdword
443
444static void emit_readshword(void *addr, u_int rt)
445{
446 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
447 if (!(offset & 1) && offset <= 8190) {
448 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
449 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
450 }
451 else
452 assert(0);
453}
454
455static void emit_loadreg(u_int r, u_int hr)
456{
457 int is64 = 0;
458 if (r == 0)
459 emit_zeroreg(hr);
460 else {
461 void *addr;
462 switch (r) {
463 //case HIREG: addr = &hi; break;
464 //case LOREG: addr = &lo; break;
465 case CCREG: addr = &cycle_count; break;
466 case CSREG: addr = &psxRegs.CP0.n.SR; break;
467 case INVCP: addr = &invc_ptr; is64 = 1; break;
468 case ROREG: addr = &ram_offset; is64 = 1; break;
469 default:
470 assert(r < 34);
471 addr = &psxRegs.GPR.r[r];
472 break;
473 }
474 if (is64)
475 emit_readdword(addr, hr);
476 else
477 emit_readword(addr, hr);
478 }
479}
480
481static void emit_writeword(u_int rt, void *addr)
482{
483 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
484 if (!(offset & 3) && offset <= 16380) {
485 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
486 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
487 }
488 else
489 assert(0);
490}
491
492static void emit_writedword(u_int rt, void *addr)
493{
494 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
495 if (!(offset & 7) && offset <= 32760) {
496 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
497 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
498 }
499 else
500 abort();
501}
502
503static void emit_storereg(u_int r, u_int hr)
504{
505 assert(r < 64);
506 void *addr = &psxRegs.GPR.r[r];
507 switch (r) {
508 //case HIREG: addr = &hi; break;
509 //case LOREG: addr = &lo; break;
510 case CCREG: addr = &cycle_count; break;
511 default: assert(r < 34); break;
512 }
513 emit_writeword(hr, addr);
514}
515
516static void emit_test(u_int rs, u_int rt)
517{
518 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
519 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
520}
521
522static void emit_testimm(u_int rs, u_int imm)
523{
524 u_int immr, imms;
525 assem_debug("tst %s,#%#x\n", regname[rs], imm);
526 assert(is_rotated_mask(imm)); // good enough for PCSX
527 gen_logical_imm(imm, &immr, &imms);
528 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
529}
530
531static void emit_not(u_int rs,u_int rt)
532{
533 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
534 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
535}
536
537static void emit_and(u_int rs1,u_int rs2,u_int rt)
538{
539 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
540 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
541}
542
543static void emit_or(u_int rs1,u_int rs2,u_int rt)
544{
545 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
546 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
547}
548
549static void emit_bic(u_int rs1,u_int rs2,u_int rt)
550{
551 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
553}
554
555static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
556{
557 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
558 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
559}
560
561static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
562{
563 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
564 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
565}
566
567static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
568{
569 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
570 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
571}
572
573static void emit_xor(u_int rs1,u_int rs2,u_int rt)
574{
575 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
576 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
577}
578
579static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
580{
581 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
582 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
583}
584
585static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
586{
587 unused const char *st = s ? "s" : "";
588 s = s ? 0x20000000 : 0;
589 is64 = is64 ? 0x80000000 : 0;
590 if (imm < 4096) {
591 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
592 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
593 }
594 else if (-imm < 4096) {
595 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
596 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
597 }
598 else if (imm < 16777216) {
599 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
600 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
601 if ((imm & 0xfff) || s) {
602 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
603 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
604 }
605 }
606 else if (-imm < 16777216) {
607 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
608 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
609 if ((imm & 0xfff) || s) {
610 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
611 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
612 }
613 }
614 else
615 abort();
616}
617
618static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
619{
620 if (imm == 0) {
621 emit_mov(rs, rt);
622 return;
623 }
624 emit_addimm_s(0, 0, rs, imm, rt);
625}
626
627static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
628{
629 emit_addimm_s(0, 1, rs, imm, rt);
630}
631
632static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
633{
634 emit_addimm64(rs, imm, rt);
635}
636
637static void emit_addimm_and_set_flags(int imm, u_int rt)
638{
639 emit_addimm_s(1, 0, rt, imm, rt);
640}
641
642static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
643{
644 const char *names[] = { "and", "orr", "eor", "ands" };
645 const char *name = names[op];
646 u_int immr, imms;
647 op = op << 29;
648 if (is_rotated_mask(imm)) {
649 gen_logical_imm(imm, &immr, &imms);
650 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
651 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
652 }
653 else {
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_acquire();
656 emit_movimm(imm, HOST_TEMPREG);
657 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
658 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
659 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
660 host_tempreg_release();
661 }
662 (void)name;
663}
664
665static void emit_andimm(u_int rs, u_int imm, u_int rt)
666{
667 if (imm == 0)
668 emit_zeroreg(rt);
669 else
670 emit_logicop_imm(0, rs, imm, rt);
671}
672
673static void emit_orimm(u_int rs, u_int imm, u_int rt)
674{
675 if (imm == 0) {
676 if (rs != rt)
677 emit_mov(rs, rt);
678 }
679 else
680 emit_logicop_imm(1, rs, imm, rt);
681}
682
683static void emit_xorimm(u_int rs, u_int imm, u_int rt)
684{
685 if (imm == 0) {
686 if (rs != rt)
687 emit_mov(rs, rt);
688 }
689 else
690 emit_logicop_imm(2, rs, imm, rt);
691}
692
693static void emit_sbfm(u_int rs,u_int imm,u_int rt)
694{
695 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
696 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
697}
698
699static void emit_ubfm(u_int rs,u_int imm,u_int rt)
700{
701 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
702 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
703}
704
705static void emit_shlimm(u_int rs,u_int imm,u_int rt)
706{
707 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
708 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
709}
710
711static void emit_shrimm(u_int rs,u_int imm,u_int rt)
712{
713 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
714 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
715}
716
717static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
718{
719 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
720 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
721}
722
723static void emit_sarimm(u_int rs,u_int imm,u_int rt)
724{
725 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
726 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
727}
728
729static void emit_rorimm(u_int rs,u_int imm,u_int rt)
730{
731 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
732 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
733}
734
735static void emit_signextend16(u_int rs, u_int rt)
736{
737 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
738 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
739}
740
741static void emit_shl(u_int rs,u_int rshift,u_int rt)
742{
743 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
744 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
745}
746
747static void emit_shr(u_int rs,u_int rshift,u_int rt)
748{
749 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
750 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
751}
752
753static void emit_sar(u_int rs,u_int rshift,u_int rt)
754{
755 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
756 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
757}
758
759static void emit_cmpimm(u_int rs, u_int imm)
760{
761 if (imm < 4096) {
762 assem_debug("cmp %s,%#x\n", regname[rs], imm);
763 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
764 }
765 else if (-imm < 4096) {
766 assem_debug("cmn %s,%#x\n", regname[rs], imm);
767 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
768 }
769 else if (imm < 16777216 && !(imm & 0xfff)) {
770 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
771 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
772 }
773 else {
774 host_tempreg_acquire();
775 emit_movimm(imm, HOST_TEMPREG);
776 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
777 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
778 host_tempreg_release();
779 }
780}
781
782static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
783{
784 assert(imm == 0 || imm == 1);
785 assert(cond0 < 0x10);
786 assert(cond1 < 0x10);
787 if (imm) {
788 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
789 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
790 } else {
791 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
792 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
793 }
794}
795
796static void emit_cmovne_imm(u_int imm,u_int rt)
797{
798 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
799}
800
801static void emit_cmovl_imm(u_int imm,u_int rt)
802{
803 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
804}
805
806static void emit_cmovb_imm(int imm,u_int rt)
807{
808 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
809}
810
811static void emit_cmoveq_reg(u_int rs,u_int rt)
812{
813 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
814 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
815}
816
817static void emit_cmovne_reg(u_int rs,u_int rt)
818{
819 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
820 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
821}
822
823static void emit_cmovl_reg(u_int rs,u_int rt)
824{
825 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
826 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
827}
828
829static void emit_cmovb_reg(u_int rs,u_int rt)
830{
831 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
832 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
833}
834
835static void emit_cmovs_reg(u_int rs,u_int rt)
836{
837 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
838 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
839}
840
841static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
842{
843 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
844 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
845}
846
847static void emit_slti32(u_int rs,int imm,u_int rt)
848{
849 if(rs!=rt) emit_zeroreg(rt);
850 emit_cmpimm(rs,imm);
851 if(rs==rt) emit_movimm(0,rt);
852 emit_cmovl_imm(1,rt);
853}
854
855static void emit_sltiu32(u_int rs,int imm,u_int rt)
856{
857 if(rs!=rt) emit_zeroreg(rt);
858 emit_cmpimm(rs,imm);
859 if(rs==rt) emit_movimm(0,rt);
860 emit_cmovb_imm(1,rt);
861}
862
863static void emit_cmp(u_int rs,u_int rt)
864{
865 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
866 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
867}
868
869static void emit_cmpcs(u_int rs,u_int rt)
870{
871 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
872 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
873}
874
875static void emit_set_gz32(u_int rs, u_int rt)
876{
877 //assem_debug("set_gz32\n");
878 emit_cmpimm(rs,1);
879 emit_movimm(1,rt);
880 emit_cmovl_imm(0,rt);
881}
882
883static void emit_set_nz32(u_int rs, u_int rt)
884{
885 //assem_debug("set_nz32\n");
886 if(rs!=rt) emit_mov(rs,rt);
887 emit_test(rs,rs);
888 emit_cmovne_imm(1,rt);
889}
890
891static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
892{
893 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
894 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
895 emit_cmp(rs1,rs2);
896 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
897 emit_cmovl_imm(1,rt);
898}
899
900static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
901{
902 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
903 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
904 emit_cmp(rs1,rs2);
905 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
906 emit_cmovb_imm(1,rt);
907}
908
909static int can_jump_or_call(const void *a)
910{
911 intptr_t diff = (u_char *)a - out;
912 return (-134217728 <= diff && diff <= 134217727);
913}
914
915static void emit_call(const void *a)
916{
917 intptr_t diff = (u_char *)a - out;
918 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
919 assert(!(diff & 3));
920 if (-134217728 <= diff && diff <= 134217727)
921 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
922 else
923 abort();
924}
925
926static void emit_jmp(const void *a)
927{
928 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
929 u_int offset = genjmp(a);
930 output_w32(0x14000000 | offset);
931}
932
933static void emit_jne(const void *a)
934{
935 assem_debug("bne %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_NE);
938}
939
940static void emit_jeq(const void *a)
941{
942 assem_debug("beq %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_EQ);
945}
946
947static void emit_js(const void *a)
948{
949 assem_debug("bmi %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_MI);
952}
953
954static void emit_jns(const void *a)
955{
956 assem_debug("bpl %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_PL);
959}
960
961static void emit_jl(const void *a)
962{
963 assem_debug("blt %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_LT);
966}
967
968static void emit_jge(const void *a)
969{
970 assem_debug("bge %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_GE);
973}
974
975static void emit_jno(const void *a)
976{
977 assem_debug("bvc %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_VC);
980}
981
982static void emit_jc(const void *a)
983{
984 assem_debug("bcs %p\n", a);
985 u_int offset = genjmpcc(a);
986 output_w32(0x54000000 | (offset << 5) | COND_CS);
987}
988
989static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
990{
991 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
992 u_int offset = genjmpcc(a);
993 is64 = is64 ? 0x80000000 : 0;
994 isnz = isnz ? 0x01000000 : 0;
995 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
996}
997
998static void *emit_cbz(u_int r, const void *a)
999{
1000 void *ret = out;
1001 emit_cb(0, 0, a, r);
1002 return ret;
1003}
1004
1005static void emit_jmpreg(u_int r)
1006{
1007 assem_debug("br %s\n", regname64[r]);
1008 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1009}
1010
1011static void emit_retreg(u_int r)
1012{
1013 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1014 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1015}
1016
1017static void emit_ret(void)
1018{
1019 emit_retreg(LR);
1020}
1021
1022static void emit_adr(void *addr, u_int rt)
1023{
1024 intptr_t offset = (u_char *)addr - out;
1025 assert(-1048576 <= offset && offset < 1048576);
1026 assert(rt < 31);
1027 assem_debug("adr x%d,#%#lx\n", rt, offset);
1028 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1029}
1030
1031static void emit_adrp(void *addr, u_int rt)
1032{
1033 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1034 assert(-4294967296l <= offset && offset < 4294967296l);
1035 assert(rt < 31);
1036 offset >>= 12;
1037 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1038 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1039}
1040
1041static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1042{
1043 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1044 assert(-256 <= offset && offset < 256);
1045 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1046}
1047
1048static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1049{
1050 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1051 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1052}
1053
1054static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055{
1056 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1058}
1059
1060static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061{
1062 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1064}
1065
1066static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1067{
1068 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1070}
1071#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1072
1073static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1074{
1075 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1076 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1077}
1078
1079static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1080{
1081 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1082 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1083}
1084
1085static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1086{
1087 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1088 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1089}
1090
1091static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1092{
1093 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1094 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1095}
1096
1097static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1098{
1099 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1100 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1101}
1102
1103static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1104{
1105 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1106 assert(-256 <= offset && offset < 256);
1107 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1108}
1109
1110static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1111{
1112 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1113 assert(-256 <= offset && offset < 256);
1114 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1115}
1116
1117static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1118{
1119 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1120 assert(-256 <= offset && offset < 256);
1121 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1122}
1123
1124static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1125{
1126 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1127 assert(-256 <= offset && offset < 256);
1128 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1129}
1130
1131static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1132{
1133 if (!(offset & 3) && (u_int)offset <= 16380) {
1134 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1135 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1139 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
1141 else
1142 assert(0);
1143}
1144
1145static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1146{
1147 if (!(offset & 1) && (u_int)offset <= 8190) {
1148 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1149 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1150 }
1151 else if (-256 <= offset && offset < 256) {
1152 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1153 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1154 }
1155 else
1156 assert(0);
1157}
1158
1159static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1160{
1161 if ((u_int)offset < 4096) {
1162 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1163 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1164 }
1165 else if (-256 <= offset && offset < 256) {
1166 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1167 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1168 }
1169 else
1170 assert(0);
1171}
1172
1173static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1174{
1175 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1176 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1177}
1178
1179static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1180{
1181 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1183}
1184
1185static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1186{
1187 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1188 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1189}
1190
1191static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1192{
1193 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1194 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1195}
1196
1197static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1198{
1199 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1200 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1201}
1202
1203static void emit_clz(u_int rs, u_int rt)
1204{
1205 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1206 output_w32(0x5ac01000 | rn_rd(rs, rt));
1207}
1208
1209// special case for checking invalid_code
1210static void emit_ldrb_indexedsr12_reg(u_int rbase, u_int r, u_int rt)
1211{
1212 emit_shrimm(r, 12, rt);
1213 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[rt],regname64[rbase],regname[rt]);
1214 output_w32(0x38604800 | rm_rn_rd(rt, rbase, rt));
1215}
1216
1217// special for loadlr_assemble, rs2 is destroyed
1218static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1219{
1220 emit_shl(rs2, shift, rs2);
1221 emit_bic(rs1, rs2, rt);
1222}
1223
1224static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1225{
1226 emit_shr(rs2, shift, rs2);
1227 emit_bic(rs1, rs2, rt);
1228}
1229
1230static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1231{
1232 u_int op = 0xb9000000;
1233 unused const char *ldst = is_st ? "st" : "ld";
1234 unused char rp = is64 ? 'x' : 'w';
1235 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1236 is64 = is64 ? 1 : 0;
1237 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1238 ofs = (ofs >> (2+is64));
1239 if (!is_st) op |= 0x00400000;
1240 if (is64) op |= 0x40000000;
1241 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1242}
1243
1244static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1245{
1246 u_int op = 0x29000000;
1247 unused const char *ldst = is_st ? "st" : "ld";
1248 unused char rp = is64 ? 'x' : 'w';
1249 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1250 is64 = is64 ? 1 : 0;
1251 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1252 ofs = (ofs >> (2+is64));
1253 assert(-64 <= ofs && ofs <= 63);
1254 ofs &= 0x7f;
1255 if (!is_st) op |= 0x00400000;
1256 if (is64) op |= 0x80000000;
1257 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1258}
1259
1260static void save_load_regs_all(int is_store, u_int reglist)
1261{
1262 int ofs = 0, c = 0;
1263 u_int r, pair[2];
1264 for (r = 0; reglist; r++, reglist >>= 1) {
1265 if (reglist & 1)
1266 pair[c++] = r;
1267 if (c == 2) {
1268 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1269 ofs += 8 * 2;
1270 c = 0;
1271 }
1272 }
1273 if (c) {
1274 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1275 ofs += 8;
1276 }
1277 assert(ofs <= SSP_CALLER_REGS);
1278}
1279
1280// Save registers before function call
1281static void save_regs(u_int reglist)
1282{
1283 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1284 save_load_regs_all(1, reglist);
1285}
1286
1287// Restore registers after function call
1288static void restore_regs(u_int reglist)
1289{
1290 reglist &= CALLER_SAVE_REGS;
1291 save_load_regs_all(0, reglist);
1292}
1293
1294/* Stubs/epilogue */
1295
1296static void literal_pool(int n)
1297{
1298 (void)literals;
1299}
1300
// Intentionally empty; n is ignored.
static void literal_pool_jumpover(int n)
{
  return;
}
1304
1305// parsed by get_pointer, find_extjump_insn
1306static void emit_extjump(u_char *addr, u_int target)
1307{
1308 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1309
1310 emit_movz(target & 0xffff, 0);
1311 emit_movk_lsl16(target >> 16, 0);
1312
1313 // addr is in the current recompiled block (max 256k)
1314 // offset shouldn't exceed +/-1MB
1315 emit_adr(addr, 1);
1316 emit_far_jump(dyna_linker);
1317}
1318
1319static void check_extjump2(void *src)
1320{
1321 u_int *ptr = src;
1322 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1323 (void)ptr;
1324}
1325
1326// put rt_val into rt, potentially making use of rs with value rs_val
1327static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1328{
1329 int diff = rt_val - rs_val;
1330 if ((-4096 < diff && diff < 4096)
1331 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1332 emit_addimm(rs, diff, rt);
1333 else if (rt_val == ~rs_val)
1334 emit_not(rs, rt);
1335 else if (is_rotated_mask(rs_val ^ rt_val))
1336 emit_xorimm(rs, rs_val ^ rt_val, rt);
1337 else
1338 emit_movimm(rt_val, rt);
1339}
1340
1341// return 1 if the above function can do it's job cheaply
1342static int is_similar_value(u_int v1, u_int v2)
1343{
1344 int diff = v1 - v2;
1345 return (-4096 < diff && diff < 4096)
1346 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1347 || v1 == ~v2
1348 || is_rotated_mask(v1 ^ v2);
1349}
1350
1351static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1352{
1353 if (rt_val < 0x100000000ull) {
1354 emit_movimm_from(rs_val, rs, rt_val, rt);
1355 return;
1356 }
1357 // just move the whole thing. At least on Linux all addresses
1358 // seem to be 48bit, so 3 insns - not great not terrible
1359 emit_movimm64(rt_val, rt);
1360}
1361
1362// trashes x2
1363static void pass_args64(u_int a0, u_int a1)
1364{
1365 if(a0==1&&a1==0) {
1366 // must swap
1367 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1368 }
1369 else if(a0!=0&&a1==0) {
1370 emit_mov64(a1,1);
1371 if (a0>=0) emit_mov64(a0,0);
1372 }
1373 else {
1374 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1375 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1376 }
1377}
1378
1379static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1380{
1381 switch(type) {
1382 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1383 case LOADBU_STUB:
1384 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1385 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1386 case LOADHU_STUB:
1387 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1388 case LOADW_STUB:
1389 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1390 default: assert(0);
1391 }
1392}
1393
1394#include "pcsxmem.h"
1395//#include "pcsxmem_inline.c"
1396
// Emit the out-of-line slow path for memory read stub n.
// The stub looks the address (held in host register stubs[n].b) up in
// mem_rtab. Depending on the carry produced by doubling the table entry
// it either loads directly from [entry*2 + address] or calls one of the
// jump_handler_read* routines, then jumps back to stubs[n].retaddr.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  // the branch in the main code that leads here is resolved to this point
  set_jump_target(stubs[n].addr, out);
  enum stub_type type = stubs[n].type;
  int i = stubs[n].a;
  int rs = stubs[n].b;
  const struct regstat *i_regs = (void *)stubs[n].c;
  u_int reglist = stubs[n].e;
  const signed char *i_regmap = i_regs->regmap;
  int rt;
  // destination host register: FTEMP for C1LS/C2LS/LOADLR, else rt1's
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,dops[i].rt1);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL, *handler_jump = NULL;
  // the address register must not be picked as a scratch register
  reglist|=(1<<rs);
  // look for a host register below HOST_CCREG that is not in reglist
  for (r = 0; r < HOST_CCREG; r++) {
    if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
      temp = r;
      break;
    }
  }
  // the destination is about to be overwritten, no need to preserve it
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // nothing free: save the registers first, then borrow x0 (or x2)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // use x1 as the second scratch when it is saved or not in reglist
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs >> 12], then doubled with flags set
  emit_readdword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readdword_dualindexedx8(temp,temp2,temp2);
  emit_adds64(temp2,temp2,temp2);
  handler_jump=out;
  // carry set (top bit of the table entry) -> take the handler path
  emit_jc(0);
  // direct path: load from [temp2 + rs]
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    switch(type) {
      case LOADB_STUB:  emit_ldrsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB:  emit_ldrsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB:  emit_ldr_dualindexed(temp2,rs,rt); break;
      default:          assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jmp(0); // jump to reg restore
  }
  else
    emit_jmp(stubs[n].retaddr); // return address
  set_jump_target(handler_jump, out);

  // handler path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // x0 = address, x1 = doubled table entry
  pass_args64(rs,temp2);
  // x2 = cycle count register plus stubs[n].d
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
  emit_far_call(handler);
  // (no cycle reload after read)
  // the handler's result in w0 is extended into rt according to type
  if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
    loadstore_extend(type,0,rt);
  }
  // the direct path joins here when registers were saved up front
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1481
// Emit inline code for a read from the constant address addr.
// get_direct_memhandler() decides the strategy: when it returns NULL the
// data is read straight from host_addr, otherwise a handler is called
// with the caller-saved registers in reglist preserved around the call.
//  type    - LOADx_STUB selecting the access width and signedness
//  i       - index of the instruction in dops[]
//  regmap  - current guest-to-host register mapping
//  target  - guest register looked up for both the address and the result
//  adj     - value added to the cycle count passed to the handler
static void inline_readstub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  // fall back to a temporary when target has no host register
  if(rs<0) rs=get_reg_temp(regmap);
  assert(rs>=0);
  u_int is_dynamic=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
  //  return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // direct access; nothing to do if the result is not wanted
    if(rt<0||dops[i].rt1==0)
      return;
    // rs is treated as holding addr; turn it into host_addr if different
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch(type) {
      case LOADB_STUB:  emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB:  emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB:  emit_readword_indexed(0,rs,rt); break;
      default:          assert(0);
    }
    return;
  }
  is_dynamic = pcsxmem_is_handler_dynamic(addr);
  if (is_dynamic) {
    // dynamic handlers go through the generic jump_handler_read* routines
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  // the destination is overwritten anyway, so it is not preserved
  if(rt>=0&&dops[i].rt1!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // x0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // x2 = cycle count register plus adj
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,adj,2);
  if(is_dynamic) {
    // x1 = doubled mem_rtab entry for this page, via adrp+add when the
    // value is within adrp range of the output position, else a full move
    uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
    intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
    if (-4294967296l <= offset && offset < 4294967296l) {
      emit_adrp((void *)l1, 1);
      emit_addimm64(1, l1 & 0xfff, 1);
    }
    else
      emit_movimm64(l1, 1);
  }
  else
    emit_far_call(do_memhandler_pre);

  emit_far_call(handler);

  // (no cycle reload after read)
  // the handler's result in w0 is extended into rt according to type
  if(rt>=0&&dops[i].rt1!=0)
    loadstore_extend(type, 0, rt);
  restore_regs(reglist);
}
1552
1553static void do_writestub(int n)
1554{
1555 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1556 set_jump_target(stubs[n].addr, out);
1557 enum stub_type type=stubs[n].type;
1558 int i=stubs[n].a;
1559 int rs=stubs[n].b;
1560 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1561 u_int reglist=stubs[n].e;
1562 signed char *i_regmap=i_regs->regmap;
1563 int rt,r;
1564 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1565 rt=get_reg(i_regmap,r=FTEMP);
1566 }else{
1567 rt=get_reg(i_regmap,r=dops[i].rs2);
1568 }
1569 assert(rs>=0);
1570 assert(rt>=0);
1571 int rtmp,temp=-1,temp2,regs_saved=0;
1572 void *restore_jump = NULL, *handler_jump = NULL;
1573 int reglist2=reglist|(1<<rs)|(1<<rt);
1574 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1575 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1576 temp = rtmp;
1577 break;
1578 }
1579 }
1580 if(temp==-1) {
1581 save_regs(reglist);
1582 regs_saved=1;
1583 for(rtmp=0;rtmp<=3;rtmp++)
1584 if(rtmp!=rs&&rtmp!=rt)
1585 {temp=rtmp;break;}
1586 }
1587 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1588 temp2=3;
1589 else {
1590 host_tempreg_acquire();
1591 temp2=HOST_TEMPREG;
1592 }
1593 emit_readdword(&mem_wtab,temp);
1594 emit_shrimm(rs,12,temp2);
1595 emit_readdword_dualindexedx8(temp,temp2,temp2);
1596 emit_adds64(temp2,temp2,temp2);
1597 handler_jump=out;
1598 emit_jc(0);
1599 switch(type) {
1600 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1601 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1602 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1603 default: assert(0);
1604 }
1605 if(regs_saved) {
1606 restore_jump=out;
1607 emit_jmp(0); // jump to reg restore
1608 }
1609 else
1610 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1611 set_jump_target(handler_jump, out);
1612
1613 if(!regs_saved)
1614 save_regs(reglist);
1615 void *handler=NULL;
1616 switch(type) {
1617 case STOREB_STUB: handler=jump_handler_write8; break;
1618 case STOREH_STUB: handler=jump_handler_write16; break;
1619 case STOREW_STUB: handler=jump_handler_write32; break;
1620 default: assert(0);
1621 }
1622 assert(handler);
1623 pass_args(rs,rt);
1624 if(temp2!=3) {
1625 emit_mov64(temp2,3);
1626 host_tempreg_release();
1627 }
1628 int cc=get_reg(i_regmap,CCREG);
1629 if(cc<0)
1630 emit_loadreg(CCREG,2);
1631 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1632 // returns new cycle_count
1633 emit_far_call(handler);
1634 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1635 if(cc<0)
1636 emit_storereg(CCREG,2);
1637 if(restore_jump)
1638 set_jump_target(restore_jump, out);
1639 restore_regs(reglist);
1640 emit_jmp(stubs[n].retaddr);
1641}
1642
// Emit inline code for a write to the constant address addr.
// get_direct_memhandler() decides the strategy: when it returns NULL the
// data is stored straight to host_addr, otherwise the handler is called
// between do_memhandler_pre and do_memhandler_post with the caller-saved
// registers in reglist preserved around the calls.
//  type    - STOREx_STUB selecting the access width
//  i       - index of the instruction (not used in this function)
//  regmap  - current guest-to-host register mapping
//  target  - guest register holding the value to store
//  adj     - value added to the cycle count passed to the handler
static void inline_writestub(enum stub_type type, int i, u_int addr,
  const signed char regmap[], int target, int adj, u_int reglist)
{
  int rs = get_reg_temp(regmap);
  int rt = get_reg(regmap,target);
  assert(rs >= 0);
  assert(rt >= 0);
  uintptr_t host_addr = 0;
  void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
  if (handler == NULL) {
    // rs is treated as holding addr; turn it into host_addr if different
    if (addr != host_addr)
      emit_movimm_from64(addr, rs, host_addr, rs);
    switch (type) {
      case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
      case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
      case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
      default:          assert(0);
    }
    return;
  }

  // call a memhandler
  save_regs(reglist);
  emit_writeword(rs, &address); // some handlers still need it
  // w0 = value, narrowed to the access width
  loadstore_extend(type, rt, 0);
  // x2 = cycle count plus adj; cc_use is the register holding the count
  int cc, cc_use;
  cc = cc_use = get_reg(regmap, CCREG);
  if (cc < 0)
    emit_loadreg(CCREG, (cc_use = 2));
  emit_addimm(cc_use, adj, 2);

  emit_far_call(do_memhandler_pre);
  emit_far_call(handler);
  emit_far_call(do_memhandler_post);
  // w0 minus adj goes back into the cycle count register
  emit_addimm(0, -adj, cc_use);
  if (cc < 0)
    emit_storereg(CCREG, cc_use);
  restore_regs(reglist);
}
1682
1683/* Special assem */
1684
1685static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1686{
1687 save_load_regs_all(1, reglist);
1688 cop2_do_stall_check(op, i, i_regs, 0);
1689#ifdef PCNT
1690 emit_movimm(op, 0);
1691 emit_far_call(pcnt_gte_start);
1692#endif
1693 // pointer to cop2 regs
1694 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1695}
1696
1697static void c2op_epilogue(u_int op,u_int reglist)
1698{
1699#ifdef PCNT
1700 emit_movimm(op, 0);
1701 emit_far_call(pcnt_gte_end);
1702#endif
1703 save_load_regs_all(0, reglist);
1704}
1705
1706static void c2op_assemble(int i, const struct regstat *i_regs)
1707{
1708 u_int c2op=source[i]&0x3f;
1709 u_int hr,reglist_full=0,reglist;
1710 int need_flags,need_ir;
1711 for(hr=0;hr<HOST_REGS;hr++) {
1712 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1713 }
1714 reglist=reglist_full&CALLER_SAVE_REGS;
1715
1716 if (gte_handlers[c2op]!=NULL) {
1717 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1718 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1719 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1720 source[i],gte_unneeded[i+1],need_flags,need_ir);
1721 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1722 need_flags=0;
1723 //int shift = (source[i] >> 19) & 1;
1724 //int lm = (source[i] >> 10) & 1;
1725 switch(c2op) {
1726 default:
1727 (void)need_ir;
1728 c2op_prologue(c2op, i, i_regs, reglist);
1729 emit_movimm(source[i],1); // opcode
1730 emit_writeword(1,&psxRegs.code);
1731 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1732 break;
1733 }
1734 c2op_epilogue(c2op,reglist);
1735 }
1736}
1737
1738static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1739{
1740 //value = value & 0x7ffff000;
1741 //if (value & 0x7f87e000) value |= 0x80000000;
1742 emit_andimm(sl, 0x7fffe000, temp);
1743 emit_testimm(temp, 0xff87ffff);
1744 emit_andimm(sl, 0x7ffff000, temp);
1745 host_tempreg_acquire();
1746 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1747 emit_cmovne_reg(HOST_TEMPREG, temp);
1748 host_tempreg_release();
1749 assert(0); // testing needed
1750}
1751
1752static void do_mfc2_31_one(u_int copr,signed char temp)
1753{
1754 emit_readshword(&reg_cop2d[copr],temp);
1755 emit_bicsar_imm(temp,31,temp);
1756 emit_cmpimm(temp,0xf80);
1757 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1758 emit_andimm(temp,0xf80,temp);
1759}
1760
1761static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1762{
1763 if (temp < 0) {
1764 host_tempreg_acquire();
1765 temp = HOST_TEMPREG;
1766 }
1767 do_mfc2_31_one(9,temp);
1768 emit_shrimm(temp,7,tl);
1769 do_mfc2_31_one(10,temp);
1770 emit_orrshr_imm(temp,2,tl);
1771 do_mfc2_31_one(11,temp);
1772 emit_orrshl_imm(temp,3,tl);
1773 emit_writeword(tl,&reg_cop2d[29]);
1774
1775 if (temp == HOST_TEMPREG)
1776 host_tempreg_release();
1777}
1778
1779static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1780{
1781 // case 0x18: MULT
1782 // case 0x19: MULTU
1783 // case 0x1A: DIV
1784 // case 0x1B: DIVU
1785 if(dops[i].rs1&&dops[i].rs2)
1786 {
1787 switch(dops[i].opcode2)
1788 {
1789 case 0x18: // MULT
1790 case 0x19: // MULTU
1791 {
1792 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1793 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1794 signed char hi=get_reg(i_regs->regmap,HIREG);
1795 signed char lo=get_reg(i_regs->regmap,LOREG);
1796 assert(m1>=0);
1797 assert(m2>=0);
1798 assert(hi>=0);
1799 assert(lo>=0);
1800
1801 if(dops[i].opcode2==0x18) // MULT
1802 emit_smull(m1,m2,hi);
1803 else // MULTU
1804 emit_umull(m1,m2,hi);
1805
1806 emit_mov(hi,lo);
1807 emit_shrimm64(hi,32,hi);
1808 break;
1809 }
1810 case 0x1A: // DIV
1811 case 0x1B: // DIVU
1812 {
1813 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1814 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1815 signed char quotient=get_reg(i_regs->regmap,LOREG);
1816 signed char remainder=get_reg(i_regs->regmap,HIREG);
1817 assert(numerator>=0);
1818 assert(denominator>=0);
1819 assert(quotient>=0);
1820 assert(remainder>=0);
1821
1822 if (dops[i].opcode2 == 0x1A) // DIV
1823 emit_sdiv(numerator,denominator,quotient);
1824 else // DIVU
1825 emit_udiv(numerator,denominator,quotient);
1826 emit_msub(quotient,denominator,numerator,remainder);
1827
1828 // div 0 quotient (remainder is already correct)
1829 host_tempreg_acquire();
1830 if (dops[i].opcode2 == 0x1A) // DIV
1831 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1832 else
1833 emit_movimm(~0,HOST_TEMPREG);
1834 emit_test(denominator,denominator);
1835 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1836 host_tempreg_release();
1837 break;
1838 }
1839 default:
1840 assert(0);
1841 }
1842 }
1843 else
1844 {
1845 signed char hr=get_reg(i_regs->regmap,HIREG);
1846 signed char lr=get_reg(i_regs->regmap,LOREG);
1847 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1848 {
1849 if (dops[i].rs1) {
1850 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1851 assert(numerator >= 0);
1852 if (hr >= 0)
1853 emit_mov(numerator,hr);
1854 if (lr >= 0) {
1855 if (dops[i].opcode2 == 0x1A) // DIV
1856 emit_sub_asrimm(0,numerator,31,lr);
1857 else
1858 emit_movimm(~0,lr);
1859 }
1860 }
1861 else {
1862 if (hr >= 0) emit_zeroreg(hr);
1863 if (lr >= 0) emit_movimm(~0,lr);
1864 }
1865 }
1866 else
1867 {
1868 // Multiply by zero is zero.
1869 if (hr >= 0) emit_zeroreg(hr);
1870 if (lr >= 0) emit_zeroreg(lr);
1871 }
1872 }
1873}
1874#define multdiv_assemble multdiv_assemble_arm64
1875
1876static void do_jump_vaddr(u_int rs)
1877{
1878 if (rs != 0)
1879 emit_mov(rs, 0);
1880 emit_far_call(ndrc_get_addr_ht);
1881 emit_jmpreg(0);
1882}
1883
1884static void do_preload_rhash(u_int r) {
1885 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1886 // register. On ARM the hash can be done with a single instruction (below)
1887}
1888
1889static void do_preload_rhtbl(u_int ht) {
1890 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1891}
1892
1893static void do_rhash(u_int rs,u_int rh) {
1894 emit_andimm(rs, 0xf8, rh);
1895}
1896
1897static void do_miniht_load(int ht, u_int rh) {
1898 emit_add64(ht, rh, ht);
1899 emit_ldst(0, 0, rh, ht, 0);
1900}
1901
1902static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1903 emit_cmp(rh, rs);
1904 void *jaddr = out;
1905 emit_jeq(0);
1906 do_jump_vaddr(rs);
1907
1908 set_jump_target(jaddr, out);
1909 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1910 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1911 emit_jmpreg(ht);
1912}
1913
1914// parsed by set_jump_target?
1915static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1916 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1917 emit_movk(return_address&0xffff,rt);
1918 add_to_linker(out,return_address,1);
1919 emit_adr(out,temp);
1920 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1921 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1922}
1923
// Make code written to [start, end) visible to instruction fetch:
// data cache maintenance by line over the range (unless CTR_EL0 bit 28
// is set), then instruction cache invalidation by line (unless CTR_EL0
// bit 29 is set), with the required barriers.
static unused void clear_cache_arm64(char *start, char *end)
{
  // Don't rely on GCC's __clear_cache implementation, as it caches
  // icache/dcache cache line sizes, that can vary between cores on
  // big.LITTLE architectures.
  uint64_t addr, ctr_el0;
  // smallest line sizes seen so far over all calls
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  size_t isize, dsize;

  // line sizes of the current core: 4 << field, taken from
  // CTR_EL0 bits 3:0 (icache) and 19:16 (dcache)
  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
  dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    // start from the line containing 'start'
    addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
    for (; addr < (uint64_t)end; addr += dsize)
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    // start from the line containing 'start'
    addr = (uint64_t)start & ~(uint64_t)(isize - 1);
    for (; addr < (uint64_t)end; addr += isize)
      __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1964
1965// CPU-architecture-specific initialization
1966static void arch_init(void)
1967{
1968 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1969 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
1970 size_t i;
1971 assert(!(diff & 3));
1972 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1973 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1974 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1975 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1976 }
1977 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1978}
1979
1980// vim:shiftwidth=2:expandtab