drc: botched msb bit check
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26#define CALLER_SAVE_REGS 0x0007ffff
27
28#define unused __attribute__((unused))
29
30void do_memhandler_pre();
31void do_memhandler_post();
32
33/* Linker */
// Patch the pc-relative instruction at addr so that it refers to target.
// Supported instructions: b, b.cond, cbz/cbnz and adr; anything else aborts.
// The byte offset is computed from addr (the insn's own address) to target.
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_jump_out).
    // A workaround would be to do a trampoline jump via a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // Only bits 23:5 (imm19) are replaced. Bits 4:0 must all be kept:
    // for cbz/cbnz they hold the full 5-bit Rt, so masking with ...0F
    // would clear bit 4 and redirect the test to register (Rt & 15).
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    // immlo goes to bits 30:29, immhi to bits 23:5
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
60
// From a pointer to an external jump stub (which was produced by emit_extjump2),
// find where the jumping insn is.
// The third word of the stub must be an adr; its pc-relative target
// (relative to that adr) is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  // Decode on an unsigned copy: shifting signed values left is undefined.
  u_int insn = (u_int)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // imm21 = immhi (bits 23:5) : immlo (bits 30:29)
  u_int imm21 = (((insn >> 5) & 0x7ffff) << 2) | ((insn >> 29) & 0x3);
  // sign-extend from 21 bits
  int offset = (int)(imm21 ^ 0x100000u) - 0x100000;
  return ptr + offset / 4;
}
70
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn through find_extjump_insn(), decode its
// pc-relative immediate (b: imm26, b.cond/cbz/cbnz: imm19) and
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);

  if ((*branch&0xfc000000) == 0x14000000) { // b
    int words = (signed int)(*branch<<6)>>6;
    return branch + words;
  }
  if ((*branch&0xff000000) != 0x54000000     // neither b.cond
      && (*branch&0x7e000000) != 0x34000000) { // nor cbz/cbnz
    assert(0);
    return NULL;
  }
  return branch + ((signed int)(*branch<<8)>>13);
}
86
87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes, numbered 0..15 in the order used by the cond field
// of the conditional instructions emitted below.
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141};
142
143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
149static void output_w64(uint64_t dword)
150{
151 *((uint64_t *)out) = dword;
152 out+=8;
153}
154
155/*
156static u_int rm_rd(u_int rm, u_int rd)
157{
158 assert(rm < 31);
159 assert(rd < 31);
160 return (rm << 16) | rd;
161}
162*/
163
// Encode the Rn (bits 9:5) and Rd (bits 4:0) fields.
// Register 31 is rejected for both.
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  return fields;
}
170
// Encode the Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) fields.
// Register 31 is accepted for all three.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
178
// Encode the Rm (bits 20:16), Ra (bits 14:10), Rn (bits 9:5) and
// Rd (bits 4:0) fields. Register 31 is accepted for all four.
static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
{
  assert(ra < 32);
  // same checks and layout as rm_rn_rd()
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return ((rm << 16) | (rn << 5) | rd) | (ra << 10);
}
184
// Encode imm7 (bits 21:15), Rt2 (bits 14:10), Rn (bits 9:5) and Rt (bits 4:0).
// Rt and Rt2 must be below 31; Rn may be 31.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
193
// Encode Rm (bits 20:16), imm6 (bits 15:10), Rn (bits 9:5) and Rd (bits 4:0).
// Register 31 is accepted for all three register fields.
static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
{
  assert(imm6 <= 63);
  // same checks and layout as rm_rn_rd()
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  return ((rm << 16) | (rn << 5) | rd) | (imm6 << 10);
}
199
// Encode imm16 (bits 20:5) and Rd (bits 4:0). Rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
206
// Encode imm12 (bits 21:10), Rn (bits 9:5) and Rd (bits 4:0).
// Register 31 is accepted for both register fields.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
214
// Encode imm9 (bits 20:12), Rn (bits 9:5) and Rt (bits 4:0; parameter rd).
// Both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
222
// Encode imm19 (bits 23:5) and Rt (bits 4:0). Rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
229
// Encode N (bit 22), immr (bits 21:16), imms (bits 15:10),
// Rn (bits 9:5) and Rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
239
240static u_int genjmp(const u_char *addr)
241{
242 intptr_t offset = addr - out;
243 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
244 if (offset < -134217728 || offset > 134217727) {
245 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
246 abort();
247 return 0;
248 }
249 return ((u_int)offset >> 2) & 0x03ffffff;
250}
251
252static u_int genjmpcc(const u_char *addr)
253{
254 intptr_t offset = addr - out;
255 if ((uintptr_t)addr < 3) return 0;
256 if (offset < -1048576 || offset > 1048572) {
257 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
258 abort();
259 return 0;
260 }
261 return ((u_int)offset >> 2) & 0x7ffff;
262}
263
// Returns 1 if value is a non-empty run of ones starting at bit 0
// (i.e. value + 1 is a power of two, or wraps to 0), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
268
// Returns true if the argument contains a non-empty sequence of ones
// (possibly rotated) with the remainder zero. All-zeros and all-ones
// are rejected.
static uint32_t is_rotated_mask(u_int value)
{
  const u_int inverted = ~value;

  if (value == 0 || value == ~0)
    return 0;
  // contiguous ones: filling the bits below the run gives a low mask
  if (is_mask((value - 1) | value))
    return 1;
  // otherwise the ones wrap around, so the zeros must be contiguous
  return is_mask((inverted - 1) | inverted);
}
279
// Compute the immr/imms fields of a 32-bit logical immediate for value.
// value must be a (possibly rotated) contiguous run of ones; callers
// check this with is_rotated_mask(). Aborts if no encoding exists.
static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
{
  int lzeros, tzeros, ones;
  // Neither all-zeros nor all-ones has an encoding. All-ones would
  // otherwise fall into the first branch and yield imms=31, which is
  // a reserved value for a 32-bit element.
  assert(value != 0);
  assert(value != ~0u);
  if (is_mask((value - 1) | value)) {
    // ones are contiguous and do not wrap around bit 31
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros;
    *immr = (32 - tzeros) & 31; // rotate right so the run starts at tzeros
    *imms = ones - 1;
    return;
  }
  value = ~value;
  if (is_mask((value - 1) | value)) {
    // ones wrap around; value now holds the (contiguous) zeros
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros; // number of zeros in the original value
    *immr = lzeros;
    *imms = 31 - ones;
    return;
  }
  abort();
}
303
304static void emit_mov(u_int rs, u_int rt)
305{
306 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
307 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
308}
309
310static void emit_mov64(u_int rs, u_int rt)
311{
312 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
313 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
314}
315
316static void emit_add(u_int rs1, u_int rs2, u_int rt)
317{
318 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
319 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
320}
321
322static void emit_add64(u_int rs1, u_int rs2, u_int rt)
323{
324 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
325 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
326}
327
328static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
329{
330 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
331 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
332}
333#define emit_adds_ptr emit_adds64
334
335static void emit_neg(u_int rs, u_int rt)
336{
337 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
338 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
339}
340
341static void emit_sub(u_int rs1, u_int rs2, u_int rt)
342{
343 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
344 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
345}
346
347static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
348{
349 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
350 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
351}
352
353static void emit_movz(u_int imm, u_int rt)
354{
355 assem_debug("movz %s,#%#x\n", regname[rt], imm);
356 output_w32(0x52800000 | imm16_rd(imm, rt));
357}
358
359static void emit_movz_lsl16(u_int imm, u_int rt)
360{
361 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
362 output_w32(0x52a00000 | imm16_rd(imm, rt));
363}
364
365static void emit_movn(u_int imm, u_int rt)
366{
367 assem_debug("movn %s,#%#x\n", regname[rt], imm);
368 output_w32(0x12800000 | imm16_rd(imm, rt));
369}
370
371static void emit_movn_lsl16(u_int imm,u_int rt)
372{
373 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x12a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_movk(u_int imm,u_int rt)
378{
379 assem_debug("movk %s,#%#x\n", regname[rt], imm);
380 output_w32(0x72800000 | imm16_rd(imm, rt));
381}
382
383static void emit_movk_lsl16(u_int imm,u_int rt)
384{
385 assert(imm<65536);
386 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
387 output_w32(0x72a00000 | imm16_rd(imm, rt));
388}
389
390static void emit_zeroreg(u_int rt)
391{
392 emit_movz(0, rt);
393}
394
395static void emit_movimm(u_int imm, u_int rt)
396{
397 if (imm < 65536)
398 emit_movz(imm, rt);
399 else if ((~imm) < 65536)
400 emit_movn(~imm, rt);
401 else if ((imm&0xffff) == 0)
402 emit_movz_lsl16(imm >> 16, rt);
403 else if (((~imm)&0xffff) == 0)
404 emit_movn_lsl16(~imm >> 16, rt);
405 else if (is_rotated_mask(imm)) {
406 u_int immr, imms;
407 gen_logical_imm(imm, &immr, &imms);
408 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
409 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
410 }
411 else {
412 emit_movz(imm & 0xffff, rt);
413 emit_movk_lsl16(imm >> 16, rt);
414 }
415}
416
417static void emit_readword(void *addr, u_int rt)
418{
419 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
420 if (!(offset & 3) && offset <= 16380) {
421 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
422 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
423 }
424 else
425 abort();
426}
427
428static void emit_readdword(void *addr, u_int rt)
429{
430 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
431 if (!(offset & 7) && offset <= 32760) {
432 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
433 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
434 }
435 else
436 abort();
437}
438#define emit_readptr emit_readdword
439
440static void emit_readshword(void *addr, u_int rt)
441{
442 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
443 if (!(offset & 1) && offset <= 8190) {
444 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
445 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
446 }
447 else
448 assert(0);
449}
450
451static void emit_loadreg(u_int r, u_int hr)
452{
453 int is64 = 0;
454 assert(r < 64);
455 if (r == 0)
456 emit_zeroreg(hr);
457 else {
458 void *addr = &psxRegs.GPR.r[r];
459 switch (r) {
460 //case HIREG: addr = &hi; break;
461 //case LOREG: addr = &lo; break;
462 case CCREG: addr = &cycle_count; break;
463 case CSREG: addr = &Status; break;
464 case INVCP: addr = &invc_ptr; is64 = 1; break;
465 default: assert(r < 34); break;
466 }
467 if (is64)
468 emit_readdword(addr, hr);
469 else
470 emit_readword(addr, hr);
471 }
472}
473
474static void emit_writeword(u_int rt, void *addr)
475{
476 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
477 if (!(offset & 3) && offset <= 16380) {
478 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
479 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
480 }
481 else
482 assert(0);
483}
484
485static void emit_writedword(u_int rt, void *addr)
486{
487 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
488 if (!(offset & 7) && offset <= 32760) {
489 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
490 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
491 }
492 else
493 abort();
494}
495
496static void emit_storereg(u_int r, u_int hr)
497{
498 assert(r < 64);
499 void *addr = &psxRegs.GPR.r[r];
500 switch (r) {
501 //case HIREG: addr = &hi; break;
502 //case LOREG: addr = &lo; break;
503 case CCREG: addr = &cycle_count; break;
504 default: assert(r < 34); break;
505 }
506 emit_writeword(hr, addr);
507}
508
509static void emit_test(u_int rs, u_int rt)
510{
511 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
512 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
513}
514
515static void emit_testimm(u_int rs, u_int imm)
516{
517 u_int immr, imms;
518 assem_debug("tst %s,#%#x\n", regname[rs], imm);
519 assert(is_rotated_mask(imm)); // good enough for PCSX
520 gen_logical_imm(imm, &immr, &imms);
521 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
522}
523
524static void emit_not(u_int rs,u_int rt)
525{
526 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
527 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
528}
529
530static void emit_and(u_int rs1,u_int rs2,u_int rt)
531{
532 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
533 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
534}
535
536static void emit_or(u_int rs1,u_int rs2,u_int rt)
537{
538 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
539 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
540}
541
542static void emit_bic(u_int rs1,u_int rs2,u_int rt)
543{
544 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
545 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
546}
547
548static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
549{
550 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
551 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
552}
553
554static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
555{
556 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
557 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
558}
559
560static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
561{
562 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
563 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
564}
565
566static void emit_xor(u_int rs1,u_int rs2,u_int rt)
567{
568 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
569 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
570}
571
572static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
573{
574 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
575 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
576}
577
578static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
579{
580 unused const char *st = s ? "s" : "";
581 s = s ? 0x20000000 : 0;
582 is64 = is64 ? 0x80000000 : 0;
583 if (imm < 4096) {
584 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
585 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
586 }
587 else if (-imm < 4096) {
588 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
589 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
590 }
591 else if (imm < 16777216) {
592 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
593 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
594 if ((imm & 0xfff) || s) {
595 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
596 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
597 }
598 }
599 else if (-imm < 16777216) {
600 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
601 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
602 if ((imm & 0xfff) || s) {
603 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
604 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
605 }
606 }
607 else
608 abort();
609}
610
611static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
612{
613 emit_addimm_s(0, 0, rs, imm, rt);
614}
615
616static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
617{
618 emit_addimm_s(0, 1, rs, imm, rt);
619}
620
621static void emit_addimm_and_set_flags(int imm, u_int rt)
622{
623 emit_addimm_s(1, 0, rt, imm, rt);
624}
625
626static void emit_addimm_no_flags(u_int imm,u_int rt)
627{
628 emit_addimm(rt,imm,rt);
629}
630
631static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
632{
633 const char *names[] = { "and", "orr", "eor", "ands" };
634 const char *name = names[op];
635 u_int immr, imms;
636 op = op << 29;
637 if (is_rotated_mask(imm)) {
638 gen_logical_imm(imm, &immr, &imms);
639 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
640 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
641 }
642 else {
643 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
644 host_tempreg_acquire();
645 emit_movimm(imm, HOST_TEMPREG);
646 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
647 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
648 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
649 host_tempreg_release();
650 }
651 (void)name;
652}
653
654static void emit_andimm(u_int rs, u_int imm, u_int rt)
655{
656 if (imm == 0)
657 emit_zeroreg(rt);
658 else
659 emit_logicop_imm(0, rs, imm, rt);
660}
661
662static void emit_orimm(u_int rs, u_int imm, u_int rt)
663{
664 if (imm == 0) {
665 if (rs != rt)
666 emit_mov(rs, rt);
667 }
668 else
669 emit_logicop_imm(1, rs, imm, rt);
670}
671
672static void emit_xorimm(u_int rs, u_int imm, u_int rt)
673{
674 if (imm == 0) {
675 if (rs != rt)
676 emit_mov(rs, rt);
677 }
678 else
679 emit_logicop_imm(2, rs, imm, rt);
680}
681
682static void emit_sbfm(u_int rs,u_int imm,u_int rt)
683{
684 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
685 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
686}
687
688static void emit_ubfm(u_int rs,u_int imm,u_int rt)
689{
690 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
692}
693
694static void emit_shlimm(u_int rs,u_int imm,u_int rt)
695{
696 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
698}
699
700static void emit_shrimm(u_int rs,u_int imm,u_int rt)
701{
702 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
704}
705
706static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
707{
708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
710}
711
712static void emit_sarimm(u_int rs,u_int imm,u_int rt)
713{
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
716}
717
718static void emit_rorimm(u_int rs,u_int imm,u_int rt)
719{
720 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
722}
723
724static void emit_signextend16(u_int rs, u_int rt)
725{
726 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
727 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
728}
729
730static void emit_shl(u_int rs,u_int rshift,u_int rt)
731{
732 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
733 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
734}
735
736static void emit_shr(u_int rs,u_int rshift,u_int rt)
737{
738 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
739 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
740}
741
742static void emit_sar(u_int rs,u_int rshift,u_int rt)
743{
744 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
746}
747
748static void emit_cmpimm(u_int rs, u_int imm)
749{
750 if (imm < 4096) {
751 assem_debug("cmp %s,%#x\n", regname[rs], imm);
752 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
753 }
754 else if (-imm < 4096) {
755 assem_debug("cmn %s,%#x\n", regname[rs], imm);
756 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
757 }
758 else if (imm < 16777216 && !(imm & 0xfff)) {
759 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
760 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
761 }
762 else {
763 host_tempreg_acquire();
764 emit_movimm(imm, HOST_TEMPREG);
765 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
766 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
767 host_tempreg_release();
768 }
769}
770
771static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
772{
773 assert(imm == 0 || imm == 1);
774 assert(cond0 < 0x10);
775 assert(cond1 < 0x10);
776 if (imm) {
777 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
778 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
779 } else {
780 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
781 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
782 }
783}
784
785static void emit_cmovne_imm(u_int imm,u_int rt)
786{
787 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
788}
789
790static void emit_cmovl_imm(u_int imm,u_int rt)
791{
792 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
793}
794
795static void emit_cmovb_imm(int imm,u_int rt)
796{
797 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
798}
799
800static void emit_cmoveq_reg(u_int rs,u_int rt)
801{
802 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
803 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
804}
805
806static void emit_cmovne_reg(u_int rs,u_int rt)
807{
808 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
810}
811
812static void emit_cmovl_reg(u_int rs,u_int rt)
813{
814 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
816}
817
818static void emit_cmovb_reg(u_int rs,u_int rt)
819{
820 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
822}
823
824static void emit_cmovs_reg(u_int rs,u_int rt)
825{
826 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
828}
829
830static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
831{
832 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
833 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
834}
835
836static void emit_slti32(u_int rs,int imm,u_int rt)
837{
838 if(rs!=rt) emit_zeroreg(rt);
839 emit_cmpimm(rs,imm);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovl_imm(1,rt);
842}
843
844static void emit_sltiu32(u_int rs,int imm,u_int rt)
845{
846 if(rs!=rt) emit_zeroreg(rt);
847 emit_cmpimm(rs,imm);
848 if(rs==rt) emit_movimm(0,rt);
849 emit_cmovb_imm(1,rt);
850}
851
852static void emit_cmp(u_int rs,u_int rt)
853{
854 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
855 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
856}
857
858static void emit_set_gz32(u_int rs, u_int rt)
859{
860 //assem_debug("set_gz32\n");
861 emit_cmpimm(rs,1);
862 emit_movimm(1,rt);
863 emit_cmovl_imm(0,rt);
864}
865
866static void emit_set_nz32(u_int rs, u_int rt)
867{
868 //assem_debug("set_nz32\n");
869 if(rs!=rt) emit_mov(rs,rt);
870 emit_test(rs,rs);
871 emit_cmovne_imm(1,rt);
872}
873
874static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
875{
876 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
878 emit_cmp(rs1,rs2);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovl_imm(1,rt);
881}
882
883static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
884{
885 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
887 emit_cmp(rs1,rs2);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovb_imm(1,rt);
890}
891
892static int can_jump_or_call(const void *a)
893{
894 intptr_t diff = (u_char *)a - out;
895 return (-134217728 <= diff && diff <= 134217727);
896}
897
898static void emit_call(const void *a)
899{
900 intptr_t diff = (u_char *)a - out;
901 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
902 assert(!(diff & 3));
903 if (-134217728 <= diff && diff <= 134217727)
904 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
905 else
906 abort();
907}
908
909static void emit_jmp(const void *a)
910{
911 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
912 u_int offset = genjmp(a);
913 output_w32(0x14000000 | offset);
914}
915
916static void emit_jne(const void *a)
917{
918 assem_debug("bne %p\n", a);
919 u_int offset = genjmpcc(a);
920 output_w32(0x54000000 | (offset << 5) | COND_NE);
921}
922
923static void emit_jeq(const void *a)
924{
925 assem_debug("beq %p\n", a);
926 u_int offset = genjmpcc(a);
927 output_w32(0x54000000 | (offset << 5) | COND_EQ);
928}
929
930static void emit_js(const void *a)
931{
932 assem_debug("bmi %p\n", a);
933 u_int offset = genjmpcc(a);
934 output_w32(0x54000000 | (offset << 5) | COND_MI);
935}
936
937static void emit_jns(const void *a)
938{
939 assem_debug("bpl %p\n", a);
940 u_int offset = genjmpcc(a);
941 output_w32(0x54000000 | (offset << 5) | COND_PL);
942}
943
944static void emit_jl(const void *a)
945{
946 assem_debug("blt %p\n", a);
947 u_int offset = genjmpcc(a);
948 output_w32(0x54000000 | (offset << 5) | COND_LT);
949}
950
951static void emit_jge(const void *a)
952{
953 assem_debug("bge %p\n", a);
954 u_int offset = genjmpcc(a);
955 output_w32(0x54000000 | (offset << 5) | COND_GE);
956}
957
958static void emit_jno(const void *a)
959{
960 assem_debug("bvc %p\n", a);
961 u_int offset = genjmpcc(a);
962 output_w32(0x54000000 | (offset << 5) | COND_VC);
963}
964
965static void emit_jc(const void *a)
966{
967 assem_debug("bcs %p\n", a);
968 u_int offset = genjmpcc(a);
969 output_w32(0x54000000 | (offset << 5) | COND_CS);
970}
971
972static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
973{
974 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
975 u_int offset = genjmpcc(a);
976 is64 = is64 ? 0x80000000 : 0;
977 isnz = isnz ? 0x01000000 : 0;
978 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
979}
980
981static void emit_cbz(const void *a, u_int r)
982{
983 emit_cb(0, 0, a, r);
984}
985
986static void emit_jmpreg(u_int r)
987{
988 assem_debug("br %s\n", regname64[r]);
989 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
990}
991
992static void emit_retreg(u_int r)
993{
994 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
995 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
996}
997
998static void emit_ret(void)
999{
1000 emit_retreg(LR);
1001}
1002
1003static void emit_adr(void *addr, u_int rt)
1004{
1005 intptr_t offset = (u_char *)addr - out;
1006 assert(-1048576 <= offset && offset < 1048576);
1007 assert(rt < 31);
1008 assem_debug("adr x%d,#%#lx\n", rt, offset);
1009 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1010}
1011
1012static void emit_adrp(void *addr, u_int rt)
1013{
1014 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1015 assert(-4294967296l <= offset && offset < 4294967296l);
1016 assert(rt < 31);
1017 offset >>= 12;
1018 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1019 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1020}
1021
1022static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1023{
1024 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1025 assert(-256 <= offset && offset < 256);
1026 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1027}
1028
1029static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1030{
1031 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1033}
1034
1035static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1036{
1037 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1039}
1040
1041static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1042{
1043 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1045}
1046
1047static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1048{
1049 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1050 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1051}
1052#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1053
1054static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1055{
1056 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1057 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1058}
1059
1060static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1061{
1062 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1063 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1064}
1065
1066static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1067{
1068 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1069 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1070}
1071
1072static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1073{
1074 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1075 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1076}
1077
1078static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1079{
1080 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1081 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1082}
1083
1084static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1085{
1086 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1087 assert(-256 <= offset && offset < 256);
1088 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1089}
1090
1091static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1092{
1093 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1094 assert(-256 <= offset && offset < 256);
1095 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1096}
1097
1098static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1099{
1100 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1103}
1104
1105static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1106{
1107 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1108 assert(-256 <= offset && offset < 256);
1109 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1110}
1111
1112static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1113{
1114 if (!(offset & 3) && (u_int)offset <= 16380) {
1115 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1116 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1117 }
1118 else if (-256 <= offset && offset < 256) {
1119 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1120 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1121 }
1122 else
1123 assert(0);
1124}
1125
1126static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1127{
1128 if (!(offset & 1) && (u_int)offset <= 8190) {
1129 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1130 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1131 }
1132 else if (-256 <= offset && offset < 256) {
1133 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1134 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1135 }
1136 else
1137 assert(0);
1138}
1139
1140static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1141{
1142 if ((u_int)offset < 4096) {
1143 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1145 }
1146 else if (-256 <= offset && offset < 256) {
1147 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1148 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1149 }
1150 else
1151 assert(0);
1152}
1153
1154static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1155{
1156 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1157 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1158}
1159
1160static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1161{
1162 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1163 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1164}
1165
1166static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1167{
1168 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1169 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1170}
1171
1172static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1173{
1174 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1176}
1177
1178static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1179{
1180 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1182}
1183
1184static void emit_clz(u_int rs, u_int rt)
1185{
1186 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1187 output_w32(0x5ac01000 | rn_rd(rs, rt));
1188}
1189
1190// special case for checking invalid_code
1191static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1192{
1193 host_tempreg_acquire();
1194 emit_shrimm(r, 12, HOST_TEMPREG);
1195 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1196 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1197 emit_cmpimm(HOST_TEMPREG, imm);
1198 host_tempreg_release();
1199}
1200
1201// special for loadlr_assemble, rs2 is destroyed
1202static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1203{
1204 emit_shl(rs2, shift, rs2);
1205 emit_bic(rs1, rs2, rt);
1206}
1207
1208static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1209{
1210 emit_shr(rs2, shift, rs2);
1211 emit_bic(rs1, rs2, rt);
1212}
1213
1214static void emit_loadlp_ofs(u_int ofs, u_int rt)
1215{
1216 output_w32(0x58000000 | imm19_rt(ofs, rt));
1217}
1218
1219static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1220{
1221 u_int op = 0xb9000000;
1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
1224 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1225 is64 = is64 ? 1 : 0;
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1227 ofs = (ofs >> (2+is64));
1228 if (!is_st) op |= 0x00400000;
1229 if (is64) op |= 0x40000000;
1230 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1231}
1232
1233static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1234{
1235 u_int op = 0x29000000;
1236 unused const char *ldst = is_st ? "st" : "ld";
1237 unused char rp = is64 ? 'x' : 'w';
1238 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1239 is64 = is64 ? 1 : 0;
1240 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1241 ofs = (ofs >> (2+is64));
1242 assert(-64 <= ofs && ofs <= 63);
1243 ofs &= 0x7f;
1244 if (!is_st) op |= 0x00400000;
1245 if (is64) op |= 0x80000000;
1246 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1247}
1248
1249static void save_load_regs_all(int is_store, u_int reglist)
1250{
1251 int ofs = 0, c = 0;
1252 u_int r, pair[2];
1253 for (r = 0; reglist; r++, reglist >>= 1) {
1254 if (reglist & 1)
1255 pair[c++] = r;
1256 if (c == 2) {
1257 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1258 ofs += 8 * 2;
1259 c = 0;
1260 }
1261 }
1262 if (c) {
1263 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1264 ofs += 8;
1265 }
1266 assert(ofs <= SSP_CALLER_REGS);
1267}
1268
1269// Save registers before function call
1270static void save_regs(u_int reglist)
1271{
1272 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1273 save_load_regs_all(1, reglist);
1274}
1275
1276// Restore registers after function call
1277static void restore_regs(u_int reglist)
1278{
1279 reglist &= CALLER_SAVE_REGS;
1280 save_load_regs_all(0, reglist);
1281}
1282
1283/* Stubs/epilogue */
1284
1285static void literal_pool(int n)
1286{
1287 (void)literals;
1288}
1289
// Intentionally empty on arm64: nothing is emitted, n is ignored.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1293
1294// parsed by get_pointer, find_extjump_insn
1295static void emit_extjump2(u_char *addr, u_int target, void *linker)
1296{
1297 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1298
1299 emit_movz(target & 0xffff, 0);
1300 emit_movk_lsl16(target >> 16, 0);
1301
1302 // addr is in the current recompiled block (max 256k)
1303 // offset shouldn't exceed +/-1MB
1304 emit_adr(addr, 1);
1305 emit_far_jump(linker);
1306}
1307
1308static void check_extjump2(void *src)
1309{
1310 u_int *ptr = src;
1311 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1312 (void)ptr;
1313}
1314
1315// put rt_val into rt, potentially making use of rs with value rs_val
1316static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1317{
1318 int diff = rt_val - rs_val;
1319 if ((-4096 < diff && diff < 4096)
1320 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1321 emit_addimm(rs, diff, rt);
1322 else if (rt_val == ~rs_val)
1323 emit_not(rs, rt);
1324 else if (is_rotated_mask(rs_val ^ rt_val))
1325 emit_xorimm(rs, rs_val ^ rt_val, rt);
1326 else
1327 emit_movimm(rt_val, rt);
1328}
1329
1330// return 1 if the above function can do it's job cheaply
1331static int is_similar_value(u_int v1, u_int v2)
1332{
1333 int diff = v1 - v2;
1334 return (-4096 < diff && diff < 4096)
1335 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1336 || v1 == ~v2
1337 || is_rotated_mask(v1 ^ v2);
1338}
1339
1340// trashes r2
1341static void pass_args64(u_int a0, u_int a1)
1342{
1343 if(a0==1&&a1==0) {
1344 // must swap
1345 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1346 }
1347 else if(a0!=0&&a1==0) {
1348 emit_mov64(a1,1);
1349 if (a0>=0) emit_mov64(a0,0);
1350 }
1351 else {
1352 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1353 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1354 }
1355}
1356
1357static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1358{
1359 switch(type) {
1360 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1361 case LOADBU_STUB:
1362 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1363 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1364 case LOADHU_STUB:
1365 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1366 case LOADW_STUB:
1367 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1368 default: assert(0);
1369 }
1370}
1371
1372#include "pcsxmem.h"
1373//#include "pcsxmem_inline.c"
1374
1375static void do_readstub(int n)
1376{
1377 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1378 set_jump_target(stubs[n].addr, out);
1379 enum stub_type type = stubs[n].type;
1380 int i = stubs[n].a;
1381 int rs = stubs[n].b;
1382 const struct regstat *i_regs = (void *)stubs[n].c;
1383 u_int reglist = stubs[n].e;
1384 const signed char *i_regmap = i_regs->regmap;
1385 int rt;
1386 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1387 rt=get_reg(i_regmap,FTEMP);
1388 }else{
1389 rt=get_reg(i_regmap,dops[i].rt1);
1390 }
1391 assert(rs>=0);
1392 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1393 void *restore_jump = NULL, *handler_jump = NULL;
1394 reglist|=(1<<rs);
1395 for (r = 0; r < HOST_CCREG; r++) {
1396 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1397 temp = r;
1398 break;
1399 }
1400 }
1401 if(rt>=0&&dops[i].rt1!=0)
1402 reglist&=~(1<<rt);
1403 if(temp==-1) {
1404 save_regs(reglist);
1405 regs_saved=1;
1406 temp=(rs==0)?2:0;
1407 }
1408 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1409 temp2=1;
1410 emit_readdword(&mem_rtab,temp);
1411 emit_shrimm(rs,12,temp2);
1412 emit_readdword_dualindexedx8(temp,temp2,temp2);
1413 emit_adds64(temp2,temp2,temp2);
1414 handler_jump=out;
1415 emit_jc(0);
1416 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1417 switch(type) {
1418 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1419 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1420 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1421 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1422 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1423 default: assert(0);
1424 }
1425 }
1426 if(regs_saved) {
1427 restore_jump=out;
1428 emit_jmp(0); // jump to reg restore
1429 }
1430 else
1431 emit_jmp(stubs[n].retaddr); // return address
1432 set_jump_target(handler_jump, out);
1433
1434 if(!regs_saved)
1435 save_regs(reglist);
1436 void *handler=NULL;
1437 if(type==LOADB_STUB||type==LOADBU_STUB)
1438 handler=jump_handler_read8;
1439 if(type==LOADH_STUB||type==LOADHU_STUB)
1440 handler=jump_handler_read16;
1441 if(type==LOADW_STUB)
1442 handler=jump_handler_read32;
1443 assert(handler);
1444 pass_args64(rs,temp2);
1445 int cc=get_reg(i_regmap,CCREG);
1446 if(cc<0)
1447 emit_loadreg(CCREG,2);
1448 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1449 emit_far_call(handler);
1450 // (no cycle reload after read)
1451 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1452 loadstore_extend(type,0,rt);
1453 }
1454 if(restore_jump)
1455 set_jump_target(restore_jump, out);
1456 restore_regs(reglist);
1457 emit_jmp(stubs[n].retaddr);
1458}
1459
1460static void inline_readstub(enum stub_type type, int i, u_int addr,
1461 const signed char regmap[], int target, int adj, u_int reglist)
1462{
1463 int rs=get_reg(regmap,target);
1464 int rt=get_reg(regmap,target);
1465 if(rs<0) rs=get_reg(regmap,-1);
1466 assert(rs>=0);
1467 u_int is_dynamic=0;
1468 uintptr_t host_addr = 0;
1469 void *handler;
1470 int cc=get_reg(regmap,CCREG);
1471 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
1472 // return;
1473 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1474 if (handler == NULL) {
1475 if(rt<0||dops[i].rt1==0)
1476 return;
1477 if (addr != host_addr) {
1478 if (host_addr >= 0x100000000ull)
1479 abort(); // ROREG not implemented
1480 emit_movimm_from(addr, rs, host_addr, rs);
1481 }
1482 switch(type) {
1483 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1484 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1485 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1486 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1487 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1488 default: assert(0);
1489 }
1490 return;
1491 }
1492 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1493 if(is_dynamic) {
1494 if(type==LOADB_STUB||type==LOADBU_STUB)
1495 handler=jump_handler_read8;
1496 if(type==LOADH_STUB||type==LOADHU_STUB)
1497 handler=jump_handler_read16;
1498 if(type==LOADW_STUB)
1499 handler=jump_handler_read32;
1500 }
1501
1502 // call a memhandler
1503 if(rt>=0&&dops[i].rt1!=0)
1504 reglist&=~(1<<rt);
1505 save_regs(reglist);
1506 if(target==0)
1507 emit_movimm(addr,0);
1508 else if(rs!=0)
1509 emit_mov(rs,0);
1510 if(cc<0)
1511 emit_loadreg(CCREG,2);
1512 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
1513 if(is_dynamic) {
1514 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1515 emit_adrp((void *)l1, 1);
1516 emit_addimm64(1, l1 & 0xfff, 1);
1517 }
1518 else
1519 emit_far_call(do_memhandler_pre);
1520
1521 emit_far_call(handler);
1522
1523 // (no cycle reload after read)
1524 if(rt>=0&&dops[i].rt1!=0)
1525 loadstore_extend(type, 0, rt);
1526 restore_regs(reglist);
1527}
1528
1529static void do_writestub(int n)
1530{
1531 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1532 set_jump_target(stubs[n].addr, out);
1533 enum stub_type type=stubs[n].type;
1534 int i=stubs[n].a;
1535 int rs=stubs[n].b;
1536 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1537 u_int reglist=stubs[n].e;
1538 signed char *i_regmap=i_regs->regmap;
1539 int rt,r;
1540 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1541 rt=get_reg(i_regmap,r=FTEMP);
1542 }else{
1543 rt=get_reg(i_regmap,r=dops[i].rs2);
1544 }
1545 assert(rs>=0);
1546 assert(rt>=0);
1547 int rtmp,temp=-1,temp2,regs_saved=0;
1548 void *restore_jump = NULL, *handler_jump = NULL;
1549 int reglist2=reglist|(1<<rs)|(1<<rt);
1550 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1551 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1552 temp = rtmp;
1553 break;
1554 }
1555 }
1556 if(temp==-1) {
1557 save_regs(reglist);
1558 regs_saved=1;
1559 for(rtmp=0;rtmp<=3;rtmp++)
1560 if(rtmp!=rs&&rtmp!=rt)
1561 {temp=rtmp;break;}
1562 }
1563 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1564 temp2=3;
1565 else {
1566 host_tempreg_acquire();
1567 temp2=HOST_TEMPREG;
1568 }
1569 emit_readdword(&mem_wtab,temp);
1570 emit_shrimm(rs,12,temp2);
1571 emit_readdword_dualindexedx8(temp,temp2,temp2);
1572 emit_adds64(temp2,temp2,temp2);
1573 handler_jump=out;
1574 emit_jc(0);
1575 switch(type) {
1576 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1577 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1578 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1579 default: assert(0);
1580 }
1581 if(regs_saved) {
1582 restore_jump=out;
1583 emit_jmp(0); // jump to reg restore
1584 }
1585 else
1586 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1587 set_jump_target(handler_jump, out);
1588
1589 // TODO FIXME: regalloc should prefer callee-saved regs
1590 if(!regs_saved)
1591 save_regs(reglist);
1592 void *handler=NULL;
1593 switch(type) {
1594 case STOREB_STUB: handler=jump_handler_write8; break;
1595 case STOREH_STUB: handler=jump_handler_write16; break;
1596 case STOREW_STUB: handler=jump_handler_write32; break;
1597 default: assert(0);
1598 }
1599 assert(handler);
1600 pass_args(rs,rt);
1601 if(temp2!=3) {
1602 emit_mov64(temp2,3);
1603 host_tempreg_release();
1604 }
1605 int cc=get_reg(i_regmap,CCREG);
1606 if(cc<0)
1607 emit_loadreg(CCREG,2);
1608 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
1609 // returns new cycle_count
1610 emit_far_call(handler);
1611 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
1612 if(cc<0)
1613 emit_storereg(CCREG,2);
1614 if(restore_jump)
1615 set_jump_target(restore_jump, out);
1616 restore_regs(reglist);
1617 emit_jmp(stubs[n].retaddr);
1618}
1619
1620static void inline_writestub(enum stub_type type, int i, u_int addr,
1621 const signed char regmap[], int target, int adj, u_int reglist)
1622{
1623 int rs = get_reg(regmap,-1);
1624 int rt = get_reg(regmap,target);
1625 assert(rs >= 0);
1626 assert(rt >= 0);
1627 uintptr_t host_addr = 0;
1628 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1629 if (handler == NULL) {
1630 if (addr != host_addr) {
1631 if (host_addr >= 0x100000000ull)
1632 abort(); // ROREG not implemented
1633 emit_movimm_from(addr, rs, host_addr, rs);
1634 }
1635 switch (type) {
1636 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1637 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1638 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1639 default: assert(0);
1640 }
1641 return;
1642 }
1643
1644 // call a memhandler
1645 save_regs(reglist);
1646 emit_writeword(rs, &address); // some handlers still need it
1647 loadstore_extend(type, rt, 0);
1648 int cc, cc_use;
1649 cc = cc_use = get_reg(regmap, CCREG);
1650 if (cc < 0)
1651 emit_loadreg(CCREG, (cc_use = 2));
1652 emit_addimm(cc_use, CLOCK_ADJUST(adj), 2);
1653
1654 emit_far_call(do_memhandler_pre);
1655 emit_far_call(handler);
1656 emit_far_call(do_memhandler_post);
1657 emit_addimm(0, -CLOCK_ADJUST(adj), cc_use);
1658 if (cc < 0)
1659 emit_storereg(CCREG, cc_use);
1660 restore_regs(reglist);
1661}
1662
1663static int verify_code_arm64(const void *source, const void *copy, u_int size)
1664{
1665 int ret = memcmp(source, copy, size);
1666 //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
1667 return ret;
1668}
1669
1670// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1671static void do_dirty_stub_base(u_int vaddr, u_int source_len)
1672{
1673 assert(source_len <= MAXBLOCK*4);
1674 emit_loadlp_ofs(0, 0); // ldr x1, source
1675 emit_loadlp_ofs(0, 1); // ldr x2, copy
1676 emit_movz(source_len, 2);
1677 emit_far_call(verify_code_arm64);
1678 void *jmp = out;
1679 emit_cbz(0, 0);
1680 emit_movz(vaddr & 0xffff, 0);
1681 emit_movk_lsl16(vaddr >> 16, 0);
1682 emit_far_call(get_addr);
1683 emit_jmpreg(0);
1684 set_jump_target(jmp, out);
1685}
1686
1687static void assert_dirty_stub(const u_int *ptr)
1688{
1689 assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
1690 assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
1691 assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #source_len
1692 assert( ptr[8] == 0xd61f0000); // br x0
1693}
1694
1695static void set_loadlp(u_int *loadl, void *lit)
1696{
1697 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1698 assert((*loadl & ~0x1f) == 0x58000000);
1699 assert((ofs & 3) == 0);
1700 assert(ofs < 0x100000);
1701 *loadl |= (ofs >> 2) << 5;
1702}
1703
1704static void do_dirty_stub_emit_literals(u_int *loadlps)
1705{
1706 set_loadlp(&loadlps[0], out);
1707 output_w64((uintptr_t)source);
1708 set_loadlp(&loadlps[1], out);
1709 output_w64((uintptr_t)copy);
1710}
1711
1712static void *do_dirty_stub(int i, u_int source_len)
1713{
1714 assem_debug("do_dirty_stub %x\n",start+i*4);
1715 u_int *loadlps = (void *)out;
1716 do_dirty_stub_base(start + i*4, source_len);
1717 void *entry = out;
1718 load_regs_entry(i);
1719 if (entry == out)
1720 entry = instr_addr[i];
1721 emit_jmp(instr_addr[i]);
1722 do_dirty_stub_emit_literals(loadlps);
1723 return entry;
1724}
1725
1726static void do_dirty_stub_ds(u_int source_len)
1727{
1728 u_int *loadlps = (void *)out;
1729 do_dirty_stub_base(start + 1, source_len);
1730 void *lit_jumpover = out;
1731 emit_jmp(out + 8*2);
1732 do_dirty_stub_emit_literals(loadlps);
1733 set_jump_target(lit_jumpover, out);
1734}
1735
1736static uint64_t get_from_ldr_literal(const u_int *i)
1737{
1738 signed int ofs;
1739 assert((i[0] & 0xff000000) == 0x58000000);
1740 ofs = i[0] << 8;
1741 ofs >>= 5+8;
1742 return *(uint64_t *)(i + ofs);
1743}
1744
1745static uint64_t get_from_movz(const u_int *i)
1746{
1747 assert((i[0] & 0x7fe00000) == 0x52800000);
1748 return (i[0] >> 5) & 0xffff;
1749}
1750
1751// Find the "clean" entry point from a "dirty" entry point
1752// by skipping past the call to verify_code
1753static void *get_clean_addr(u_int *addr)
1754{
1755 assert_dirty_stub(addr);
1756 return addr + 9;
1757}
1758
1759static int verify_dirty(const u_int *ptr)
1760{
1761 const void *source, *copy;
1762 u_int len;
1763 assert_dirty_stub(ptr);
1764 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1765 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1766 len = get_from_movz(&ptr[2]); // movz w3, #source_len
1767 return !memcmp(source, copy, len);
1768}
1769
1770static int isclean(void *addr)
1771{
1772 const u_int *ptr = addr;
1773 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1774 assert_dirty_stub(ptr);
1775 return 0;
1776 }
1777 return 1;
1778}
1779
1780// get source that block at addr was compiled from (host pointers)
1781static void get_bounds(void *addr, u_char **start, u_char **end)
1782{
1783 const u_int *ptr = addr;
1784 assert_dirty_stub(ptr);
1785 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1786 *end = *start + get_from_movz(&ptr[2]); // movz w3, #source_len
1787}
1788
1789/* Special assem */
1790
1791static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1792{
1793 save_load_regs_all(1, reglist);
1794 cop2_do_stall_check(op, i, i_regs, 0);
1795#ifdef PCNT
1796 emit_movimm(op, 0);
1797 emit_far_call(pcnt_gte_start);
1798#endif
1799 // pointer to cop2 regs
1800 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1801}
1802
1803static void c2op_epilogue(u_int op,u_int reglist)
1804{
1805#ifdef PCNT
1806 emit_movimm(op, 0);
1807 emit_far_call(pcnt_gte_end);
1808#endif
1809 save_load_regs_all(0, reglist);
1810}
1811
1812static void c2op_assemble(int i, const struct regstat *i_regs)
1813{
1814 u_int c2op=source[i]&0x3f;
1815 u_int hr,reglist_full=0,reglist;
1816 int need_flags,need_ir;
1817 for(hr=0;hr<HOST_REGS;hr++) {
1818 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1819 }
1820 reglist=reglist_full&CALLER_SAVE_REGS;
1821
1822 if (gte_handlers[c2op]!=NULL) {
1823 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1824 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1825 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1826 source[i],gte_unneeded[i+1],need_flags,need_ir);
1827 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1828 need_flags=0;
1829 //int shift = (source[i] >> 19) & 1;
1830 //int lm = (source[i] >> 10) & 1;
1831 switch(c2op) {
1832 default:
1833 (void)need_ir;
1834 c2op_prologue(c2op, i, i_regs, reglist);
1835 emit_movimm(source[i],1); // opcode
1836 emit_writeword(1,&psxRegs.code);
1837 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1838 break;
1839 }
1840 c2op_epilogue(c2op,reglist);
1841 }
1842}
1843
1844static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1845{
1846 //value = value & 0x7ffff000;
1847 //if (value & 0x7f87e000) value |= 0x80000000;
1848 emit_andimm(sl, 0x7fffe000, temp);
1849 emit_testimm(temp, 0xff87ffff);
1850 emit_andimm(sl, 0x7ffff000, temp);
1851 host_tempreg_acquire();
1852 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1853 emit_cmovne_reg(HOST_TEMPREG, temp);
1854 host_tempreg_release();
1855 assert(0); // testing needed
1856}
1857
1858static void do_mfc2_31_one(u_int copr,signed char temp)
1859{
1860 emit_readshword(&reg_cop2d[copr],temp);
1861 emit_bicsar_imm(temp,31,temp);
1862 emit_cmpimm(temp,0xf80);
1863 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1864 emit_andimm(temp,0xf80,temp);
1865}
1866
1867static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1868{
1869 if (temp < 0) {
1870 host_tempreg_acquire();
1871 temp = HOST_TEMPREG;
1872 }
1873 do_mfc2_31_one(9,temp);
1874 emit_shrimm(temp,7,tl);
1875 do_mfc2_31_one(10,temp);
1876 emit_orrshr_imm(temp,2,tl);
1877 do_mfc2_31_one(11,temp);
1878 emit_orrshl_imm(temp,3,tl);
1879 emit_writeword(tl,&reg_cop2d[29]);
1880
1881 if (temp == HOST_TEMPREG)
1882 host_tempreg_release();
1883}
1884
1885static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1886{
1887 // case 0x18: MULT
1888 // case 0x19: MULTU
1889 // case 0x1A: DIV
1890 // case 0x1B: DIVU
1891 if(dops[i].rs1&&dops[i].rs2)
1892 {
1893 switch(dops[i].opcode2)
1894 {
1895 case 0x18: // MULT
1896 case 0x19: // MULTU
1897 {
1898 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1899 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1900 signed char hi=get_reg(i_regs->regmap,HIREG);
1901 signed char lo=get_reg(i_regs->regmap,LOREG);
1902 assert(m1>=0);
1903 assert(m2>=0);
1904 assert(hi>=0);
1905 assert(lo>=0);
1906
1907 if(dops[i].opcode2==0x18) // MULT
1908 emit_smull(m1,m2,hi);
1909 else // MULTU
1910 emit_umull(m1,m2,hi);
1911
1912 emit_mov(hi,lo);
1913 emit_shrimm64(hi,32,hi);
1914 break;
1915 }
1916 case 0x1A: // DIV
1917 case 0x1B: // DIVU
1918 {
1919 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1920 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1921 signed char quotient=get_reg(i_regs->regmap,LOREG);
1922 signed char remainder=get_reg(i_regs->regmap,HIREG);
1923 assert(numerator>=0);
1924 assert(denominator>=0);
1925 assert(quotient>=0);
1926 assert(remainder>=0);
1927
1928 if (dops[i].opcode2 == 0x1A) // DIV
1929 emit_sdiv(numerator,denominator,quotient);
1930 else // DIVU
1931 emit_udiv(numerator,denominator,quotient);
1932 emit_msub(quotient,denominator,numerator,remainder);
1933
1934 // div 0 quotient (remainder is already correct)
1935 host_tempreg_acquire();
1936 if (dops[i].opcode2 == 0x1A) // DIV
1937 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1938 else
1939 emit_movimm(~0,HOST_TEMPREG);
1940 emit_test(denominator,denominator);
1941 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1942 host_tempreg_release();
1943 break;
1944 }
1945 default:
1946 assert(0);
1947 }
1948 }
1949 else
1950 {
1951 signed char hr=get_reg(i_regs->regmap,HIREG);
1952 signed char lr=get_reg(i_regs->regmap,LOREG);
1953 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1954 {
1955 if (dops[i].rs1) {
1956 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1957 assert(numerator >= 0);
1958 if (hr >= 0)
1959 emit_mov(numerator,hr);
1960 if (lr >= 0) {
1961 if (dops[i].opcode2 == 0x1A) // DIV
1962 emit_sub_asrimm(0,numerator,31,lr);
1963 else
1964 emit_movimm(~0,lr);
1965 }
1966 }
1967 else {
1968 if (hr >= 0) emit_zeroreg(hr);
1969 if (lr >= 0) emit_movimm(~0,lr);
1970 }
1971 }
1972 else
1973 {
1974 // Multiply by zero is zero.
1975 if (hr >= 0) emit_zeroreg(hr);
1976 if (lr >= 0) emit_zeroreg(lr);
1977 }
1978 }
1979}
1980#define multdiv_assemble multdiv_assemble_arm64
1981
1982static void do_jump_vaddr(u_int rs)
1983{
1984 if (rs != 0)
1985 emit_mov(rs, 0);
1986 emit_far_call(get_addr_ht);
1987 emit_jmpreg(0);
1988}
1989
1990static void do_preload_rhash(u_int r) {
1991 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1992 // register. On ARM the hash can be done with a single instruction (below)
1993}
1994
1995static void do_preload_rhtbl(u_int ht) {
1996 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1997}
1998
1999static void do_rhash(u_int rs,u_int rh) {
2000 emit_andimm(rs, 0xf8, rh);
2001}
2002
2003static void do_miniht_load(int ht, u_int rh) {
2004 emit_add64(ht, rh, ht);
2005 emit_ldst(0, 0, rh, ht, 0);
2006}
2007
2008static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
2009 emit_cmp(rh, rs);
2010 void *jaddr = out;
2011 emit_jeq(0);
2012 do_jump_vaddr(rs);
2013
2014 set_jump_target(jaddr, out);
2015 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2016 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2017 emit_jmpreg(ht);
2018}
2019
2020// parsed by set_jump_target?
2021static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
2022 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2023 emit_movk(return_address&0xffff,rt);
2024 add_to_linker(out,return_address,1);
2025 emit_adr(out,temp);
2026 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2027 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2028}
2029
// Make newly written code in [start, end) visible to instruction fetch.
//
// GCC's __clear_cache implementation is not relied upon, as it caches
// the icache/dcache line sizes, which can vary between cores on
// big.LITTLE architectures. Instead CTR_EL0 is read on every call and
// the smallest line sizes seen so far (kept in static variables) are
// used.
static void clear_cache_arm64(char *start, char *end)
{
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  uint64_t ctr_el0;
  uint64_t addr;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (isize < icache_line_size)
    icache_line_size = isize;
  isize = icache_line_size;
  if (dsize < dcache_line_size)
    dcache_line_size = dsize;
  dsize = dcache_line_size;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (addr < (uint64_t)end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
      addr += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    addr = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (addr < (uint64_t)end) {
      __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
      addr += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
2070
2071// CPU-architecture-specific initialization
2072static void arch_init(void)
2073{
2074 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
2075 struct tramp_insns *ops = ndrc->tramp.ops;
2076 size_t i;
2077 assert(!(diff & 3));
2078 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2079 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
2080 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
2081 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
2082 }
2083 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2084}
2085
2086// vim:shiftwidth=2:expandtab