drc: simplify cache flush for some platforms
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26#define unused __attribute__((unused))
27
// Memory handler helpers. Only declared here; their definitions are not in
// this part of the file. Declared with real (void) prototypes so that a call
// with arguments is diagnosed instead of silently accepted.
void do_memhandler_pre(void);
void do_memhandler_post(void);
30
31/* Linker */
32static void set_jump_target(void *addr, void *target)
33{
34 u_int *ptr = addr;
35 intptr_t offset = (u_char *)target - (u_char *)addr;
36
37 ptr += ndrc_write_ofs / sizeof(ptr[0]);
38
39 if ((*ptr&0xFC000000) == 0x14000000) { // b
40 assert(offset>=-134217728LL&&offset<134217728LL);
41 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
42 }
43 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
44 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
45 // Conditional branch are limited to +/- 1MB
46 // block max size is 256k so branching beyond the +/- 1MB limit
47 // should only happen when jumping to an already compiled block (see add_jump_out)
48 // a workaround would be to do a trampoline jump via a stub at the end of the block
49 assert(-1048576 <= offset && offset < 1048576);
50 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
51 }
52 else if((*ptr&0x9f000000)==0x10000000) { // adr
53 // generated by do_miniht_insert
54 assert(offset>=-1048576LL&&offset<1048576LL);
55 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
56 }
57 else
58 abort(); // should not happen
59}
60
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr; its pc-relative immediate is
// decoded and applied to the address of that adr.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // immhi is insn[23:5] (signed, in 4-byte units), immlo is insn[30:29].
  // All left shifts are done on unsigned values so a negative displacement
  // never goes through an undefined signed shift.
  int32_t immhi = (int32_t)(insn << 8) >> 13;
  int offset = (int)((uint32_t)immhi << 2) | (int)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
70
71#if 0
// Find where an external branch is linked to, using the address of its stub:
// take the address that the stub loads (dyna_linker arg1), treat it as a
// pointer to the branch insn, and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  const int insn = *branch;

  if ((insn&0xfc000000) == 0x14000000) // b
    return branch + ((signed int)(insn<<6)>>6);

  const int is_bcond = (insn&0xff000000) == 0x54000000; // b.cond
  const int is_cbz = (insn&0x7e000000) == 0x34000000;   // cbz/cbnz
  if (is_bcond || is_cbz)
    return branch + ((signed int)(insn<<8)>>13);

  assert(0);
  return NULL;
}
87#endif
88
89// Allocate a specific ARM register.
90static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
91{
92 int n;
93 int dirty=0;
94
95 // see if it's already allocated (and dealloc it)
96 for(n=0;n<HOST_REGS;n++)
97 {
98 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
99 dirty=(cur->dirty>>n)&1;
100 cur->regmap[n]=-1;
101 }
102 }
103
104 cur->regmap[hr]=reg;
105 cur->dirty&=~(1<<hr);
106 cur->dirty|=dirty<<hr;
107 cur->isconst&=~(1<<hr);
108}
109
110// Alloc cycle count into dedicated register
111static void alloc_cc(struct regstat *cur,int i)
112{
113 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
114}
115
116/* Special alloc */
117
118
119/* Assembler */
120
121static unused const char *regname[32] = {
122 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
123 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
124 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
125 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
126};
127
128static unused const char *regname64[32] = {
129 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
130 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
131 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
132 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
133};
134
// Condition codes, numbered 0..15 in declaration order; the value is what
// gets or'ed into conditional instruction encodings below.
enum {
  COND_EQ, // 0
  COND_NE, // 1
  COND_CS, // 2
  COND_CC, // 3
  COND_MI, // 4
  COND_PL, // 5
  COND_VS, // 6
  COND_VC, // 7
  COND_HI, // 8
  COND_LS, // 9
  COND_GE, // 10
  COND_LT, // 11
  COND_GT, // 12
  COND_LE, // 13
  COND_AW, // 14
  COND_NV  // 15
};
139
140static unused const char *condname[16] = {
141 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
142 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
143};
144
145static void output_w32(u_int word)
146{
147 *((u_int *)(out + ndrc_write_ofs)) = word;
148 out += 4;
149}
150
// Pack register numbers into bits [9:5] (rn) and [4:0] (rd).
// Both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);

  const u_int rn_field = rn << 5;
  return rn_field | rd;
}
157
// Pack register numbers into bits [20:16] (rm), [9:5] (rn) and [4:0] (rd).
// Each must be below 32.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
165
166static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
167{
168 assert(ra < 32);
169 return rm_rn_rd(rm, rn, rd) | (ra << 10);
170}
171
// Pack a 7-bit immediate into bits [21:15], rt2 into [14:10], rn into [9:5]
// and rt into [4:0]. rt and rt2 must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);

  u_int fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
180
181static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
182{
183 assert(imm6 <= 63);
184 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
185}
186
// Pack a 16-bit immediate into bits [20:5] and rd (< 31) into bits [4:0].
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);

  const u_int imm_field = imm16 << 5;
  return imm_field | rd;
}
193
// Pack a 12-bit immediate into bits [21:10], rn into [9:5] and rd into
// [4:0]. Register numbers must be below 32.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
201
// Pack a 9-bit immediate into bits [20:12], rn into [9:5] and the
// transfer register (third argument) into [4:0]. Registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
209
// Pack a 19-bit immediate into bits [23:5] and rt (< 31) into bits [4:0].
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);

  const u_int imm_field = imm19 << 5;
  return imm_field | rt;
}
216
// Pack the bitfield/logical-immediate operands: n into bit 22, immr into
// [21:16], imms into [15:10], rn into [9:5] and rd into [4:0].
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
226
227static u_int genjmp(const u_char *addr)
228{
229 intptr_t offset = addr - out;
230 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
231 if (offset < -134217728 || offset > 134217727) {
232 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
233 abort();
234 return 0;
235 }
236 return ((u_int)offset >> 2) & 0x03ffffff;
237}
238
239static u_int genjmpcc(const u_char *addr)
240{
241 intptr_t offset = addr - out;
242 if ((uintptr_t)addr < 3) return 0;
243 if (offset < -1048576 || offset > 1048572) {
244 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
245 abort();
246 return 0;
247 }
248 return ((u_int)offset >> 2) & 0x7ffff;
249}
250
// Return 1 when 'value' is a non-empty run of ones starting at bit 0
// (1, 3, 7, ... 0xffffffff), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
255
256// This function returns true if the argument contains a
257// non-empty sequence of ones (possibly rotated) with the remainder zero.
258static uint32_t is_rotated_mask(u_int value)
259{
260 if (value == 0 || value == ~0)
261 return 0;
262 if (is_mask((value - 1) | value))
263 return 1;
264 return is_mask((~value - 1) | ~value);
265}
266
267static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
268{
269 int lzeros, tzeros, ones;
270 assert(value != 0);
271 if (is_mask((value - 1) | value)) {
272 lzeros = __builtin_clz(value);
273 tzeros = __builtin_ctz(value);
274 ones = 32 - lzeros - tzeros;
275 *immr = (32 - tzeros) & 31;
276 *imms = ones - 1;
277 return;
278 }
279 value = ~value;
280 if (is_mask((value - 1) | value)) {
281 lzeros = __builtin_clz(value);
282 tzeros = __builtin_ctz(value);
283 ones = 32 - lzeros - tzeros;
284 *immr = lzeros;
285 *imms = 31 - ones;
286 return;
287 }
288 abort();
289}
290
291static void emit_mov(u_int rs, u_int rt)
292{
293 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
294 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
295}
296
297static void emit_mov64(u_int rs, u_int rt)
298{
299 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
300 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
301}
302
303static void emit_add(u_int rs1, u_int rs2, u_int rt)
304{
305 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
306 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
307}
308
309static void emit_add64(u_int rs1, u_int rs2, u_int rt)
310{
311 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
312 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
313}
314
315static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
316{
317 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
318 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
319}
320#define emit_adds_ptr emit_adds64
321
322static void emit_neg(u_int rs, u_int rt)
323{
324 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
325 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
326}
327
328static void emit_sub(u_int rs1, u_int rs2, u_int rt)
329{
330 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
331 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
332}
333
334static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
335{
336 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
337 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
338}
339
340static void emit_movz(u_int imm, u_int rt)
341{
342 assem_debug("movz %s,#%#x\n", regname[rt], imm);
343 output_w32(0x52800000 | imm16_rd(imm, rt));
344}
345
346static void emit_movz_lsl16(u_int imm, u_int rt)
347{
348 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
349 output_w32(0x52a00000 | imm16_rd(imm, rt));
350}
351
352static void emit_movn(u_int imm, u_int rt)
353{
354 assem_debug("movn %s,#%#x\n", regname[rt], imm);
355 output_w32(0x12800000 | imm16_rd(imm, rt));
356}
357
358static void emit_movn_lsl16(u_int imm,u_int rt)
359{
360 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
361 output_w32(0x12a00000 | imm16_rd(imm, rt));
362}
363
364static void emit_movk(u_int imm,u_int rt)
365{
366 assem_debug("movk %s,#%#x\n", regname[rt], imm);
367 output_w32(0x72800000 | imm16_rd(imm, rt));
368}
369
370static void emit_movk_lsl16(u_int imm,u_int rt)
371{
372 assert(imm<65536);
373 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
374 output_w32(0x72a00000 | imm16_rd(imm, rt));
375}
376
377static void emit_zeroreg(u_int rt)
378{
379 emit_movz(0, rt);
380}
381
382static void emit_movimm(u_int imm, u_int rt)
383{
384 if (imm < 65536)
385 emit_movz(imm, rt);
386 else if ((~imm) < 65536)
387 emit_movn(~imm, rt);
388 else if ((imm&0xffff) == 0)
389 emit_movz_lsl16(imm >> 16, rt);
390 else if (((~imm)&0xffff) == 0)
391 emit_movn_lsl16(~imm >> 16, rt);
392 else if (is_rotated_mask(imm)) {
393 u_int immr, imms;
394 gen_logical_imm(imm, &immr, &imms);
395 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
396 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
397 }
398 else {
399 emit_movz(imm & 0xffff, rt);
400 emit_movk_lsl16(imm >> 16, rt);
401 }
402}
403
404static void emit_readword(void *addr, u_int rt)
405{
406 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
407 if (!(offset & 3) && offset <= 16380) {
408 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
409 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
410 }
411 else
412 abort();
413}
414
415static void emit_readdword(void *addr, u_int rt)
416{
417 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
418 if (!(offset & 7) && offset <= 32760) {
419 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
420 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
421 }
422 else
423 abort();
424}
425#define emit_readptr emit_readdword
426
427static void emit_readshword(void *addr, u_int rt)
428{
429 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
430 if (!(offset & 1) && offset <= 8190) {
431 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
432 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
433 }
434 else
435 assert(0);
436}
437
438static void emit_loadreg(u_int r, u_int hr)
439{
440 int is64 = 0;
441 if (r == 0)
442 emit_zeroreg(hr);
443 else {
444 void *addr;
445 switch (r) {
446 //case HIREG: addr = &hi; break;
447 //case LOREG: addr = &lo; break;
448 case CCREG: addr = &cycle_count; break;
449 case CSREG: addr = &Status; break;
450 case INVCP: addr = &invc_ptr; is64 = 1; break;
451 case ROREG: addr = &ram_offset; is64 = 1; break;
452 default:
453 assert(r < 34);
454 addr = &psxRegs.GPR.r[r];
455 break;
456 }
457 if (is64)
458 emit_readdword(addr, hr);
459 else
460 emit_readword(addr, hr);
461 }
462}
463
464static void emit_writeword(u_int rt, void *addr)
465{
466 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
467 if (!(offset & 3) && offset <= 16380) {
468 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
469 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
470 }
471 else
472 assert(0);
473}
474
475static void emit_writedword(u_int rt, void *addr)
476{
477 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
478 if (!(offset & 7) && offset <= 32760) {
479 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
480 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
481 }
482 else
483 abort();
484}
485
486static void emit_storereg(u_int r, u_int hr)
487{
488 assert(r < 64);
489 void *addr = &psxRegs.GPR.r[r];
490 switch (r) {
491 //case HIREG: addr = &hi; break;
492 //case LOREG: addr = &lo; break;
493 case CCREG: addr = &cycle_count; break;
494 default: assert(r < 34); break;
495 }
496 emit_writeword(hr, addr);
497}
498
499static void emit_test(u_int rs, u_int rt)
500{
501 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
502 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
503}
504
505static void emit_testimm(u_int rs, u_int imm)
506{
507 u_int immr, imms;
508 assem_debug("tst %s,#%#x\n", regname[rs], imm);
509 assert(is_rotated_mask(imm)); // good enough for PCSX
510 gen_logical_imm(imm, &immr, &imms);
511 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
512}
513
514static void emit_not(u_int rs,u_int rt)
515{
516 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
517 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
518}
519
520static void emit_and(u_int rs1,u_int rs2,u_int rt)
521{
522 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
523 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
524}
525
526static void emit_or(u_int rs1,u_int rs2,u_int rt)
527{
528 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
529 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
530}
531
532static void emit_bic(u_int rs1,u_int rs2,u_int rt)
533{
534 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
535 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
536}
537
538static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
539{
540 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
541 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
542}
543
544static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
545{
546 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
547 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
548}
549
550static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
551{
552 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
553 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
554}
555
556static void emit_xor(u_int rs1,u_int rs2,u_int rt)
557{
558 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
559 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
560}
561
562static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
563{
564 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
565 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
566}
567
568static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
569{
570 unused const char *st = s ? "s" : "";
571 s = s ? 0x20000000 : 0;
572 is64 = is64 ? 0x80000000 : 0;
573 if (imm < 4096) {
574 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
575 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
576 }
577 else if (-imm < 4096) {
578 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
579 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
580 }
581 else if (imm < 16777216) {
582 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
583 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
584 if ((imm & 0xfff) || s) {
585 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
586 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
587 }
588 }
589 else if (-imm < 16777216) {
590 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
591 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
592 if ((imm & 0xfff) || s) {
593 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
594 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
595 }
596 }
597 else
598 abort();
599}
600
601static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
602{
603 emit_addimm_s(0, 0, rs, imm, rt);
604}
605
606static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
607{
608 emit_addimm_s(0, 1, rs, imm, rt);
609}
610
611static void emit_addimm_and_set_flags(int imm, u_int rt)
612{
613 emit_addimm_s(1, 0, rt, imm, rt);
614}
615
616static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
617{
618 const char *names[] = { "and", "orr", "eor", "ands" };
619 const char *name = names[op];
620 u_int immr, imms;
621 op = op << 29;
622 if (is_rotated_mask(imm)) {
623 gen_logical_imm(imm, &immr, &imms);
624 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
625 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
626 }
627 else {
628 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
629 host_tempreg_acquire();
630 emit_movimm(imm, HOST_TEMPREG);
631 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
632 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
633 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
634 host_tempreg_release();
635 }
636 (void)name;
637}
638
639static void emit_andimm(u_int rs, u_int imm, u_int rt)
640{
641 if (imm == 0)
642 emit_zeroreg(rt);
643 else
644 emit_logicop_imm(0, rs, imm, rt);
645}
646
647static void emit_orimm(u_int rs, u_int imm, u_int rt)
648{
649 if (imm == 0) {
650 if (rs != rt)
651 emit_mov(rs, rt);
652 }
653 else
654 emit_logicop_imm(1, rs, imm, rt);
655}
656
657static void emit_xorimm(u_int rs, u_int imm, u_int rt)
658{
659 if (imm == 0) {
660 if (rs != rt)
661 emit_mov(rs, rt);
662 }
663 else
664 emit_logicop_imm(2, rs, imm, rt);
665}
666
667static void emit_sbfm(u_int rs,u_int imm,u_int rt)
668{
669 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
670 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
671}
672
673static void emit_ubfm(u_int rs,u_int imm,u_int rt)
674{
675 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
676 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
677}
678
679static void emit_shlimm(u_int rs,u_int imm,u_int rt)
680{
681 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
682 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
683}
684
685static void emit_shrimm(u_int rs,u_int imm,u_int rt)
686{
687 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
688 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
689}
690
691static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
692{
693 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
694 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
695}
696
697static void emit_sarimm(u_int rs,u_int imm,u_int rt)
698{
699 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
700 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
701}
702
703static void emit_rorimm(u_int rs,u_int imm,u_int rt)
704{
705 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
706 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
707}
708
709static void emit_signextend16(u_int rs, u_int rt)
710{
711 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
712 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
713}
714
715static void emit_shl(u_int rs,u_int rshift,u_int rt)
716{
717 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
718 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
719}
720
721static void emit_shr(u_int rs,u_int rshift,u_int rt)
722{
723 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
724 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
725}
726
727static void emit_sar(u_int rs,u_int rshift,u_int rt)
728{
729 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
730 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
731}
732
733static void emit_cmpimm(u_int rs, u_int imm)
734{
735 if (imm < 4096) {
736 assem_debug("cmp %s,%#x\n", regname[rs], imm);
737 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
738 }
739 else if (-imm < 4096) {
740 assem_debug("cmn %s,%#x\n", regname[rs], imm);
741 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
742 }
743 else if (imm < 16777216 && !(imm & 0xfff)) {
744 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
745 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
746 }
747 else {
748 host_tempreg_acquire();
749 emit_movimm(imm, HOST_TEMPREG);
750 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
751 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
752 host_tempreg_release();
753 }
754}
755
756static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
757{
758 assert(imm == 0 || imm == 1);
759 assert(cond0 < 0x10);
760 assert(cond1 < 0x10);
761 if (imm) {
762 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
763 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
764 } else {
765 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
766 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
767 }
768}
769
770static void emit_cmovne_imm(u_int imm,u_int rt)
771{
772 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
773}
774
775static void emit_cmovl_imm(u_int imm,u_int rt)
776{
777 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
778}
779
780static void emit_cmovb_imm(int imm,u_int rt)
781{
782 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
783}
784
785static void emit_cmoveq_reg(u_int rs,u_int rt)
786{
787 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
788 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
789}
790
791static void emit_cmovne_reg(u_int rs,u_int rt)
792{
793 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
794 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
795}
796
797static void emit_cmovl_reg(u_int rs,u_int rt)
798{
799 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
800 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
801}
802
803static void emit_cmovb_reg(u_int rs,u_int rt)
804{
805 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
806 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
807}
808
809static void emit_cmovs_reg(u_int rs,u_int rt)
810{
811 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
812 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
813}
814
815static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
816{
817 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
818 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
819}
820
821static void emit_slti32(u_int rs,int imm,u_int rt)
822{
823 if(rs!=rt) emit_zeroreg(rt);
824 emit_cmpimm(rs,imm);
825 if(rs==rt) emit_movimm(0,rt);
826 emit_cmovl_imm(1,rt);
827}
828
829static void emit_sltiu32(u_int rs,int imm,u_int rt)
830{
831 if(rs!=rt) emit_zeroreg(rt);
832 emit_cmpimm(rs,imm);
833 if(rs==rt) emit_movimm(0,rt);
834 emit_cmovb_imm(1,rt);
835}
836
837static void emit_cmp(u_int rs,u_int rt)
838{
839 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
840 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
841}
842
843static void emit_cmpcs(u_int rs,u_int rt)
844{
845 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
846 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
847}
848
849static void emit_set_gz32(u_int rs, u_int rt)
850{
851 //assem_debug("set_gz32\n");
852 emit_cmpimm(rs,1);
853 emit_movimm(1,rt);
854 emit_cmovl_imm(0,rt);
855}
856
857static void emit_set_nz32(u_int rs, u_int rt)
858{
859 //assem_debug("set_nz32\n");
860 if(rs!=rt) emit_mov(rs,rt);
861 emit_test(rs,rs);
862 emit_cmovne_imm(1,rt);
863}
864
865static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
866{
867 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
868 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
869 emit_cmp(rs1,rs2);
870 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
871 emit_cmovl_imm(1,rt);
872}
873
874static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
875{
876 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
878 emit_cmp(rs1,rs2);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovb_imm(1,rt);
881}
882
883static int can_jump_or_call(const void *a)
884{
885 intptr_t diff = (u_char *)a - out;
886 return (-134217728 <= diff && diff <= 134217727);
887}
888
889static void emit_call(const void *a)
890{
891 intptr_t diff = (u_char *)a - out;
892 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
893 assert(!(diff & 3));
894 if (-134217728 <= diff && diff <= 134217727)
895 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
896 else
897 abort();
898}
899
900static void emit_jmp(const void *a)
901{
902 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
903 u_int offset = genjmp(a);
904 output_w32(0x14000000 | offset);
905}
906
907static void emit_jne(const void *a)
908{
909 assem_debug("bne %p\n", a);
910 u_int offset = genjmpcc(a);
911 output_w32(0x54000000 | (offset << 5) | COND_NE);
912}
913
914static void emit_jeq(const void *a)
915{
916 assem_debug("beq %p\n", a);
917 u_int offset = genjmpcc(a);
918 output_w32(0x54000000 | (offset << 5) | COND_EQ);
919}
920
921static void emit_js(const void *a)
922{
923 assem_debug("bmi %p\n", a);
924 u_int offset = genjmpcc(a);
925 output_w32(0x54000000 | (offset << 5) | COND_MI);
926}
927
928static void emit_jns(const void *a)
929{
930 assem_debug("bpl %p\n", a);
931 u_int offset = genjmpcc(a);
932 output_w32(0x54000000 | (offset << 5) | COND_PL);
933}
934
935static void emit_jl(const void *a)
936{
937 assem_debug("blt %p\n", a);
938 u_int offset = genjmpcc(a);
939 output_w32(0x54000000 | (offset << 5) | COND_LT);
940}
941
942static void emit_jge(const void *a)
943{
944 assem_debug("bge %p\n", a);
945 u_int offset = genjmpcc(a);
946 output_w32(0x54000000 | (offset << 5) | COND_GE);
947}
948
949static void emit_jno(const void *a)
950{
951 assem_debug("bvc %p\n", a);
952 u_int offset = genjmpcc(a);
953 output_w32(0x54000000 | (offset << 5) | COND_VC);
954}
955
956static void emit_jc(const void *a)
957{
958 assem_debug("bcs %p\n", a);
959 u_int offset = genjmpcc(a);
960 output_w32(0x54000000 | (offset << 5) | COND_CS);
961}
962
963static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
964{
965 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
966 u_int offset = genjmpcc(a);
967 is64 = is64 ? 0x80000000 : 0;
968 isnz = isnz ? 0x01000000 : 0;
969 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
970}
971
972static unused void emit_cbz(const void *a, u_int r)
973{
974 emit_cb(0, 0, a, r);
975}
976
977static void emit_jmpreg(u_int r)
978{
979 assem_debug("br %s\n", regname64[r]);
980 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
981}
982
983static void emit_retreg(u_int r)
984{
985 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
986 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
987}
988
989static void emit_ret(void)
990{
991 emit_retreg(LR);
992}
993
994static void emit_adr(void *addr, u_int rt)
995{
996 intptr_t offset = (u_char *)addr - out;
997 assert(-1048576 <= offset && offset < 1048576);
998 assert(rt < 31);
999 assem_debug("adr x%d,#%#lx\n", rt, offset);
1000 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1001}
1002
1003static void emit_adrp(void *addr, u_int rt)
1004{
1005 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1006 assert(-4294967296l <= offset && offset < 4294967296l);
1007 assert(rt < 31);
1008 offset >>= 12;
1009 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1010 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1011}
1012
1013static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1014{
1015 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1016 assert(-256 <= offset && offset < 256);
1017 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1018}
1019
1020static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1021{
1022 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1024}
1025
1026static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1027{
1028 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1030}
1031
1032static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1033{
1034 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1036}
1037
1038static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1039{
1040 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1042}
1043#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1044
1045static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062
1063static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1064{
1065 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1066 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1067}
1068
1069static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1070{
1071 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1072 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1073}
1074
1075static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1076{
1077 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1080}
1081
1082static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1083{
1084 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1087}
1088
1089static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1090{
1091 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1094}
1095
1096static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1097{
1098 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1099 assert(-256 <= offset && offset < 256);
1100 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1101}
1102
1103static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1104{
1105 if (!(offset & 3) && (u_int)offset <= 16380) {
1106 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1107 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1108 }
1109 else if (-256 <= offset && offset < 256) {
1110 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1111 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1112 }
1113 else
1114 assert(0);
1115}
1116
1117static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1118{
1119 if (!(offset & 1) && (u_int)offset <= 8190) {
1120 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1121 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1122 }
1123 else if (-256 <= offset && offset < 256) {
1124 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1125 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1126 }
1127 else
1128 assert(0);
1129}
1130
1131static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1132{
1133 if ((u_int)offset < 4096) {
1134 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1135 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1136 }
1137 else if (-256 <= offset && offset < 256) {
1138 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1139 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1140 }
1141 else
1142 assert(0);
1143}
1144
1145static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1146{
1147 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1149}
1150
1151static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1152{
1153 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1154 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1155}
1156
1157static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1158{
1159 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1160 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1161}
1162
1163static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1164{
1165 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1167}
1168
1169static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1170{
1171 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1173}
1174
1175static void emit_clz(u_int rs, u_int rt)
1176{
1177 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1178 output_w32(0x5ac01000 | rn_rd(rs, rt));
1179}
1180
1181// special case for checking invalid_code
1182static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1183{
1184 host_tempreg_acquire();
1185 emit_shrimm(r, 12, HOST_TEMPREG);
1186 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1187 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1188 emit_cmpimm(HOST_TEMPREG, imm);
1189 host_tempreg_release();
1190}
1191
1192// special for loadlr_assemble, rs2 is destroyed
1193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1194{
1195 emit_shl(rs2, shift, rs2);
1196 emit_bic(rs1, rs2, rt);
1197}
1198
1199static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1200{
1201 emit_shr(rs2, shift, rs2);
1202 emit_bic(rs1, rs2, rt);
1203}
1204
1205static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1206{
1207 u_int op = 0xb9000000;
1208 unused const char *ldst = is_st ? "st" : "ld";
1209 unused char rp = is64 ? 'x' : 'w';
1210 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1211 is64 = is64 ? 1 : 0;
1212 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1213 ofs = (ofs >> (2+is64));
1214 if (!is_st) op |= 0x00400000;
1215 if (is64) op |= 0x40000000;
1216 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1217}
1218
1219static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1220{
1221 u_int op = 0x29000000;
1222 unused const char *ldst = is_st ? "st" : "ld";
1223 unused char rp = is64 ? 'x' : 'w';
1224 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1225 is64 = is64 ? 1 : 0;
1226 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1227 ofs = (ofs >> (2+is64));
1228 assert(-64 <= ofs && ofs <= 63);
1229 ofs &= 0x7f;
1230 if (!is_st) op |= 0x00400000;
1231 if (is64) op |= 0x80000000;
1232 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1233}
1234
1235static void save_load_regs_all(int is_store, u_int reglist)
1236{
1237 int ofs = 0, c = 0;
1238 u_int r, pair[2];
1239 for (r = 0; reglist; r++, reglist >>= 1) {
1240 if (reglist & 1)
1241 pair[c++] = r;
1242 if (c == 2) {
1243 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1244 ofs += 8 * 2;
1245 c = 0;
1246 }
1247 }
1248 if (c) {
1249 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1250 ofs += 8;
1251 }
1252 assert(ofs <= SSP_CALLER_REGS);
1253}
1254
1255// Save registers before function call
1256static void save_regs(u_int reglist)
1257{
1258 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1259 save_load_regs_all(1, reglist);
1260}
1261
1262// Restore registers after function call
1263static void restore_regs(u_int reglist)
1264{
1265 reglist &= CALLER_SAVE_REGS;
1266 save_load_regs_all(0, reglist);
1267}
1268
1269/* Stubs/epilogue */
1270
1271static void literal_pool(int n)
1272{
1273 (void)literals;
1274}
1275
/* Intentionally a no-op on this target. */
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1279
1280// parsed by get_pointer, find_extjump_insn
1281static void emit_extjump(u_char *addr, u_int target)
1282{
1283 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1284
1285 emit_movz(target & 0xffff, 0);
1286 emit_movk_lsl16(target >> 16, 0);
1287
1288 // addr is in the current recompiled block (max 256k)
1289 // offset shouldn't exceed +/-1MB
1290 emit_adr(addr, 1);
1291 emit_far_jump(dyna_linker);
1292}
1293
1294static void check_extjump2(void *src)
1295{
1296 u_int *ptr = src;
1297 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1298 (void)ptr;
1299}
1300
1301// put rt_val into rt, potentially making use of rs with value rs_val
1302static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1303{
1304 int diff = rt_val - rs_val;
1305 if ((-4096 < diff && diff < 4096)
1306 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1307 emit_addimm(rs, diff, rt);
1308 else if (rt_val == ~rs_val)
1309 emit_not(rs, rt);
1310 else if (is_rotated_mask(rs_val ^ rt_val))
1311 emit_xorimm(rs, rs_val ^ rt_val, rt);
1312 else
1313 emit_movimm(rt_val, rt);
1314}
1315
1316// return 1 if the above function can do it's job cheaply
1317static int is_similar_value(u_int v1, u_int v2)
1318{
1319 int diff = v1 - v2;
1320 return (-4096 < diff && diff < 4096)
1321 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1322 || v1 == ~v2
1323 || is_rotated_mask(v1 ^ v2);
1324}
1325
1326static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1327{
1328 if (rt_val < 0x100000000ull) {
1329 emit_movimm_from(rs_val, rs, rt_val, rt);
1330 return;
1331 }
1332 // just move the whole thing. At least on Linux all addresses
1333 // seem to be 48bit, so 3 insns - not great not terrible
1334 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1335 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1336 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1337 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1338 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1339 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1340 if (rt_val >> 48) {
1341 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1342 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1343 }
1344}
1345
1346// trashes x2
1347static void pass_args64(u_int a0, u_int a1)
1348{
1349 if(a0==1&&a1==0) {
1350 // must swap
1351 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1352 }
1353 else if(a0!=0&&a1==0) {
1354 emit_mov64(a1,1);
1355 if (a0>=0) emit_mov64(a0,0);
1356 }
1357 else {
1358 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1359 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1360 }
1361}
1362
1363static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1364{
1365 switch(type) {
1366 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1367 case LOADBU_STUB:
1368 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1369 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1370 case LOADHU_STUB:
1371 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1372 case LOADW_STUB:
1373 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1374 default: assert(0);
1375 }
1376}
1377
1378#include "pcsxmem.h"
1379//#include "pcsxmem_inline.c"
1380
1381static void do_readstub(int n)
1382{
1383 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1384 set_jump_target(stubs[n].addr, out);
1385 enum stub_type type = stubs[n].type;
1386 int i = stubs[n].a;
1387 int rs = stubs[n].b;
1388 const struct regstat *i_regs = (void *)stubs[n].c;
1389 u_int reglist = stubs[n].e;
1390 const signed char *i_regmap = i_regs->regmap;
1391 int rt;
1392 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1393 rt=get_reg(i_regmap,FTEMP);
1394 }else{
1395 rt=get_reg(i_regmap,dops[i].rt1);
1396 }
1397 assert(rs>=0);
1398 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1399 void *restore_jump = NULL, *handler_jump = NULL;
1400 reglist|=(1<<rs);
1401 for (r = 0; r < HOST_CCREG; r++) {
1402 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1403 temp = r;
1404 break;
1405 }
1406 }
1407 if(rt>=0&&dops[i].rt1!=0)
1408 reglist&=~(1<<rt);
1409 if(temp==-1) {
1410 save_regs(reglist);
1411 regs_saved=1;
1412 temp=(rs==0)?2:0;
1413 }
1414 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1415 temp2=1;
1416 emit_readdword(&mem_rtab,temp);
1417 emit_shrimm(rs,12,temp2);
1418 emit_readdword_dualindexedx8(temp,temp2,temp2);
1419 emit_adds64(temp2,temp2,temp2);
1420 handler_jump=out;
1421 emit_jc(0);
1422 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1423 switch(type) {
1424 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1425 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1426 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1427 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1428 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1429 default: assert(0);
1430 }
1431 }
1432 if(regs_saved) {
1433 restore_jump=out;
1434 emit_jmp(0); // jump to reg restore
1435 }
1436 else
1437 emit_jmp(stubs[n].retaddr); // return address
1438 set_jump_target(handler_jump, out);
1439
1440 if(!regs_saved)
1441 save_regs(reglist);
1442 void *handler=NULL;
1443 if(type==LOADB_STUB||type==LOADBU_STUB)
1444 handler=jump_handler_read8;
1445 if(type==LOADH_STUB||type==LOADHU_STUB)
1446 handler=jump_handler_read16;
1447 if(type==LOADW_STUB)
1448 handler=jump_handler_read32;
1449 assert(handler);
1450 pass_args64(rs,temp2);
1451 int cc=get_reg(i_regmap,CCREG);
1452 if(cc<0)
1453 emit_loadreg(CCREG,2);
1454 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1455 emit_far_call(handler);
1456 // (no cycle reload after read)
1457 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1458 loadstore_extend(type,0,rt);
1459 }
1460 if(restore_jump)
1461 set_jump_target(restore_jump, out);
1462 restore_regs(reglist);
1463 emit_jmp(stubs[n].retaddr);
1464}
1465
1466static void inline_readstub(enum stub_type type, int i, u_int addr,
1467 const signed char regmap[], int target, int adj, u_int reglist)
1468{
1469 int rs=get_reg(regmap,target);
1470 int rt=get_reg(regmap,target);
1471 if(rs<0) rs=get_reg_temp(regmap);
1472 assert(rs>=0);
1473 u_int is_dynamic=0;
1474 uintptr_t host_addr = 0;
1475 void *handler;
1476 int cc=get_reg(regmap,CCREG);
1477 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1478 // return;
1479 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1480 if (handler == NULL) {
1481 if(rt<0||dops[i].rt1==0)
1482 return;
1483 if (addr != host_addr)
1484 emit_movimm_from64(addr, rs, host_addr, rs);
1485 switch(type) {
1486 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1487 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1488 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1489 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1490 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1491 default: assert(0);
1492 }
1493 return;
1494 }
1495 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1496 if (is_dynamic) {
1497 if(type==LOADB_STUB||type==LOADBU_STUB)
1498 handler=jump_handler_read8;
1499 if(type==LOADH_STUB||type==LOADHU_STUB)
1500 handler=jump_handler_read16;
1501 if(type==LOADW_STUB)
1502 handler=jump_handler_read32;
1503 }
1504
1505 // call a memhandler
1506 if(rt>=0&&dops[i].rt1!=0)
1507 reglist&=~(1<<rt);
1508 save_regs(reglist);
1509 if(target==0)
1510 emit_movimm(addr,0);
1511 else if(rs!=0)
1512 emit_mov(rs,0);
1513 if(cc<0)
1514 emit_loadreg(CCREG,2);
1515 emit_addimm(cc<0?2:cc,adj,2);
1516 if(is_dynamic) {
1517 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1518 emit_adrp((void *)l1, 1);
1519 emit_addimm64(1, l1 & 0xfff, 1);
1520 }
1521 else
1522 emit_far_call(do_memhandler_pre);
1523
1524 emit_far_call(handler);
1525
1526 // (no cycle reload after read)
1527 if(rt>=0&&dops[i].rt1!=0)
1528 loadstore_extend(type, 0, rt);
1529 restore_regs(reglist);
1530}
1531
1532static void do_writestub(int n)
1533{
1534 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1535 set_jump_target(stubs[n].addr, out);
1536 enum stub_type type=stubs[n].type;
1537 int i=stubs[n].a;
1538 int rs=stubs[n].b;
1539 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1540 u_int reglist=stubs[n].e;
1541 signed char *i_regmap=i_regs->regmap;
1542 int rt,r;
1543 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1544 rt=get_reg(i_regmap,r=FTEMP);
1545 }else{
1546 rt=get_reg(i_regmap,r=dops[i].rs2);
1547 }
1548 assert(rs>=0);
1549 assert(rt>=0);
1550 int rtmp,temp=-1,temp2,regs_saved=0;
1551 void *restore_jump = NULL, *handler_jump = NULL;
1552 int reglist2=reglist|(1<<rs)|(1<<rt);
1553 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1554 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1555 temp = rtmp;
1556 break;
1557 }
1558 }
1559 if(temp==-1) {
1560 save_regs(reglist);
1561 regs_saved=1;
1562 for(rtmp=0;rtmp<=3;rtmp++)
1563 if(rtmp!=rs&&rtmp!=rt)
1564 {temp=rtmp;break;}
1565 }
1566 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1567 temp2=3;
1568 else {
1569 host_tempreg_acquire();
1570 temp2=HOST_TEMPREG;
1571 }
1572 emit_readdword(&mem_wtab,temp);
1573 emit_shrimm(rs,12,temp2);
1574 emit_readdword_dualindexedx8(temp,temp2,temp2);
1575 emit_adds64(temp2,temp2,temp2);
1576 handler_jump=out;
1577 emit_jc(0);
1578 switch(type) {
1579 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1580 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1581 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1582 default: assert(0);
1583 }
1584 if(regs_saved) {
1585 restore_jump=out;
1586 emit_jmp(0); // jump to reg restore
1587 }
1588 else
1589 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1590 set_jump_target(handler_jump, out);
1591
1592 if(!regs_saved)
1593 save_regs(reglist);
1594 void *handler=NULL;
1595 switch(type) {
1596 case STOREB_STUB: handler=jump_handler_write8; break;
1597 case STOREH_STUB: handler=jump_handler_write16; break;
1598 case STOREW_STUB: handler=jump_handler_write32; break;
1599 default: assert(0);
1600 }
1601 assert(handler);
1602 pass_args(rs,rt);
1603 if(temp2!=3) {
1604 emit_mov64(temp2,3);
1605 host_tempreg_release();
1606 }
1607 int cc=get_reg(i_regmap,CCREG);
1608 if(cc<0)
1609 emit_loadreg(CCREG,2);
1610 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1611 // returns new cycle_count
1612 emit_far_call(handler);
1613 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1614 if(cc<0)
1615 emit_storereg(CCREG,2);
1616 if(restore_jump)
1617 set_jump_target(restore_jump, out);
1618 restore_regs(reglist);
1619 emit_jmp(stubs[n].retaddr);
1620}
1621
1622static void inline_writestub(enum stub_type type, int i, u_int addr,
1623 const signed char regmap[], int target, int adj, u_int reglist)
1624{
1625 int rs = get_reg_temp(regmap);
1626 int rt = get_reg(regmap,target);
1627 assert(rs >= 0);
1628 assert(rt >= 0);
1629 uintptr_t host_addr = 0;
1630 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1631 if (handler == NULL) {
1632 if (addr != host_addr)
1633 emit_movimm_from64(addr, rs, host_addr, rs);
1634 switch (type) {
1635 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1636 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1637 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1638 default: assert(0);
1639 }
1640 return;
1641 }
1642
1643 // call a memhandler
1644 save_regs(reglist);
1645 emit_writeword(rs, &address); // some handlers still need it
1646 loadstore_extend(type, rt, 0);
1647 int cc, cc_use;
1648 cc = cc_use = get_reg(regmap, CCREG);
1649 if (cc < 0)
1650 emit_loadreg(CCREG, (cc_use = 2));
1651 emit_addimm(cc_use, adj, 2);
1652
1653 emit_far_call(do_memhandler_pre);
1654 emit_far_call(handler);
1655 emit_far_call(do_memhandler_post);
1656 emit_addimm(0, -adj, cc_use);
1657 if (cc < 0)
1658 emit_storereg(CCREG, cc_use);
1659 restore_regs(reglist);
1660}
1661
1662/* Special assem */
1663
1664static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1665{
1666 save_load_regs_all(1, reglist);
1667 cop2_do_stall_check(op, i, i_regs, 0);
1668#ifdef PCNT
1669 emit_movimm(op, 0);
1670 emit_far_call(pcnt_gte_start);
1671#endif
1672 // pointer to cop2 regs
1673 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1674}
1675
1676static void c2op_epilogue(u_int op,u_int reglist)
1677{
1678#ifdef PCNT
1679 emit_movimm(op, 0);
1680 emit_far_call(pcnt_gte_end);
1681#endif
1682 save_load_regs_all(0, reglist);
1683}
1684
1685static void c2op_assemble(int i, const struct regstat *i_regs)
1686{
1687 u_int c2op=source[i]&0x3f;
1688 u_int hr,reglist_full=0,reglist;
1689 int need_flags,need_ir;
1690 for(hr=0;hr<HOST_REGS;hr++) {
1691 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1692 }
1693 reglist=reglist_full&CALLER_SAVE_REGS;
1694
1695 if (gte_handlers[c2op]!=NULL) {
1696 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1697 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1698 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1699 source[i],gte_unneeded[i+1],need_flags,need_ir);
1700 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1701 need_flags=0;
1702 //int shift = (source[i] >> 19) & 1;
1703 //int lm = (source[i] >> 10) & 1;
1704 switch(c2op) {
1705 default:
1706 (void)need_ir;
1707 c2op_prologue(c2op, i, i_regs, reglist);
1708 emit_movimm(source[i],1); // opcode
1709 emit_writeword(1,&psxRegs.code);
1710 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1711 break;
1712 }
1713 c2op_epilogue(c2op,reglist);
1714 }
1715}
1716
1717static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1718{
1719 //value = value & 0x7ffff000;
1720 //if (value & 0x7f87e000) value |= 0x80000000;
1721 emit_andimm(sl, 0x7fffe000, temp);
1722 emit_testimm(temp, 0xff87ffff);
1723 emit_andimm(sl, 0x7ffff000, temp);
1724 host_tempreg_acquire();
1725 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1726 emit_cmovne_reg(HOST_TEMPREG, temp);
1727 host_tempreg_release();
1728 assert(0); // testing needed
1729}
1730
1731static void do_mfc2_31_one(u_int copr,signed char temp)
1732{
1733 emit_readshword(&reg_cop2d[copr],temp);
1734 emit_bicsar_imm(temp,31,temp);
1735 emit_cmpimm(temp,0xf80);
1736 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1737 emit_andimm(temp,0xf80,temp);
1738}
1739
1740static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1741{
1742 if (temp < 0) {
1743 host_tempreg_acquire();
1744 temp = HOST_TEMPREG;
1745 }
1746 do_mfc2_31_one(9,temp);
1747 emit_shrimm(temp,7,tl);
1748 do_mfc2_31_one(10,temp);
1749 emit_orrshr_imm(temp,2,tl);
1750 do_mfc2_31_one(11,temp);
1751 emit_orrshl_imm(temp,3,tl);
1752 emit_writeword(tl,&reg_cop2d[29]);
1753
1754 if (temp == HOST_TEMPREG)
1755 host_tempreg_release();
1756}
1757
1758static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1759{
1760 // case 0x18: MULT
1761 // case 0x19: MULTU
1762 // case 0x1A: DIV
1763 // case 0x1B: DIVU
1764 if(dops[i].rs1&&dops[i].rs2)
1765 {
1766 switch(dops[i].opcode2)
1767 {
1768 case 0x18: // MULT
1769 case 0x19: // MULTU
1770 {
1771 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1772 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1773 signed char hi=get_reg(i_regs->regmap,HIREG);
1774 signed char lo=get_reg(i_regs->regmap,LOREG);
1775 assert(m1>=0);
1776 assert(m2>=0);
1777 assert(hi>=0);
1778 assert(lo>=0);
1779
1780 if(dops[i].opcode2==0x18) // MULT
1781 emit_smull(m1,m2,hi);
1782 else // MULTU
1783 emit_umull(m1,m2,hi);
1784
1785 emit_mov(hi,lo);
1786 emit_shrimm64(hi,32,hi);
1787 break;
1788 }
1789 case 0x1A: // DIV
1790 case 0x1B: // DIVU
1791 {
1792 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1793 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1794 signed char quotient=get_reg(i_regs->regmap,LOREG);
1795 signed char remainder=get_reg(i_regs->regmap,HIREG);
1796 assert(numerator>=0);
1797 assert(denominator>=0);
1798 assert(quotient>=0);
1799 assert(remainder>=0);
1800
1801 if (dops[i].opcode2 == 0x1A) // DIV
1802 emit_sdiv(numerator,denominator,quotient);
1803 else // DIVU
1804 emit_udiv(numerator,denominator,quotient);
1805 emit_msub(quotient,denominator,numerator,remainder);
1806
1807 // div 0 quotient (remainder is already correct)
1808 host_tempreg_acquire();
1809 if (dops[i].opcode2 == 0x1A) // DIV
1810 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1811 else
1812 emit_movimm(~0,HOST_TEMPREG);
1813 emit_test(denominator,denominator);
1814 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1815 host_tempreg_release();
1816 break;
1817 }
1818 default:
1819 assert(0);
1820 }
1821 }
1822 else
1823 {
1824 signed char hr=get_reg(i_regs->regmap,HIREG);
1825 signed char lr=get_reg(i_regs->regmap,LOREG);
1826 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1827 {
1828 if (dops[i].rs1) {
1829 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1830 assert(numerator >= 0);
1831 if (hr >= 0)
1832 emit_mov(numerator,hr);
1833 if (lr >= 0) {
1834 if (dops[i].opcode2 == 0x1A) // DIV
1835 emit_sub_asrimm(0,numerator,31,lr);
1836 else
1837 emit_movimm(~0,lr);
1838 }
1839 }
1840 else {
1841 if (hr >= 0) emit_zeroreg(hr);
1842 if (lr >= 0) emit_movimm(~0,lr);
1843 }
1844 }
1845 else
1846 {
1847 // Multiply by zero is zero.
1848 if (hr >= 0) emit_zeroreg(hr);
1849 if (lr >= 0) emit_zeroreg(lr);
1850 }
1851 }
1852}
1853#define multdiv_assemble multdiv_assemble_arm64
1854
1855static void do_jump_vaddr(u_int rs)
1856{
1857 if (rs != 0)
1858 emit_mov(rs, 0);
1859 emit_far_call(ndrc_get_addr_ht);
1860 emit_jmpreg(0);
1861}
1862
1863static void do_preload_rhash(u_int r) {
1864 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1865 // register. On ARM the hash can be done with a single instruction (below)
1866}
1867
1868static void do_preload_rhtbl(u_int ht) {
1869 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1870}
1871
1872static void do_rhash(u_int rs,u_int rh) {
1873 emit_andimm(rs, 0xf8, rh);
1874}
1875
1876static void do_miniht_load(int ht, u_int rh) {
1877 emit_add64(ht, rh, ht);
1878 emit_ldst(0, 0, rh, ht, 0);
1879}
1880
1881static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1882 emit_cmp(rh, rs);
1883 void *jaddr = out;
1884 emit_jeq(0);
1885 do_jump_vaddr(rs);
1886
1887 set_jump_target(jaddr, out);
1888 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1889 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1890 emit_jmpreg(ht);
1891}
1892
1893// parsed by set_jump_target?
1894static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1895 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1896 emit_movk(return_address&0xffff,rt);
1897 add_to_linker(out,return_address,1);
1898 emit_adr(out,temp);
1899 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1900 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1901}
1902
/*
 * Make code written to [start, end) visible to instruction fetch.
 *
 * Don't rely on GCC's __clear_cache implementation, as it caches
 * icache/dcache cache line sizes, that can vary between cores on
 * big.LITTLE architectures. Instead CTR_EL0 is read on every call and the
 * smallest line sizes seen so far are kept in static variables.
 */
static void clear_cache_arm64(char *start, char *end)
{
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  uint64_t ctr_el0, line;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (isize < icache_line_size)
    icache_line_size = isize;
  isize = icache_line_size;
  if (dsize < dcache_line_size)
    dcache_line_size = dsize;
  dsize = dcache_line_size;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < (uint64_t)end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < (uint64_t)end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1943
1944// CPU-architecture-specific initialization
1945static void arch_init(void)
1946{
1947 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1948 struct tramp_insns *ops = ndrc->tramp.ops, *opsw;
1949 size_t i;
1950 assert(!(diff & 3));
1951 opsw = start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1952 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1953 opsw[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1954 opsw[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1955 }
1956 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1957}
1958
1959// vim:shiftwidth=2:expandtab