cspace: add forgotten length decrement
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26#define unused __attribute__((unused))
27
28void do_memhandler_pre();
29void do_memhandler_post();
30
31/* Linker */
32static void set_jump_target(void *addr, void *target)
33{
34 u_int *ptr = NDRC_WRITE_OFFSET(addr);
35 intptr_t offset = (u_char *)target - (u_char *)addr;
36
37 if ((*ptr&0xFC000000) == 0x14000000) { // b
38 assert(offset>=-134217728LL&&offset<134217728LL);
39 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
40 }
41 else if ((*ptr&0xff000000) == 0x54000000 // b.cond
42 || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
43 // Conditional branch are limited to +/- 1MB
44 // block max size is 256k so branching beyond the +/- 1MB limit
45 // should only happen when jumping to an already compiled block (see add_jump_out)
46 // a workaround would be to do a trampoline jump via a stub at the end of the block
47 assert(-1048576 <= offset && offset < 1048576);
48 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
49 }
50 else if((*ptr&0x9f000000)==0x10000000) { // adr
51 // generated by do_miniht_insert
52 assert(offset>=-1048576LL&&offset<1048576LL);
53 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
54 }
55 else
56 abort(); // should not happen
57}
58
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr; its pc-relative target is
// decoded and returned. The decode goes through an unsigned word so that
// no signed value is ever shifted left.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  uint32_t insn = (uint32_t)*ptr;
  assert((insn&0x9f000000) == 0x10000000); // adr
  // immhi is bits [23:5]: move its top bit to bit 31, then sign-extend
  int immhi = (int32_t)(insn << 8) >> 13;
  int offset = (immhi * 4) | (int)((insn >> 29) & 0x3); // immlo is bits [30:29]
  return ptr + offset / 4;
}
68
#if 0
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  int insn = *i_ptr;

  if ((insn&0xfc000000) == 0x14000000) // b
    return i_ptr + ((signed int)(insn<<6)>>6);

  int is_bcond = (insn&0xff000000) == 0x54000000; // b.cond
  int is_cbz = (insn&0x7e000000) == 0x34000000;   // cbz/cbnz
  if (is_bcond || is_cbz)
    return i_ptr + ((signed int)(insn<<8)>>13);

  assert(0);
  return NULL;
}
#endif
86
87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes, numbered 0..15 in declaration order; the values are
// OR-ed/shifted directly into instruction words and index condname[].
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141};
142
143static void output_w32(u_int word)
144{
145 *((u_int *)NDRC_WRITE_OFFSET(out)) = word;
146 out += 4;
147}
148
// Pack Rn (bits 9:5) and Rd (bits 4:0); both must be below 31.
static u_int rn_rd(u_int rn, u_int rd)
{
  assert(rn < 31);
  assert(rd < 31);

  u_int fields = rn << 5;
  fields |= rd;
  return fields;
}
155
// Pack Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0);
// each must be below 32.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
163
164static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
165{
166 assert(ra < 32);
167 return rm_rn_rd(rm, rn, rd) | (ra << 10);
168}
169
// Pack imm7 (bits 21:15), Rt2 (bits 14:10), Rn (bits 9:5) and Rt (bits 4:0).
// imm7 must fit in 7 bits, rt2 and rt must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);

  u_int fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
178
179static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
180{
181 assert(imm6 <= 63);
182 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
183}
184
// Pack a 16-bit immediate (bits 20:5) and Rd (bits 4:0, rd < 31).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  assert(imm16 < 0x10000);
  assert(rd < 31);

  u_int fields = imm16 << 5;
  fields |= rd;
  return fields;
}
191
// Pack a 12-bit immediate (bits 21:10), Rn (bits 9:5) and Rd (bits 4:0).
// Register numbers up to 31 are accepted here.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
199
// Pack a 9-bit immediate (bits 20:12), Rn (bits 9:5) and the transfer
// register (bits 4:0); both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
207
// Pack a 19-bit immediate (bits 23:5) and Rt (bits 4:0, rt < 31).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  assert(imm19 < 0x80000);
  assert(rt < 31);

  u_int fields = imm19 << 5;
  fields |= rt;
  return fields;
}
214
// Pack the bitfield/logical-immediate operand fields:
// N (bit 22), immr (bits 21:16), imms (bits 15:10), Rn (bits 9:5), Rd (bits 4:0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);

  u_int fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
224
225static u_int genjmp(const u_char *addr)
226{
227 intptr_t offset = addr - out;
228 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
229 if (offset < -134217728 || offset > 134217727) {
230 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
231 abort();
232 return 0;
233 }
234 return ((u_int)offset >> 2) & 0x03ffffff;
235}
236
237static u_int genjmpcc(const u_char *addr)
238{
239 intptr_t offset = addr - out;
240 if ((uintptr_t)addr < 3) return 0;
241 if (offset < -1048576 || offset > 1048572) {
242 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
243 abort();
244 return 0;
245 }
246 return ((u_int)offset >> 2) & 0x7ffff;
247}
248
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (i.e. value+1 is a power of two or wraps to zero), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return ((value + 1) & value) == 0;
}
253
254// This function returns true if the argument contains a
255// non-empty sequence of ones (possibly rotated) with the remainder zero.
256static uint32_t is_rotated_mask(u_int value)
257{
258 if (value == 0 || value == ~0)
259 return 0;
260 if (is_mask((value - 1) | value))
261 return 1;
262 return is_mask((~value - 1) | ~value);
263}
264
265static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
266{
267 int lzeros, tzeros, ones;
268 assert(value != 0);
269 if (is_mask((value - 1) | value)) {
270 lzeros = __builtin_clz(value);
271 tzeros = __builtin_ctz(value);
272 ones = 32 - lzeros - tzeros;
273 *immr = (32 - tzeros) & 31;
274 *imms = ones - 1;
275 return;
276 }
277 value = ~value;
278 if (is_mask((value - 1) | value)) {
279 lzeros = __builtin_clz(value);
280 tzeros = __builtin_ctz(value);
281 ones = 32 - lzeros - tzeros;
282 *immr = lzeros;
283 *imms = 31 - ones;
284 return;
285 }
286 abort();
287}
288
289static void emit_mov(u_int rs, u_int rt)
290{
291 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
292 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
293}
294
295static void emit_mov64(u_int rs, u_int rt)
296{
297 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
298 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
299}
300
301static void emit_add(u_int rs1, u_int rs2, u_int rt)
302{
303 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
304 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
305}
306
307static void emit_add64(u_int rs1, u_int rs2, u_int rt)
308{
309 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
310 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
311}
312
313static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
314{
315 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
316 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
317}
318#define emit_adds_ptr emit_adds64
319
320static void emit_neg(u_int rs, u_int rt)
321{
322 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
323 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
324}
325
326static void emit_sub(u_int rs1, u_int rs2, u_int rt)
327{
328 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
329 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
330}
331
332static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
333{
334 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
335 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
336}
337
338static void emit_movz(u_int imm, u_int rt)
339{
340 assem_debug("movz %s,#%#x\n", regname[rt], imm);
341 output_w32(0x52800000 | imm16_rd(imm, rt));
342}
343
344static void emit_movz_lsl16(u_int imm, u_int rt)
345{
346 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
347 output_w32(0x52a00000 | imm16_rd(imm, rt));
348}
349
350static void emit_movn(u_int imm, u_int rt)
351{
352 assem_debug("movn %s,#%#x\n", regname[rt], imm);
353 output_w32(0x12800000 | imm16_rd(imm, rt));
354}
355
356static void emit_movn_lsl16(u_int imm,u_int rt)
357{
358 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
359 output_w32(0x12a00000 | imm16_rd(imm, rt));
360}
361
362static void emit_movk(u_int imm,u_int rt)
363{
364 assem_debug("movk %s,#%#x\n", regname[rt], imm);
365 output_w32(0x72800000 | imm16_rd(imm, rt));
366}
367
368static void emit_movk_lsl16(u_int imm,u_int rt)
369{
370 assert(imm<65536);
371 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x72a00000 | imm16_rd(imm, rt));
373}
374
375static void emit_zeroreg(u_int rt)
376{
377 emit_movz(0, rt);
378}
379
380static void emit_movimm(u_int imm, u_int rt)
381{
382 if (imm < 65536)
383 emit_movz(imm, rt);
384 else if ((~imm) < 65536)
385 emit_movn(~imm, rt);
386 else if ((imm&0xffff) == 0)
387 emit_movz_lsl16(imm >> 16, rt);
388 else if (((~imm)&0xffff) == 0)
389 emit_movn_lsl16(~imm >> 16, rt);
390 else if (is_rotated_mask(imm)) {
391 u_int immr, imms;
392 gen_logical_imm(imm, &immr, &imms);
393 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
394 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
395 }
396 else {
397 emit_movz(imm & 0xffff, rt);
398 emit_movk_lsl16(imm >> 16, rt);
399 }
400}
401
402static void emit_movimm64(uint64_t imm, u_int rt)
403{
404 u_int shift, op, imm16, insns = 0;
405 for (shift = 0; shift < 4; shift++) {
406 imm16 = (imm >> shift * 16) & 0xffff;
407 if (!imm16)
408 continue;
409 op = insns ? 0xf2800000 : 0xd2800000;
410 assem_debug("mov%c %s,#%#x", insns ? 'k' : 'z', regname64[rt], imm16);
411 if (shift)
412 assem_debug(",lsl #%u", shift * 16);
413 assem_debug("\n");
414 output_w32(op | (shift << 21) | imm16_rd(imm16, rt));
415 insns++;
416 }
417 if (!insns) {
418 assem_debug("movz %s,#0\n", regname64[rt]);
419 output_w32(0xd2800000 | imm16_rd(0, rt));
420 }
421}
422
423static void emit_readword(void *addr, u_int rt)
424{
425 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
426 if (!(offset & 3) && offset <= 16380) {
427 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
428 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
429 }
430 else
431 abort();
432}
433
434static void emit_readdword(void *addr, u_int rt)
435{
436 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
437 if (!(offset & 7) && offset <= 32760) {
438 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
439 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
440 }
441 else
442 abort();
443}
444#define emit_readptr emit_readdword
445
446static void emit_readshword(void *addr, u_int rt)
447{
448 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
449 if (!(offset & 1) && offset <= 8190) {
450 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
451 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
452 }
453 else
454 assert(0);
455}
456
457static void emit_loadreg(u_int r, u_int hr)
458{
459 int is64 = 0;
460 if (r == 0)
461 emit_zeroreg(hr);
462 else {
463 void *addr;
464 switch (r) {
465 //case HIREG: addr = &hi; break;
466 //case LOREG: addr = &lo; break;
467 case CCREG: addr = &cycle_count; break;
468 case CSREG: addr = &Status; break;
469 case INVCP: addr = &invc_ptr; is64 = 1; break;
470 case ROREG: addr = &ram_offset; is64 = 1; break;
471 default:
472 assert(r < 34);
473 addr = &psxRegs.GPR.r[r];
474 break;
475 }
476 if (is64)
477 emit_readdword(addr, hr);
478 else
479 emit_readword(addr, hr);
480 }
481}
482
483static void emit_writeword(u_int rt, void *addr)
484{
485 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
486 if (!(offset & 3) && offset <= 16380) {
487 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
488 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
489 }
490 else
491 assert(0);
492}
493
494static void emit_writedword(u_int rt, void *addr)
495{
496 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
497 if (!(offset & 7) && offset <= 32760) {
498 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
499 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
500 }
501 else
502 abort();
503}
504
505static void emit_storereg(u_int r, u_int hr)
506{
507 assert(r < 64);
508 void *addr = &psxRegs.GPR.r[r];
509 switch (r) {
510 //case HIREG: addr = &hi; break;
511 //case LOREG: addr = &lo; break;
512 case CCREG: addr = &cycle_count; break;
513 default: assert(r < 34); break;
514 }
515 emit_writeword(hr, addr);
516}
517
518static void emit_test(u_int rs, u_int rt)
519{
520 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
521 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
522}
523
524static void emit_testimm(u_int rs, u_int imm)
525{
526 u_int immr, imms;
527 assem_debug("tst %s,#%#x\n", regname[rs], imm);
528 assert(is_rotated_mask(imm)); // good enough for PCSX
529 gen_logical_imm(imm, &immr, &imms);
530 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
531}
532
533static void emit_not(u_int rs,u_int rt)
534{
535 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
536 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
537}
538
539static void emit_and(u_int rs1,u_int rs2,u_int rt)
540{
541 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
542 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
543}
544
545static void emit_or(u_int rs1,u_int rs2,u_int rt)
546{
547 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
548 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
549}
550
551static void emit_bic(u_int rs1,u_int rs2,u_int rt)
552{
553 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
554 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
555}
556
557static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
558{
559 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
560 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
561}
562
563static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
564{
565 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
566 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
567}
568
569static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
570{
571 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
572 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
573}
574
575static void emit_xor(u_int rs1,u_int rs2,u_int rt)
576{
577 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
578 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
579}
580
581static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
582{
583 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
584 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
585}
586
587static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
588{
589 unused const char *st = s ? "s" : "";
590 s = s ? 0x20000000 : 0;
591 is64 = is64 ? 0x80000000 : 0;
592 if (imm < 4096) {
593 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
594 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
595 }
596 else if (-imm < 4096) {
597 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
598 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
599 }
600 else if (imm < 16777216) {
601 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
602 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
603 if ((imm & 0xfff) || s) {
604 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
605 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
606 }
607 }
608 else if (-imm < 16777216) {
609 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
610 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
611 if ((imm & 0xfff) || s) {
612 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
613 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
614 }
615 }
616 else
617 abort();
618}
619
620static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
621{
622 emit_addimm_s(0, 0, rs, imm, rt);
623}
624
625static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
626{
627 emit_addimm_s(0, 1, rs, imm, rt);
628}
629
630static void emit_addimm_and_set_flags(int imm, u_int rt)
631{
632 emit_addimm_s(1, 0, rt, imm, rt);
633}
634
635static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
636{
637 const char *names[] = { "and", "orr", "eor", "ands" };
638 const char *name = names[op];
639 u_int immr, imms;
640 op = op << 29;
641 if (is_rotated_mask(imm)) {
642 gen_logical_imm(imm, &immr, &imms);
643 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
644 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
645 }
646 else {
647 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
648 host_tempreg_acquire();
649 emit_movimm(imm, HOST_TEMPREG);
650 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
651 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
652 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
653 host_tempreg_release();
654 }
655 (void)name;
656}
657
658static void emit_andimm(u_int rs, u_int imm, u_int rt)
659{
660 if (imm == 0)
661 emit_zeroreg(rt);
662 else
663 emit_logicop_imm(0, rs, imm, rt);
664}
665
666static void emit_orimm(u_int rs, u_int imm, u_int rt)
667{
668 if (imm == 0) {
669 if (rs != rt)
670 emit_mov(rs, rt);
671 }
672 else
673 emit_logicop_imm(1, rs, imm, rt);
674}
675
676static void emit_xorimm(u_int rs, u_int imm, u_int rt)
677{
678 if (imm == 0) {
679 if (rs != rt)
680 emit_mov(rs, rt);
681 }
682 else
683 emit_logicop_imm(2, rs, imm, rt);
684}
685
686static void emit_sbfm(u_int rs,u_int imm,u_int rt)
687{
688 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
689 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
690}
691
692static void emit_ubfm(u_int rs,u_int imm,u_int rt)
693{
694 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
695 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
696}
697
698static void emit_shlimm(u_int rs,u_int imm,u_int rt)
699{
700 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
701 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
702}
703
704static void emit_shrimm(u_int rs,u_int imm,u_int rt)
705{
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
708}
709
710static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
711{
712 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
714}
715
716static void emit_sarimm(u_int rs,u_int imm,u_int rt)
717{
718 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
719 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
720}
721
722static void emit_rorimm(u_int rs,u_int imm,u_int rt)
723{
724 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
725 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
726}
727
728static void emit_signextend16(u_int rs, u_int rt)
729{
730 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
731 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
732}
733
734static void emit_shl(u_int rs,u_int rshift,u_int rt)
735{
736 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
737 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
738}
739
740static void emit_shr(u_int rs,u_int rshift,u_int rt)
741{
742 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
743 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
744}
745
746static void emit_sar(u_int rs,u_int rshift,u_int rt)
747{
748 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
749 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
750}
751
752static void emit_cmpimm(u_int rs, u_int imm)
753{
754 if (imm < 4096) {
755 assem_debug("cmp %s,%#x\n", regname[rs], imm);
756 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
757 }
758 else if (-imm < 4096) {
759 assem_debug("cmn %s,%#x\n", regname[rs], imm);
760 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
761 }
762 else if (imm < 16777216 && !(imm & 0xfff)) {
763 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
764 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
765 }
766 else {
767 host_tempreg_acquire();
768 emit_movimm(imm, HOST_TEMPREG);
769 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
770 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
771 host_tempreg_release();
772 }
773}
774
775static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
776{
777 assert(imm == 0 || imm == 1);
778 assert(cond0 < 0x10);
779 assert(cond1 < 0x10);
780 if (imm) {
781 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
782 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
783 } else {
784 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
785 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
786 }
787}
788
789static void emit_cmovne_imm(u_int imm,u_int rt)
790{
791 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
792}
793
794static void emit_cmovl_imm(u_int imm,u_int rt)
795{
796 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
797}
798
799static void emit_cmovb_imm(int imm,u_int rt)
800{
801 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
802}
803
804static void emit_cmoveq_reg(u_int rs,u_int rt)
805{
806 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
807 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
808}
809
810static void emit_cmovne_reg(u_int rs,u_int rt)
811{
812 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
813 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
814}
815
816static void emit_cmovl_reg(u_int rs,u_int rt)
817{
818 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
819 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
820}
821
822static void emit_cmovb_reg(u_int rs,u_int rt)
823{
824 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
825 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
826}
827
828static void emit_cmovs_reg(u_int rs,u_int rt)
829{
830 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
831 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
832}
833
834static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
835{
836 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
837 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
838}
839
840static void emit_slti32(u_int rs,int imm,u_int rt)
841{
842 if(rs!=rt) emit_zeroreg(rt);
843 emit_cmpimm(rs,imm);
844 if(rs==rt) emit_movimm(0,rt);
845 emit_cmovl_imm(1,rt);
846}
847
848static void emit_sltiu32(u_int rs,int imm,u_int rt)
849{
850 if(rs!=rt) emit_zeroreg(rt);
851 emit_cmpimm(rs,imm);
852 if(rs==rt) emit_movimm(0,rt);
853 emit_cmovb_imm(1,rt);
854}
855
856static void emit_cmp(u_int rs,u_int rt)
857{
858 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
859 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
860}
861
862static void emit_cmpcs(u_int rs,u_int rt)
863{
864 assem_debug("ccmp %s,%s,#0,cs\n",regname[rs],regname[rt]);
865 output_w32(0x7a400000 | (COND_CS << 12) | rm_rn_rd(rt, rs, 0));
866}
867
868static void emit_set_gz32(u_int rs, u_int rt)
869{
870 //assem_debug("set_gz32\n");
871 emit_cmpimm(rs,1);
872 emit_movimm(1,rt);
873 emit_cmovl_imm(0,rt);
874}
875
876static void emit_set_nz32(u_int rs, u_int rt)
877{
878 //assem_debug("set_nz32\n");
879 if(rs!=rt) emit_mov(rs,rt);
880 emit_test(rs,rs);
881 emit_cmovne_imm(1,rt);
882}
883
884static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
885{
886 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
887 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
888 emit_cmp(rs1,rs2);
889 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
890 emit_cmovl_imm(1,rt);
891}
892
893static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
894{
895 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
896 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
897 emit_cmp(rs1,rs2);
898 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
899 emit_cmovb_imm(1,rt);
900}
901
902static int can_jump_or_call(const void *a)
903{
904 intptr_t diff = (u_char *)a - out;
905 return (-134217728 <= diff && diff <= 134217727);
906}
907
908static void emit_call(const void *a)
909{
910 intptr_t diff = (u_char *)a - out;
911 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
912 assert(!(diff & 3));
913 if (-134217728 <= diff && diff <= 134217727)
914 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
915 else
916 abort();
917}
918
919static void emit_jmp(const void *a)
920{
921 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
922 u_int offset = genjmp(a);
923 output_w32(0x14000000 | offset);
924}
925
926static void emit_jne(const void *a)
927{
928 assem_debug("bne %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_NE);
931}
932
933static void emit_jeq(const void *a)
934{
935 assem_debug("beq %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_EQ);
938}
939
940static void emit_js(const void *a)
941{
942 assem_debug("bmi %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_MI);
945}
946
947static void emit_jns(const void *a)
948{
949 assem_debug("bpl %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_PL);
952}
953
954static void emit_jl(const void *a)
955{
956 assem_debug("blt %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_LT);
959}
960
961static void emit_jge(const void *a)
962{
963 assem_debug("bge %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_GE);
966}
967
968static void emit_jno(const void *a)
969{
970 assem_debug("bvc %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_VC);
973}
974
975static void emit_jc(const void *a)
976{
977 assem_debug("bcs %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CS);
980}
981
982static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
983{
984 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
985 u_int offset = genjmpcc(a);
986 is64 = is64 ? 0x80000000 : 0;
987 isnz = isnz ? 0x01000000 : 0;
988 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
989}
990
991static unused void emit_cbz(const void *a, u_int r)
992{
993 emit_cb(0, 0, a, r);
994}
995
996static void emit_jmpreg(u_int r)
997{
998 assem_debug("br %s\n", regname64[r]);
999 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
1000}
1001
1002static void emit_retreg(u_int r)
1003{
1004 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
1005 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
1006}
1007
1008static void emit_ret(void)
1009{
1010 emit_retreg(LR);
1011}
1012
1013static void emit_adr(void *addr, u_int rt)
1014{
1015 intptr_t offset = (u_char *)addr - out;
1016 assert(-1048576 <= offset && offset < 1048576);
1017 assert(rt < 31);
1018 assem_debug("adr x%d,#%#lx\n", rt, offset);
1019 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1020}
1021
1022static void emit_adrp(void *addr, u_int rt)
1023{
1024 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1025 assert(-4294967296l <= offset && offset < 4294967296l);
1026 assert(rt < 31);
1027 offset >>= 12;
1028 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1029 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1030}
1031
1032static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1033{
1034 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1035 assert(-256 <= offset && offset < 256);
1036 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1037}
1038
1039static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1040{
1041 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1042 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1043}
1044
1045static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1046{
1047 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1048 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1049}
1050
1051static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1052{
1053 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1054 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1055}
1056
1057static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1058{
1059 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1060 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1061}
1062#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1063
1064static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1065{
1066 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1067 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1068}
1069
1070static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1071{
1072 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1073 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1074}
1075
1076static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1077{
1078 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1079 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1080}
1081
1082static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1083{
1084 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1085 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1086}
1087
1088static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1089{
1090 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1091 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1092}
1093
1094static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1095{
1096 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1097 assert(-256 <= offset && offset < 256);
1098 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1099}
1100
1101static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1102{
1103 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1104 assert(-256 <= offset && offset < 256);
1105 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1106}
1107
1108static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1109{
1110 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1111 assert(-256 <= offset && offset < 256);
1112 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1113}
1114
1115static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1116{
1117 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1118 assert(-256 <= offset && offset < 256);
1119 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1120}
1121
1122static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1123{
1124 if (!(offset & 3) && (u_int)offset <= 16380) {
1125 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1126 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1127 }
1128 else if (-256 <= offset && offset < 256) {
1129 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1130 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1131 }
1132 else
1133 assert(0);
1134}
1135
1136static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1137{
1138 if (!(offset & 1) && (u_int)offset <= 8190) {
1139 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1140 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1141 }
1142 else if (-256 <= offset && offset < 256) {
1143 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1144 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1145 }
1146 else
1147 assert(0);
1148}
1149
1150static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1151{
1152 if ((u_int)offset < 4096) {
1153 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1154 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1155 }
1156 else if (-256 <= offset && offset < 256) {
1157 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1158 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1159 }
1160 else
1161 assert(0);
1162}
1163
1164static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1165{
1166 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1168}
1169
1170static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1171{
1172 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1174}
1175
1176static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1177{
1178 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1179 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1180}
1181
1182static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1183{
1184 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1186}
1187
1188static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1189{
1190 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1192}
1193
1194static void emit_clz(u_int rs, u_int rt)
1195{
1196 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1197 output_w32(0x5ac01000 | rn_rd(rs, rt));
1198}
1199
1200// special case for checking invalid_code
1201static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1202{
1203 host_tempreg_acquire();
1204 emit_shrimm(r, 12, HOST_TEMPREG);
1205 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1206 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1207 emit_cmpimm(HOST_TEMPREG, imm);
1208 host_tempreg_release();
1209}
1210
1211// special for loadlr_assemble, rs2 is destroyed
1212static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1213{
1214 emit_shl(rs2, shift, rs2);
1215 emit_bic(rs1, rs2, rt);
1216}
1217
1218static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1219{
1220 emit_shr(rs2, shift, rs2);
1221 emit_bic(rs1, rs2, rt);
1222}
1223
1224static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1225{
1226 u_int op = 0xb9000000;
1227 unused const char *ldst = is_st ? "st" : "ld";
1228 unused char rp = is64 ? 'x' : 'w';
1229 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1230 is64 = is64 ? 1 : 0;
1231 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1232 ofs = (ofs >> (2+is64));
1233 if (!is_st) op |= 0x00400000;
1234 if (is64) op |= 0x40000000;
1235 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1236}
1237
1238static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1239{
1240 u_int op = 0x29000000;
1241 unused const char *ldst = is_st ? "st" : "ld";
1242 unused char rp = is64 ? 'x' : 'w';
1243 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1244 is64 = is64 ? 1 : 0;
1245 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1246 ofs = (ofs >> (2+is64));
1247 assert(-64 <= ofs && ofs <= 63);
1248 ofs &= 0x7f;
1249 if (!is_st) op |= 0x00400000;
1250 if (is64) op |= 0x80000000;
1251 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1252}
1253
1254static void save_load_regs_all(int is_store, u_int reglist)
1255{
1256 int ofs = 0, c = 0;
1257 u_int r, pair[2];
1258 for (r = 0; reglist; r++, reglist >>= 1) {
1259 if (reglist & 1)
1260 pair[c++] = r;
1261 if (c == 2) {
1262 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1263 ofs += 8 * 2;
1264 c = 0;
1265 }
1266 }
1267 if (c) {
1268 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1269 ofs += 8;
1270 }
1271 assert(ofs <= SSP_CALLER_REGS);
1272}
1273
1274// Save registers before function call
1275static void save_regs(u_int reglist)
1276{
1277 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1278 save_load_regs_all(1, reglist);
1279}
1280
1281// Restore registers after function call
1282static void restore_regs(u_int reglist)
1283{
1284 reglist &= CALLER_SAVE_REGS;
1285 save_load_regs_all(0, reglist);
1286}
1287
1288/* Stubs/epilogue */
1289
1290static void literal_pool(int n)
1291{
1292 (void)literals;
1293}
1294
// Intentionally empty on this backend; n is ignored.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1298
1299// parsed by get_pointer, find_extjump_insn
1300static void emit_extjump(u_char *addr, u_int target)
1301{
1302 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1303
1304 emit_movz(target & 0xffff, 0);
1305 emit_movk_lsl16(target >> 16, 0);
1306
1307 // addr is in the current recompiled block (max 256k)
1308 // offset shouldn't exceed +/-1MB
1309 emit_adr(addr, 1);
1310 emit_far_jump(dyna_linker);
1311}
1312
1313static void check_extjump2(void *src)
1314{
1315 u_int *ptr = src;
1316 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1317 (void)ptr;
1318}
1319
1320// put rt_val into rt, potentially making use of rs with value rs_val
1321static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1322{
1323 int diff = rt_val - rs_val;
1324 if ((-4096 < diff && diff < 4096)
1325 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1326 emit_addimm(rs, diff, rt);
1327 else if (rt_val == ~rs_val)
1328 emit_not(rs, rt);
1329 else if (is_rotated_mask(rs_val ^ rt_val))
1330 emit_xorimm(rs, rs_val ^ rt_val, rt);
1331 else
1332 emit_movimm(rt_val, rt);
1333}
1334
1335// return 1 if the above function can do it's job cheaply
1336static int is_similar_value(u_int v1, u_int v2)
1337{
1338 int diff = v1 - v2;
1339 return (-4096 < diff && diff < 4096)
1340 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1341 || v1 == ~v2
1342 || is_rotated_mask(v1 ^ v2);
1343}
1344
1345static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1346{
1347 if (rt_val < 0x100000000ull) {
1348 emit_movimm_from(rs_val, rs, rt_val, rt);
1349 return;
1350 }
1351 // just move the whole thing. At least on Linux all addresses
1352 // seem to be 48bit, so 3 insns - not great not terrible
1353 emit_movimm64(rt_val, rt);
1354}
1355
1356// trashes x2
1357static void pass_args64(u_int a0, u_int a1)
1358{
1359 if(a0==1&&a1==0) {
1360 // must swap
1361 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1362 }
1363 else if(a0!=0&&a1==0) {
1364 emit_mov64(a1,1);
1365 if (a0>=0) emit_mov64(a0,0);
1366 }
1367 else {
1368 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1369 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1370 }
1371}
1372
1373static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1374{
1375 switch(type) {
1376 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1377 case LOADBU_STUB:
1378 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1379 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1380 case LOADHU_STUB:
1381 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1382 case LOADW_STUB:
1383 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1384 default: assert(0);
1385 }
1386}
1387
1388#include "pcsxmem.h"
1389//#include "pcsxmem_inline.c"
1390
1391static void do_readstub(int n)
1392{
1393 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1394 set_jump_target(stubs[n].addr, out);
1395 enum stub_type type = stubs[n].type;
1396 int i = stubs[n].a;
1397 int rs = stubs[n].b;
1398 const struct regstat *i_regs = (void *)stubs[n].c;
1399 u_int reglist = stubs[n].e;
1400 const signed char *i_regmap = i_regs->regmap;
1401 int rt;
1402 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1403 rt=get_reg(i_regmap,FTEMP);
1404 }else{
1405 rt=get_reg(i_regmap,dops[i].rt1);
1406 }
1407 assert(rs>=0);
1408 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1409 void *restore_jump = NULL, *handler_jump = NULL;
1410 reglist|=(1<<rs);
1411 for (r = 0; r < HOST_CCREG; r++) {
1412 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1413 temp = r;
1414 break;
1415 }
1416 }
1417 if(rt>=0&&dops[i].rt1!=0)
1418 reglist&=~(1<<rt);
1419 if(temp==-1) {
1420 save_regs(reglist);
1421 regs_saved=1;
1422 temp=(rs==0)?2:0;
1423 }
1424 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1425 temp2=1;
1426 emit_readdword(&mem_rtab,temp);
1427 emit_shrimm(rs,12,temp2);
1428 emit_readdword_dualindexedx8(temp,temp2,temp2);
1429 emit_adds64(temp2,temp2,temp2);
1430 handler_jump=out;
1431 emit_jc(0);
1432 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1433 switch(type) {
1434 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1435 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1436 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1437 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1438 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1439 default: assert(0);
1440 }
1441 }
1442 if(regs_saved) {
1443 restore_jump=out;
1444 emit_jmp(0); // jump to reg restore
1445 }
1446 else
1447 emit_jmp(stubs[n].retaddr); // return address
1448 set_jump_target(handler_jump, out);
1449
1450 if(!regs_saved)
1451 save_regs(reglist);
1452 void *handler=NULL;
1453 if(type==LOADB_STUB||type==LOADBU_STUB)
1454 handler=jump_handler_read8;
1455 if(type==LOADH_STUB||type==LOADHU_STUB)
1456 handler=jump_handler_read16;
1457 if(type==LOADW_STUB)
1458 handler=jump_handler_read32;
1459 assert(handler);
1460 pass_args64(rs,temp2);
1461 int cc=get_reg(i_regmap,CCREG);
1462 if(cc<0)
1463 emit_loadreg(CCREG,2);
1464 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1465 emit_far_call(handler);
1466 // (no cycle reload after read)
1467 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1468 loadstore_extend(type,0,rt);
1469 }
1470 if(restore_jump)
1471 set_jump_target(restore_jump, out);
1472 restore_regs(reglist);
1473 emit_jmp(stubs[n].retaddr);
1474}
1475
1476static void inline_readstub(enum stub_type type, int i, u_int addr,
1477 const signed char regmap[], int target, int adj, u_int reglist)
1478{
1479 int rs=get_reg(regmap,target);
1480 int rt=get_reg(regmap,target);
1481 if(rs<0) rs=get_reg_temp(regmap);
1482 assert(rs>=0);
1483 u_int is_dynamic=0;
1484 uintptr_t host_addr = 0;
1485 void *handler;
1486 int cc=get_reg(regmap,CCREG);
1487 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1488 // return;
1489 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1490 if (handler == NULL) {
1491 if(rt<0||dops[i].rt1==0)
1492 return;
1493 if (addr != host_addr)
1494 emit_movimm_from64(addr, rs, host_addr, rs);
1495 switch(type) {
1496 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1497 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1498 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1499 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1500 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1501 default: assert(0);
1502 }
1503 return;
1504 }
1505 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1506 if (is_dynamic) {
1507 if(type==LOADB_STUB||type==LOADBU_STUB)
1508 handler=jump_handler_read8;
1509 if(type==LOADH_STUB||type==LOADHU_STUB)
1510 handler=jump_handler_read16;
1511 if(type==LOADW_STUB)
1512 handler=jump_handler_read32;
1513 }
1514
1515 // call a memhandler
1516 if(rt>=0&&dops[i].rt1!=0)
1517 reglist&=~(1<<rt);
1518 save_regs(reglist);
1519 if(target==0)
1520 emit_movimm(addr,0);
1521 else if(rs!=0)
1522 emit_mov(rs,0);
1523 if(cc<0)
1524 emit_loadreg(CCREG,2);
1525 emit_addimm(cc<0?2:cc,adj,2);
1526 if(is_dynamic) {
1527 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1528 intptr_t offset = (l1 & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1529 if (-4294967296l <= offset && offset < 4294967296l) {
1530 emit_adrp((void *)l1, 1);
1531 emit_addimm64(1, l1 & 0xfff, 1);
1532 }
1533 else
1534 emit_movimm64(l1, 1);
1535 }
1536 else
1537 emit_far_call(do_memhandler_pre);
1538
1539 emit_far_call(handler);
1540
1541 // (no cycle reload after read)
1542 if(rt>=0&&dops[i].rt1!=0)
1543 loadstore_extend(type, 0, rt);
1544 restore_regs(reglist);
1545}
1546
1547static void do_writestub(int n)
1548{
1549 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1550 set_jump_target(stubs[n].addr, out);
1551 enum stub_type type=stubs[n].type;
1552 int i=stubs[n].a;
1553 int rs=stubs[n].b;
1554 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1555 u_int reglist=stubs[n].e;
1556 signed char *i_regmap=i_regs->regmap;
1557 int rt,r;
1558 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1559 rt=get_reg(i_regmap,r=FTEMP);
1560 }else{
1561 rt=get_reg(i_regmap,r=dops[i].rs2);
1562 }
1563 assert(rs>=0);
1564 assert(rt>=0);
1565 int rtmp,temp=-1,temp2,regs_saved=0;
1566 void *restore_jump = NULL, *handler_jump = NULL;
1567 int reglist2=reglist|(1<<rs)|(1<<rt);
1568 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1569 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1570 temp = rtmp;
1571 break;
1572 }
1573 }
1574 if(temp==-1) {
1575 save_regs(reglist);
1576 regs_saved=1;
1577 for(rtmp=0;rtmp<=3;rtmp++)
1578 if(rtmp!=rs&&rtmp!=rt)
1579 {temp=rtmp;break;}
1580 }
1581 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1582 temp2=3;
1583 else {
1584 host_tempreg_acquire();
1585 temp2=HOST_TEMPREG;
1586 }
1587 emit_readdword(&mem_wtab,temp);
1588 emit_shrimm(rs,12,temp2);
1589 emit_readdword_dualindexedx8(temp,temp2,temp2);
1590 emit_adds64(temp2,temp2,temp2);
1591 handler_jump=out;
1592 emit_jc(0);
1593 switch(type) {
1594 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1595 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1596 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1597 default: assert(0);
1598 }
1599 if(regs_saved) {
1600 restore_jump=out;
1601 emit_jmp(0); // jump to reg restore
1602 }
1603 else
1604 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1605 set_jump_target(handler_jump, out);
1606
1607 if(!regs_saved)
1608 save_regs(reglist);
1609 void *handler=NULL;
1610 switch(type) {
1611 case STOREB_STUB: handler=jump_handler_write8; break;
1612 case STOREH_STUB: handler=jump_handler_write16; break;
1613 case STOREW_STUB: handler=jump_handler_write32; break;
1614 default: assert(0);
1615 }
1616 assert(handler);
1617 pass_args(rs,rt);
1618 if(temp2!=3) {
1619 emit_mov64(temp2,3);
1620 host_tempreg_release();
1621 }
1622 int cc=get_reg(i_regmap,CCREG);
1623 if(cc<0)
1624 emit_loadreg(CCREG,2);
1625 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1626 // returns new cycle_count
1627 emit_far_call(handler);
1628 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1629 if(cc<0)
1630 emit_storereg(CCREG,2);
1631 if(restore_jump)
1632 set_jump_target(restore_jump, out);
1633 restore_regs(reglist);
1634 emit_jmp(stubs[n].retaddr);
1635}
1636
1637static void inline_writestub(enum stub_type type, int i, u_int addr,
1638 const signed char regmap[], int target, int adj, u_int reglist)
1639{
1640 int rs = get_reg_temp(regmap);
1641 int rt = get_reg(regmap,target);
1642 assert(rs >= 0);
1643 assert(rt >= 0);
1644 uintptr_t host_addr = 0;
1645 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1646 if (handler == NULL) {
1647 if (addr != host_addr)
1648 emit_movimm_from64(addr, rs, host_addr, rs);
1649 switch (type) {
1650 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1651 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1652 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1653 default: assert(0);
1654 }
1655 return;
1656 }
1657
1658 // call a memhandler
1659 save_regs(reglist);
1660 emit_writeword(rs, &address); // some handlers still need it
1661 loadstore_extend(type, rt, 0);
1662 int cc, cc_use;
1663 cc = cc_use = get_reg(regmap, CCREG);
1664 if (cc < 0)
1665 emit_loadreg(CCREG, (cc_use = 2));
1666 emit_addimm(cc_use, adj, 2);
1667
1668 emit_far_call(do_memhandler_pre);
1669 emit_far_call(handler);
1670 emit_far_call(do_memhandler_post);
1671 emit_addimm(0, -adj, cc_use);
1672 if (cc < 0)
1673 emit_storereg(CCREG, cc_use);
1674 restore_regs(reglist);
1675}
1676
1677/* Special assem */
1678
1679static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1680{
1681 save_load_regs_all(1, reglist);
1682 cop2_do_stall_check(op, i, i_regs, 0);
1683#ifdef PCNT
1684 emit_movimm(op, 0);
1685 emit_far_call(pcnt_gte_start);
1686#endif
1687 // pointer to cop2 regs
1688 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1689}
1690
1691static void c2op_epilogue(u_int op,u_int reglist)
1692{
1693#ifdef PCNT
1694 emit_movimm(op, 0);
1695 emit_far_call(pcnt_gte_end);
1696#endif
1697 save_load_regs_all(0, reglist);
1698}
1699
1700static void c2op_assemble(int i, const struct regstat *i_regs)
1701{
1702 u_int c2op=source[i]&0x3f;
1703 u_int hr,reglist_full=0,reglist;
1704 int need_flags,need_ir;
1705 for(hr=0;hr<HOST_REGS;hr++) {
1706 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1707 }
1708 reglist=reglist_full&CALLER_SAVE_REGS;
1709
1710 if (gte_handlers[c2op]!=NULL) {
1711 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1712 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1713 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1714 source[i],gte_unneeded[i+1],need_flags,need_ir);
1715 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1716 need_flags=0;
1717 //int shift = (source[i] >> 19) & 1;
1718 //int lm = (source[i] >> 10) & 1;
1719 switch(c2op) {
1720 default:
1721 (void)need_ir;
1722 c2op_prologue(c2op, i, i_regs, reglist);
1723 emit_movimm(source[i],1); // opcode
1724 emit_writeword(1,&psxRegs.code);
1725 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1726 break;
1727 }
1728 c2op_epilogue(c2op,reglist);
1729 }
1730}
1731
1732static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1733{
1734 //value = value & 0x7ffff000;
1735 //if (value & 0x7f87e000) value |= 0x80000000;
1736 emit_andimm(sl, 0x7fffe000, temp);
1737 emit_testimm(temp, 0xff87ffff);
1738 emit_andimm(sl, 0x7ffff000, temp);
1739 host_tempreg_acquire();
1740 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1741 emit_cmovne_reg(HOST_TEMPREG, temp);
1742 host_tempreg_release();
1743 assert(0); // testing needed
1744}
1745
1746static void do_mfc2_31_one(u_int copr,signed char temp)
1747{
1748 emit_readshword(&reg_cop2d[copr],temp);
1749 emit_bicsar_imm(temp,31,temp);
1750 emit_cmpimm(temp,0xf80);
1751 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1752 emit_andimm(temp,0xf80,temp);
1753}
1754
1755static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1756{
1757 if (temp < 0) {
1758 host_tempreg_acquire();
1759 temp = HOST_TEMPREG;
1760 }
1761 do_mfc2_31_one(9,temp);
1762 emit_shrimm(temp,7,tl);
1763 do_mfc2_31_one(10,temp);
1764 emit_orrshr_imm(temp,2,tl);
1765 do_mfc2_31_one(11,temp);
1766 emit_orrshl_imm(temp,3,tl);
1767 emit_writeword(tl,&reg_cop2d[29]);
1768
1769 if (temp == HOST_TEMPREG)
1770 host_tempreg_release();
1771}
1772
1773static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1774{
1775 // case 0x18: MULT
1776 // case 0x19: MULTU
1777 // case 0x1A: DIV
1778 // case 0x1B: DIVU
1779 if(dops[i].rs1&&dops[i].rs2)
1780 {
1781 switch(dops[i].opcode2)
1782 {
1783 case 0x18: // MULT
1784 case 0x19: // MULTU
1785 {
1786 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1787 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1788 signed char hi=get_reg(i_regs->regmap,HIREG);
1789 signed char lo=get_reg(i_regs->regmap,LOREG);
1790 assert(m1>=0);
1791 assert(m2>=0);
1792 assert(hi>=0);
1793 assert(lo>=0);
1794
1795 if(dops[i].opcode2==0x18) // MULT
1796 emit_smull(m1,m2,hi);
1797 else // MULTU
1798 emit_umull(m1,m2,hi);
1799
1800 emit_mov(hi,lo);
1801 emit_shrimm64(hi,32,hi);
1802 break;
1803 }
1804 case 0x1A: // DIV
1805 case 0x1B: // DIVU
1806 {
1807 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1808 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1809 signed char quotient=get_reg(i_regs->regmap,LOREG);
1810 signed char remainder=get_reg(i_regs->regmap,HIREG);
1811 assert(numerator>=0);
1812 assert(denominator>=0);
1813 assert(quotient>=0);
1814 assert(remainder>=0);
1815
1816 if (dops[i].opcode2 == 0x1A) // DIV
1817 emit_sdiv(numerator,denominator,quotient);
1818 else // DIVU
1819 emit_udiv(numerator,denominator,quotient);
1820 emit_msub(quotient,denominator,numerator,remainder);
1821
1822 // div 0 quotient (remainder is already correct)
1823 host_tempreg_acquire();
1824 if (dops[i].opcode2 == 0x1A) // DIV
1825 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1826 else
1827 emit_movimm(~0,HOST_TEMPREG);
1828 emit_test(denominator,denominator);
1829 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1830 host_tempreg_release();
1831 break;
1832 }
1833 default:
1834 assert(0);
1835 }
1836 }
1837 else
1838 {
1839 signed char hr=get_reg(i_regs->regmap,HIREG);
1840 signed char lr=get_reg(i_regs->regmap,LOREG);
1841 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1842 {
1843 if (dops[i].rs1) {
1844 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1845 assert(numerator >= 0);
1846 if (hr >= 0)
1847 emit_mov(numerator,hr);
1848 if (lr >= 0) {
1849 if (dops[i].opcode2 == 0x1A) // DIV
1850 emit_sub_asrimm(0,numerator,31,lr);
1851 else
1852 emit_movimm(~0,lr);
1853 }
1854 }
1855 else {
1856 if (hr >= 0) emit_zeroreg(hr);
1857 if (lr >= 0) emit_movimm(~0,lr);
1858 }
1859 }
1860 else
1861 {
1862 // Multiply by zero is zero.
1863 if (hr >= 0) emit_zeroreg(hr);
1864 if (lr >= 0) emit_zeroreg(lr);
1865 }
1866 }
1867}
1868#define multdiv_assemble multdiv_assemble_arm64
1869
1870static void do_jump_vaddr(u_int rs)
1871{
1872 if (rs != 0)
1873 emit_mov(rs, 0);
1874 emit_far_call(ndrc_get_addr_ht);
1875 emit_jmpreg(0);
1876}
1877
1878static void do_preload_rhash(u_int r) {
1879 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1880 // register. On ARM the hash can be done with a single instruction (below)
1881}
1882
1883static void do_preload_rhtbl(u_int ht) {
1884 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1885}
1886
1887static void do_rhash(u_int rs,u_int rh) {
1888 emit_andimm(rs, 0xf8, rh);
1889}
1890
1891static void do_miniht_load(int ht, u_int rh) {
1892 emit_add64(ht, rh, ht);
1893 emit_ldst(0, 0, rh, ht, 0);
1894}
1895
1896static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1897 emit_cmp(rh, rs);
1898 void *jaddr = out;
1899 emit_jeq(0);
1900 do_jump_vaddr(rs);
1901
1902 set_jump_target(jaddr, out);
1903 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1904 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1905 emit_jmpreg(ht);
1906}
1907
1908// parsed by set_jump_target?
1909static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1910 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1911 emit_movk(return_address&0xffff,rt);
1912 add_to_linker(out,return_address,1);
1913 emit_adr(out,temp);
1914 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1915 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1916}
1917
1918static unused void clear_cache_arm64(char *start, char *end)
1919{
1920 // Don't rely on GCC's __clear_cache implementation, as it caches
1921 // icache/dcache cache line sizes, that can vary between cores on
1922 // big.LITTLE architectures.
1923 uint64_t addr, ctr_el0;
1924 static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
1925 size_t isize, dsize;
1926
1927 __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
1928 isize = 4 << ((ctr_el0 >> 0) & 0xf);
1929 dsize = 4 << ((ctr_el0 >> 16) & 0xf);
1930
1931 // use the global minimum cache line size
1932 icache_line_size = isize = icache_line_size < isize ? icache_line_size : isize;
1933 dcache_line_size = dsize = dcache_line_size < dsize ? dcache_line_size : dsize;
1934
1935 /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
1936 not required for instruction to data coherence. */
1937 if ((ctr_el0 & (1 << 28)) == 0x0) {
1938 addr = (uint64_t)start & ~(uint64_t)(dsize - 1);
1939 for (; addr < (uint64_t)end; addr += dsize)
1940 // use "civac" instead of "cvau", as this is the suggested workaround for
1941 // Cortex-A53 errata 819472, 826319, 827319 and 824069.
1942 __asm__ volatile("dc civac, %0" : : "r"(addr) : "memory");
1943 }
1944 __asm__ volatile("dsb ish" : : : "memory");
1945
1946 /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
1947 Unification is not required for instruction to data coherence. */
1948 if ((ctr_el0 & (1 << 29)) == 0x0) {
1949 addr = (uint64_t)start & ~(uint64_t)(isize - 1);
1950 for (; addr < (uint64_t)end; addr += isize)
1951 __asm__ volatile("ic ivau, %0" : : "r"(addr) : "memory");
1952
1953 __asm__ volatile("dsb ish" : : : "memory");
1954 }
1955
1956 __asm__ volatile("isb" : : : "memory");
1957}
1958
1959// CPU-architecture-specific initialization
1960static void arch_init(void)
1961{
1962 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1963 struct tramp_insns *ops = NDRC_WRITE_OFFSET(ndrc->tramp.ops);
1964 size_t i;
1965 assert(!(diff & 3));
1966 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1967 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1968 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1969 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1970 }
1971 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1972}
1973
1974// vim:shiftwidth=2:expandtab