drc: rework block tracking
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "pcnt.h"
24#include "arm_features.h"
25
26#define unused __attribute__((unused))
27
28void do_memhandler_pre();
29void do_memhandler_post();
30
31/* Linker */
/*
 * Patch the pc-relative instruction at addr so that it refers to target.
 *
 * Supported instructions: b, b.cond, cbz/cbnz and adr; anything else aborts.
 * The displacement must fit the instruction's immediate field (asserted).
 * Only the immediate field is rewritten; all other bits are preserved,
 * including the full 5-bit Rt field of cbz/cbnz.
 */
static void set_jump_target(void *addr, void *target)
{
  uint32_t *insn = addr;
  intptr_t offset = (unsigned char *)target - (unsigned char *)addr;
  const uint32_t op = *insn;

  if ((op & 0xFC000000) == 0x14000000) { // b
    assert(offset >= -134217728LL && offset < 134217728LL);
    *insn = (op & 0xFC000000) | ((uint32_t)(offset >> 2) & 0x3ffffff);
  }
  else if ((op & 0xff000000) == 0x54000000 // b.cond
        || (op & 0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branches are limited to +/- 1MB.
    // Block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block
    // (see add_jump_out); a workaround would be a trampoline jump via
    // a stub at the end of the block.
    assert(-1048576 <= offset && offset < 1048576);
    // imm19 lives in bits 23..5; keep bits 4..0 (cond, or Rt for cbz/cbnz)
    *insn = (op & 0xFF00001F) | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else if ((op & 0x9f000000) == 0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    *insn = (op & 0x9F00001F)
          | ((uint32_t)(offset & 0x3) << 29)
          | (((uint32_t)(offset >> 2) & 0x7ffff) << 5);
  }
  else
    abort(); // should not happen
}
58
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
// The third word of the stub must be an adr instruction (asserted); its
// pc-relative immediate is decoded and applied to that word's own address.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  const uint32_t insn = (uint32_t)*ptr;
  assert((insn & 0x9f000000) == 0x10000000); // adr
  // Shift as unsigned so the opcode bits cannot overflow a signed int;
  // immhi (bits 23..5) ends up sign-extended, immlo is bits 30..29.
  const int immhi = (int)(insn << 8) >> 13;
  const int immlo = (int)((insn >> 29) & 0x3);
  const int offset = immhi * 4 + immlo;
  return ptr + offset / 4;
}
68
#if 0
// Find where an external branch is linked to, using the address of its stub:
// locate the branch insn via find_extjump_insn(),
// decode its pc-relative immediate,
// and return the address that the branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  const int is_b = (*i_ptr&0xfc000000) == 0x14000000;
  const int is_bcond = (*i_ptr&0xff000000) == 0x54000000;
  const int is_cbz = (*i_ptr&0x7e000000) == 0x34000000; // cbz/cbnz

  if (is_b)
    return i_ptr + ((signed int)(*i_ptr<<6)>>6);
  if (is_bcond || is_cbz)
    return i_ptr + ((signed int)(*i_ptr<<8)>>13);
  assert(0);
  return NULL;
}
#endif
86
87// Allocate a specific ARM register.
88static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
89{
90 int n;
91 int dirty=0;
92
93 // see if it's already allocated (and dealloc it)
94 for(n=0;n<HOST_REGS;n++)
95 {
96 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
97 dirty=(cur->dirty>>n)&1;
98 cur->regmap[n]=-1;
99 }
100 }
101
102 cur->regmap[hr]=reg;
103 cur->dirty&=~(1<<hr);
104 cur->dirty|=dirty<<hr;
105 cur->isconst&=~(1<<hr);
106}
107
108// Alloc cycle count into dedicated register
109static void alloc_cc(struct regstat *cur,int i)
110{
111 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
112}
113
114/* Special alloc */
115
116
117/* Assembler */
118
119static unused const char *regname[32] = {
120 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
121 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
122 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
123 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
124};
125
126static unused const char *regname64[32] = {
127 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
128 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
129 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
130 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
131};
132
// Condition codes, numbered 0..15 in declaration order;
// the values index condname[] and are OR-ed into instruction words.
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
137
138static unused const char *condname[16] = {
139 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
140 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
141};
142
143static void output_w32(u_int word)
144{
145 *((u_int *)out) = word;
146 out += 4;
147}
148
// Pack rn into bits 5+ and rd into the low bits of an instruction word.
// Both register numbers must be below 31 (asserted).
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  return fields;
}
155
// Pack rm (bits 16+), rn (bits 5+) and rd (low bits) into an instruction
// word. Each register number must be below 32 (asserted).
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
163
164static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
165{
166 assert(ra < 32);
167 return rm_rn_rd(rm, rn, rd) | (ra << 10);
168}
169
// Pack imm7 (bits 15+), rt2 (bits 10+), rn (bits 5+) and rt (low bits).
// Range checks: imm7 < 0x80, rt2 < 31, rn < 32, rt < 31 (all asserted).
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
178
179static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
180{
181 assert(imm6 <= 63);
182 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
183}
184
// Pack a 16-bit immediate (bits 5+) and rd (low bits).
// imm16 must be below 0x10000 and rd below 31 (asserted).
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
191
// Pack a 12-bit immediate (bits 10+), rn (bits 5+) and rd (low bits).
// imm12 must be below 0x1000, rn and rd below 32 (asserted).
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
199
// Pack a 9-bit immediate (bits 12+), rn (bits 5+) and the transfer
// register (low bits; the parameter is named rd here).
// imm9 must be below 0x200, both registers below 31 (asserted).
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
207
// Pack a 19-bit immediate (bits 5+) and rt (low bits).
// imm19 must be below 0x80000 and rt below 31 (asserted).
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
214
// Pack the bitfield/logical-immediate operand fields:
// n (bit 22), immr (bits 16+), imms (bits 10+), rn (bits 5+), rd (low bits).
// n < 2, immr and imms < 0x40, rn and rd < 32 (all asserted).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
224
225static u_int genjmp(const u_char *addr)
226{
227 intptr_t offset = addr - out;
228 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
229 if (offset < -134217728 || offset > 134217727) {
230 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
231 abort();
232 return 0;
233 }
234 return ((u_int)offset >> 2) & 0x03ffffff;
235}
236
237static u_int genjmpcc(const u_char *addr)
238{
239 intptr_t offset = addr - out;
240 if ((uintptr_t)addr < 3) return 0;
241 if (offset < -1048576 || offset > 1048572) {
242 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
243 abort();
244 return 0;
245 }
246 return ((u_int)offset >> 2) & 0x7ffff;
247}
248
// Returns 1 when value is a non-empty run of ones starting at bit 0
// (e.g. 0x1, 0xff, 0xffffffff), otherwise 0.
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  // adding 1 to a low-aligned run of ones clears every bit of the run
  return (value & (value + 1)) == 0;
}
253
254// This function returns true if the argument contains a
255// non-empty sequence of ones (possibly rotated) with the remainder zero.
256static uint32_t is_rotated_mask(u_int value)
257{
258 if (value == 0 || value == ~0)
259 return 0;
260 if (is_mask((value - 1) | value))
261 return 1;
262 return is_mask((~value - 1) | ~value);
263}
264
265static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
266{
267 int lzeros, tzeros, ones;
268 assert(value != 0);
269 if (is_mask((value - 1) | value)) {
270 lzeros = __builtin_clz(value);
271 tzeros = __builtin_ctz(value);
272 ones = 32 - lzeros - tzeros;
273 *immr = (32 - tzeros) & 31;
274 *imms = ones - 1;
275 return;
276 }
277 value = ~value;
278 if (is_mask((value - 1) | value)) {
279 lzeros = __builtin_clz(value);
280 tzeros = __builtin_ctz(value);
281 ones = 32 - lzeros - tzeros;
282 *immr = lzeros;
283 *imms = 31 - ones;
284 return;
285 }
286 abort();
287}
288
289static void emit_mov(u_int rs, u_int rt)
290{
291 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
292 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
293}
294
295static void emit_mov64(u_int rs, u_int rt)
296{
297 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
298 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
299}
300
301static void emit_add(u_int rs1, u_int rs2, u_int rt)
302{
303 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
304 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
305}
306
307static void emit_add64(u_int rs1, u_int rs2, u_int rt)
308{
309 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
310 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
311}
312
313static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
314{
315 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
316 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
317}
318#define emit_adds_ptr emit_adds64
319
320static void emit_neg(u_int rs, u_int rt)
321{
322 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
323 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
324}
325
326static void emit_sub(u_int rs1, u_int rs2, u_int rt)
327{
328 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
329 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
330}
331
332static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
333{
334 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
335 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
336}
337
338static void emit_movz(u_int imm, u_int rt)
339{
340 assem_debug("movz %s,#%#x\n", regname[rt], imm);
341 output_w32(0x52800000 | imm16_rd(imm, rt));
342}
343
344static void emit_movz_lsl16(u_int imm, u_int rt)
345{
346 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
347 output_w32(0x52a00000 | imm16_rd(imm, rt));
348}
349
350static void emit_movn(u_int imm, u_int rt)
351{
352 assem_debug("movn %s,#%#x\n", regname[rt], imm);
353 output_w32(0x12800000 | imm16_rd(imm, rt));
354}
355
356static void emit_movn_lsl16(u_int imm,u_int rt)
357{
358 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
359 output_w32(0x12a00000 | imm16_rd(imm, rt));
360}
361
362static void emit_movk(u_int imm,u_int rt)
363{
364 assem_debug("movk %s,#%#x\n", regname[rt], imm);
365 output_w32(0x72800000 | imm16_rd(imm, rt));
366}
367
368static void emit_movk_lsl16(u_int imm,u_int rt)
369{
370 assert(imm<65536);
371 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
372 output_w32(0x72a00000 | imm16_rd(imm, rt));
373}
374
375static void emit_zeroreg(u_int rt)
376{
377 emit_movz(0, rt);
378}
379
380static void emit_movimm(u_int imm, u_int rt)
381{
382 if (imm < 65536)
383 emit_movz(imm, rt);
384 else if ((~imm) < 65536)
385 emit_movn(~imm, rt);
386 else if ((imm&0xffff) == 0)
387 emit_movz_lsl16(imm >> 16, rt);
388 else if (((~imm)&0xffff) == 0)
389 emit_movn_lsl16(~imm >> 16, rt);
390 else if (is_rotated_mask(imm)) {
391 u_int immr, imms;
392 gen_logical_imm(imm, &immr, &imms);
393 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
394 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
395 }
396 else {
397 emit_movz(imm & 0xffff, rt);
398 emit_movk_lsl16(imm >> 16, rt);
399 }
400}
401
402static void emit_readword(void *addr, u_int rt)
403{
404 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
405 if (!(offset & 3) && offset <= 16380) {
406 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
407 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
408 }
409 else
410 abort();
411}
412
413static void emit_readdword(void *addr, u_int rt)
414{
415 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
416 if (!(offset & 7) && offset <= 32760) {
417 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
418 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
419 }
420 else
421 abort();
422}
423#define emit_readptr emit_readdword
424
425static void emit_readshword(void *addr, u_int rt)
426{
427 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
428 if (!(offset & 1) && offset <= 8190) {
429 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
430 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
431 }
432 else
433 assert(0);
434}
435
436static void emit_loadreg(u_int r, u_int hr)
437{
438 int is64 = 0;
439 if (r == 0)
440 emit_zeroreg(hr);
441 else {
442 void *addr;
443 switch (r) {
444 //case HIREG: addr = &hi; break;
445 //case LOREG: addr = &lo; break;
446 case CCREG: addr = &cycle_count; break;
447 case CSREG: addr = &Status; break;
448 case INVCP: addr = &invc_ptr; is64 = 1; break;
449 case ROREG: addr = &ram_offset; is64 = 1; break;
450 default:
451 assert(r < 34);
452 addr = &psxRegs.GPR.r[r];
453 break;
454 }
455 if (is64)
456 emit_readdword(addr, hr);
457 else
458 emit_readword(addr, hr);
459 }
460}
461
462static void emit_writeword(u_int rt, void *addr)
463{
464 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
465 if (!(offset & 3) && offset <= 16380) {
466 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
467 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
468 }
469 else
470 assert(0);
471}
472
473static void emit_writedword(u_int rt, void *addr)
474{
475 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
476 if (!(offset & 7) && offset <= 32760) {
477 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
478 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
479 }
480 else
481 abort();
482}
483
484static void emit_storereg(u_int r, u_int hr)
485{
486 assert(r < 64);
487 void *addr = &psxRegs.GPR.r[r];
488 switch (r) {
489 //case HIREG: addr = &hi; break;
490 //case LOREG: addr = &lo; break;
491 case CCREG: addr = &cycle_count; break;
492 default: assert(r < 34); break;
493 }
494 emit_writeword(hr, addr);
495}
496
497static void emit_test(u_int rs, u_int rt)
498{
499 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
500 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
501}
502
503static void emit_testimm(u_int rs, u_int imm)
504{
505 u_int immr, imms;
506 assem_debug("tst %s,#%#x\n", regname[rs], imm);
507 assert(is_rotated_mask(imm)); // good enough for PCSX
508 gen_logical_imm(imm, &immr, &imms);
509 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
510}
511
512static void emit_not(u_int rs,u_int rt)
513{
514 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
515 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
516}
517
518static void emit_and(u_int rs1,u_int rs2,u_int rt)
519{
520 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
521 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
522}
523
524static void emit_or(u_int rs1,u_int rs2,u_int rt)
525{
526 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
528}
529
530static void emit_bic(u_int rs1,u_int rs2,u_int rt)
531{
532 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
533 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
534}
535
536static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
537{
538 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
539 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
540}
541
542static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
543{
544 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
545 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
546}
547
548static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
549{
550 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
551 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
552}
553
554static void emit_xor(u_int rs1,u_int rs2,u_int rt)
555{
556 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
557 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
558}
559
560static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
561{
562 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
563 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
564}
565
566static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
567{
568 unused const char *st = s ? "s" : "";
569 s = s ? 0x20000000 : 0;
570 is64 = is64 ? 0x80000000 : 0;
571 if (imm < 4096) {
572 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
573 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
574 }
575 else if (-imm < 4096) {
576 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
577 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
578 }
579 else if (imm < 16777216) {
580 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
581 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
582 if ((imm & 0xfff) || s) {
583 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
584 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
585 }
586 }
587 else if (-imm < 16777216) {
588 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
589 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
590 if ((imm & 0xfff) || s) {
591 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
592 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
593 }
594 }
595 else
596 abort();
597}
598
599static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
600{
601 emit_addimm_s(0, 0, rs, imm, rt);
602}
603
604static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
605{
606 emit_addimm_s(0, 1, rs, imm, rt);
607}
608
609static void emit_addimm_and_set_flags(int imm, u_int rt)
610{
611 emit_addimm_s(1, 0, rt, imm, rt);
612}
613
614static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
615{
616 const char *names[] = { "and", "orr", "eor", "ands" };
617 const char *name = names[op];
618 u_int immr, imms;
619 op = op << 29;
620 if (is_rotated_mask(imm)) {
621 gen_logical_imm(imm, &immr, &imms);
622 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
623 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
624 }
625 else {
626 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
627 host_tempreg_acquire();
628 emit_movimm(imm, HOST_TEMPREG);
629 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
630 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
631 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
632 host_tempreg_release();
633 }
634 (void)name;
635}
636
637static void emit_andimm(u_int rs, u_int imm, u_int rt)
638{
639 if (imm == 0)
640 emit_zeroreg(rt);
641 else
642 emit_logicop_imm(0, rs, imm, rt);
643}
644
645static void emit_orimm(u_int rs, u_int imm, u_int rt)
646{
647 if (imm == 0) {
648 if (rs != rt)
649 emit_mov(rs, rt);
650 }
651 else
652 emit_logicop_imm(1, rs, imm, rt);
653}
654
655static void emit_xorimm(u_int rs, u_int imm, u_int rt)
656{
657 if (imm == 0) {
658 if (rs != rt)
659 emit_mov(rs, rt);
660 }
661 else
662 emit_logicop_imm(2, rs, imm, rt);
663}
664
665static void emit_sbfm(u_int rs,u_int imm,u_int rt)
666{
667 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
668 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
669}
670
671static void emit_ubfm(u_int rs,u_int imm,u_int rt)
672{
673 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
674 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
675}
676
677static void emit_shlimm(u_int rs,u_int imm,u_int rt)
678{
679 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
680 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
681}
682
683static void emit_shrimm(u_int rs,u_int imm,u_int rt)
684{
685 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
686 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
687}
688
689static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
690{
691 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
692 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
693}
694
695static void emit_sarimm(u_int rs,u_int imm,u_int rt)
696{
697 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
698 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
699}
700
701static void emit_rorimm(u_int rs,u_int imm,u_int rt)
702{
703 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
704 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
705}
706
707static void emit_signextend16(u_int rs, u_int rt)
708{
709 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
710 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
711}
712
713static void emit_shl(u_int rs,u_int rshift,u_int rt)
714{
715 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
716 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
717}
718
719static void emit_shr(u_int rs,u_int rshift,u_int rt)
720{
721 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
722 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
723}
724
725static void emit_sar(u_int rs,u_int rshift,u_int rt)
726{
727 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
728 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
729}
730
731static void emit_cmpimm(u_int rs, u_int imm)
732{
733 if (imm < 4096) {
734 assem_debug("cmp %s,%#x\n", regname[rs], imm);
735 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
736 }
737 else if (-imm < 4096) {
738 assem_debug("cmn %s,%#x\n", regname[rs], imm);
739 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
740 }
741 else if (imm < 16777216 && !(imm & 0xfff)) {
742 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
743 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
744 }
745 else {
746 host_tempreg_acquire();
747 emit_movimm(imm, HOST_TEMPREG);
748 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
749 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
750 host_tempreg_release();
751 }
752}
753
754static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
755{
756 assert(imm == 0 || imm == 1);
757 assert(cond0 < 0x10);
758 assert(cond1 < 0x10);
759 if (imm) {
760 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
761 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
762 } else {
763 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
764 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
765 }
766}
767
768static void emit_cmovne_imm(u_int imm,u_int rt)
769{
770 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
771}
772
773static void emit_cmovl_imm(u_int imm,u_int rt)
774{
775 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
776}
777
778static void emit_cmovb_imm(int imm,u_int rt)
779{
780 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
781}
782
783static void emit_cmoveq_reg(u_int rs,u_int rt)
784{
785 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
786 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
787}
788
789static void emit_cmovne_reg(u_int rs,u_int rt)
790{
791 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
792 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
793}
794
795static void emit_cmovl_reg(u_int rs,u_int rt)
796{
797 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
798 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
799}
800
801static void emit_cmovb_reg(u_int rs,u_int rt)
802{
803 assem_debug("csel %s,%s,%s,cc\n",regname[rt],regname[rs],regname[rt]);
804 output_w32(0x1a800000 | (COND_CC << 12) | rm_rn_rd(rt, rs, rt));
805}
806
807static void emit_cmovs_reg(u_int rs,u_int rt)
808{
809 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
810 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
811}
812
813static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
814{
815 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
816 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
817}
818
819static void emit_slti32(u_int rs,int imm,u_int rt)
820{
821 if(rs!=rt) emit_zeroreg(rt);
822 emit_cmpimm(rs,imm);
823 if(rs==rt) emit_movimm(0,rt);
824 emit_cmovl_imm(1,rt);
825}
826
827static void emit_sltiu32(u_int rs,int imm,u_int rt)
828{
829 if(rs!=rt) emit_zeroreg(rt);
830 emit_cmpimm(rs,imm);
831 if(rs==rt) emit_movimm(0,rt);
832 emit_cmovb_imm(1,rt);
833}
834
835static void emit_cmp(u_int rs,u_int rt)
836{
837 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
838 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
839}
840
841static void emit_set_gz32(u_int rs, u_int rt)
842{
843 //assem_debug("set_gz32\n");
844 emit_cmpimm(rs,1);
845 emit_movimm(1,rt);
846 emit_cmovl_imm(0,rt);
847}
848
849static void emit_set_nz32(u_int rs, u_int rt)
850{
851 //assem_debug("set_nz32\n");
852 if(rs!=rt) emit_mov(rs,rt);
853 emit_test(rs,rs);
854 emit_cmovne_imm(1,rt);
855}
856
857static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
858{
859 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
860 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
861 emit_cmp(rs1,rs2);
862 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
863 emit_cmovl_imm(1,rt);
864}
865
866static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
867{
868 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
869 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
870 emit_cmp(rs1,rs2);
871 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
872 emit_cmovb_imm(1,rt);
873}
874
875static int can_jump_or_call(const void *a)
876{
877 intptr_t diff = (u_char *)a - out;
878 return (-134217728 <= diff && diff <= 134217727);
879}
880
881static void emit_call(const void *a)
882{
883 intptr_t diff = (u_char *)a - out;
884 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
885 assert(!(diff & 3));
886 if (-134217728 <= diff && diff <= 134217727)
887 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
888 else
889 abort();
890}
891
892static void emit_jmp(const void *a)
893{
894 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
895 u_int offset = genjmp(a);
896 output_w32(0x14000000 | offset);
897}
898
899static void emit_jne(const void *a)
900{
901 assem_debug("bne %p\n", a);
902 u_int offset = genjmpcc(a);
903 output_w32(0x54000000 | (offset << 5) | COND_NE);
904}
905
906static void emit_jeq(const void *a)
907{
908 assem_debug("beq %p\n", a);
909 u_int offset = genjmpcc(a);
910 output_w32(0x54000000 | (offset << 5) | COND_EQ);
911}
912
913static void emit_js(const void *a)
914{
915 assem_debug("bmi %p\n", a);
916 u_int offset = genjmpcc(a);
917 output_w32(0x54000000 | (offset << 5) | COND_MI);
918}
919
920static void emit_jns(const void *a)
921{
922 assem_debug("bpl %p\n", a);
923 u_int offset = genjmpcc(a);
924 output_w32(0x54000000 | (offset << 5) | COND_PL);
925}
926
927static void emit_jl(const void *a)
928{
929 assem_debug("blt %p\n", a);
930 u_int offset = genjmpcc(a);
931 output_w32(0x54000000 | (offset << 5) | COND_LT);
932}
933
934static void emit_jge(const void *a)
935{
936 assem_debug("bge %p\n", a);
937 u_int offset = genjmpcc(a);
938 output_w32(0x54000000 | (offset << 5) | COND_GE);
939}
940
941static void emit_jno(const void *a)
942{
943 assem_debug("bvc %p\n", a);
944 u_int offset = genjmpcc(a);
945 output_w32(0x54000000 | (offset << 5) | COND_VC);
946}
947
948static void emit_jc(const void *a)
949{
950 assem_debug("bcs %p\n", a);
951 u_int offset = genjmpcc(a);
952 output_w32(0x54000000 | (offset << 5) | COND_CS);
953}
954
955static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
956{
957 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
958 u_int offset = genjmpcc(a);
959 is64 = is64 ? 0x80000000 : 0;
960 isnz = isnz ? 0x01000000 : 0;
961 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
962}
963
964static unused void emit_cbz(const void *a, u_int r)
965{
966 emit_cb(0, 0, a, r);
967}
968
969static void emit_jmpreg(u_int r)
970{
971 assem_debug("br %s\n", regname64[r]);
972 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
973}
974
975static void emit_retreg(u_int r)
976{
977 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
978 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
979}
980
981static void emit_ret(void)
982{
983 emit_retreg(LR);
984}
985
986static void emit_adr(void *addr, u_int rt)
987{
988 intptr_t offset = (u_char *)addr - out;
989 assert(-1048576 <= offset && offset < 1048576);
990 assert(rt < 31);
991 assem_debug("adr x%d,#%#lx\n", rt, offset);
992 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
993}
994
995static void emit_adrp(void *addr, u_int rt)
996{
997 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
998 assert(-4294967296l <= offset && offset < 4294967296l);
999 assert(rt < 31);
1000 offset >>= 12;
1001 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1002 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1003}
1004
1005static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1006{
1007 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1008 assert(-256 <= offset && offset < 256);
1009 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1010}
1011
1012static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1013{
1014 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1015 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1016}
1017
1018static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1019{
1020 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1021 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1022}
1023
1024static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1025{
1026 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1027 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1028}
1029
1030static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1031{
1032 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1033 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1034}
1035#define emit_readptr_dualindexedx_ptrlen emit_readdword_dualindexedx8
1036
1037static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1038{
1039 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1040 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1041}
1042
1043static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1044{
1045 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1046 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1047}
1048
1049static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1050{
1051 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1052 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1053}
1054
1055static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1056{
1057 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1058 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1059}
1060
1061static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1062{
1063 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1064 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1065}
1066
1067static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1068{
1069 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1070 assert(-256 <= offset && offset < 256);
1071 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1072}
1073
1074static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1075{
1076 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1077 assert(-256 <= offset && offset < 256);
1078 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1079}
1080
1081static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1082{
1083 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1084 assert(-256 <= offset && offset < 256);
1085 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1086}
1087
1088static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1089{
1090 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1091 assert(-256 <= offset && offset < 256);
1092 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1093}
1094
1095static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1096{
1097 if (!(offset & 3) && (u_int)offset <= 16380) {
1098 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1099 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1100 }
1101 else if (-256 <= offset && offset < 256) {
1102 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1103 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1104 }
1105 else
1106 assert(0);
1107}
1108
1109static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1110{
1111 if (!(offset & 1) && (u_int)offset <= 8190) {
1112 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1113 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1114 }
1115 else if (-256 <= offset && offset < 256) {
1116 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1117 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1118 }
1119 else
1120 assert(0);
1121}
1122
1123static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1124{
1125 if ((u_int)offset < 4096) {
1126 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1127 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1128 }
1129 else if (-256 <= offset && offset < 256) {
1130 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1131 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1132 }
1133 else
1134 assert(0);
1135}
1136
1137static void emit_umull(u_int rs1, u_int rs2, u_int rt)
1138{
1139 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1140 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1141}
1142
1143static void emit_smull(u_int rs1, u_int rs2, u_int rt)
1144{
1145 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1146 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1147}
1148
1149static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1150{
1151 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1152 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1153}
1154
1155static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1156{
1157 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1158 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
1159}
1160
1161static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1162{
1163 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1165}
1166
1167static void emit_clz(u_int rs, u_int rt)
1168{
1169 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1170 output_w32(0x5ac01000 | rn_rd(rs, rt));
1171}
1172
1173// special case for checking invalid_code
1174static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1175{
1176 host_tempreg_acquire();
1177 emit_shrimm(r, 12, HOST_TEMPREG);
1178 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1179 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1180 emit_cmpimm(HOST_TEMPREG, imm);
1181 host_tempreg_release();
1182}
1183
1184// special for loadlr_assemble, rs2 is destroyed
1185static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1186{
1187 emit_shl(rs2, shift, rs2);
1188 emit_bic(rs1, rs2, rt);
1189}
1190
1191static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
1192{
1193 emit_shr(rs2, shift, rs2);
1194 emit_bic(rs1, rs2, rt);
1195}
1196
1197static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1198{
1199 u_int op = 0xb9000000;
1200 unused const char *ldst = is_st ? "st" : "ld";
1201 unused char rp = is64 ? 'x' : 'w';
1202 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1203 is64 = is64 ? 1 : 0;
1204 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1205 ofs = (ofs >> (2+is64));
1206 if (!is_st) op |= 0x00400000;
1207 if (is64) op |= 0x40000000;
1208 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1209}
1210
1211static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1212{
1213 u_int op = 0x29000000;
1214 unused const char *ldst = is_st ? "st" : "ld";
1215 unused char rp = is64 ? 'x' : 'w';
1216 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1217 is64 = is64 ? 1 : 0;
1218 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1219 ofs = (ofs >> (2+is64));
1220 assert(-64 <= ofs && ofs <= 63);
1221 ofs &= 0x7f;
1222 if (!is_st) op |= 0x00400000;
1223 if (is64) op |= 0x80000000;
1224 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1225}
1226
1227static void save_load_regs_all(int is_store, u_int reglist)
1228{
1229 int ofs = 0, c = 0;
1230 u_int r, pair[2];
1231 for (r = 0; reglist; r++, reglist >>= 1) {
1232 if (reglist & 1)
1233 pair[c++] = r;
1234 if (c == 2) {
1235 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1236 ofs += 8 * 2;
1237 c = 0;
1238 }
1239 }
1240 if (c) {
1241 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1242 ofs += 8;
1243 }
1244 assert(ofs <= SSP_CALLER_REGS);
1245}
1246
1247// Save registers before function call
1248static void save_regs(u_int reglist)
1249{
1250 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1251 save_load_regs_all(1, reglist);
1252}
1253
1254// Restore registers after function call
1255static void restore_regs(u_int reglist)
1256{
1257 reglist &= CALLER_SAVE_REGS;
1258 save_load_regs_all(0, reglist);
1259}
1260
1261/* Stubs/epilogue */
1262
1263static void literal_pool(int n)
1264{
1265 (void)literals;
1266}
1267
// Intentionally empty on this target: nothing is emitted.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1271
1272// parsed by get_pointer, find_extjump_insn
1273static void emit_extjump(u_char *addr, u_int target)
1274{
1275 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1276
1277 emit_movz(target & 0xffff, 0);
1278 emit_movk_lsl16(target >> 16, 0);
1279
1280 // addr is in the current recompiled block (max 256k)
1281 // offset shouldn't exceed +/-1MB
1282 emit_adr(addr, 1);
1283 emit_far_jump(dyna_linker);
1284}
1285
1286static void check_extjump2(void *src)
1287{
1288 u_int *ptr = src;
1289 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1290 (void)ptr;
1291}
1292
1293// put rt_val into rt, potentially making use of rs with value rs_val
1294static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1295{
1296 int diff = rt_val - rs_val;
1297 if ((-4096 < diff && diff < 4096)
1298 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
1299 emit_addimm(rs, diff, rt);
1300 else if (rt_val == ~rs_val)
1301 emit_not(rs, rt);
1302 else if (is_rotated_mask(rs_val ^ rt_val))
1303 emit_xorimm(rs, rs_val ^ rt_val, rt);
1304 else
1305 emit_movimm(rt_val, rt);
1306}
1307
1308// return 1 if the above function can do it's job cheaply
1309static int is_similar_value(u_int v1, u_int v2)
1310{
1311 int diff = v1 - v2;
1312 return (-4096 < diff && diff < 4096)
1313 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1314 || v1 == ~v2
1315 || is_rotated_mask(v1 ^ v2);
1316}
1317
1318static void emit_movimm_from64(u_int rs_val, u_int rs, uintptr_t rt_val, u_int rt)
1319{
1320 if (rt_val < 0x100000000ull) {
1321 emit_movimm_from(rs_val, rs, rt_val, rt);
1322 return;
1323 }
1324 // just move the whole thing. At least on Linux all addresses
1325 // seem to be 48bit, so 3 insns - not great not terrible
1326 assem_debug("movz %s,#%#lx\n", regname64[rt], rt_val & 0xffff);
1327 output_w32(0xd2800000 | imm16_rd(rt_val & 0xffff, rt));
1328 assem_debug("movk %s,#%#lx,lsl #16\n", regname64[rt], (rt_val >> 16) & 0xffff);
1329 output_w32(0xf2a00000 | imm16_rd((rt_val >> 16) & 0xffff, rt));
1330 assem_debug("movk %s,#%#lx,lsl #32\n", regname64[rt], (rt_val >> 32) & 0xffff);
1331 output_w32(0xf2c00000 | imm16_rd((rt_val >> 32) & 0xffff, rt));
1332 if (rt_val >> 48) {
1333 assem_debug("movk %s,#%#lx,lsl #48\n", regname64[rt], (rt_val >> 48) & 0xffff);
1334 output_w32(0xf2e00000 | imm16_rd((rt_val >> 48) & 0xffff, rt));
1335 }
1336}
1337
1338// trashes x2
1339static void pass_args64(u_int a0, u_int a1)
1340{
1341 if(a0==1&&a1==0) {
1342 // must swap
1343 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1344 }
1345 else if(a0!=0&&a1==0) {
1346 emit_mov64(a1,1);
1347 if (a0>=0) emit_mov64(a0,0);
1348 }
1349 else {
1350 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1351 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1352 }
1353}
1354
1355static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1356{
1357 switch(type) {
1358 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1359 case LOADBU_STUB:
1360 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1361 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1362 case LOADHU_STUB:
1363 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1364 case LOADW_STUB:
1365 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1366 default: assert(0);
1367 }
1368}
1369
1370#include "pcsxmem.h"
1371//#include "pcsxmem_inline.c"
1372
1373static void do_readstub(int n)
1374{
1375 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
1376 set_jump_target(stubs[n].addr, out);
1377 enum stub_type type = stubs[n].type;
1378 int i = stubs[n].a;
1379 int rs = stubs[n].b;
1380 const struct regstat *i_regs = (void *)stubs[n].c;
1381 u_int reglist = stubs[n].e;
1382 const signed char *i_regmap = i_regs->regmap;
1383 int rt;
1384 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
1385 rt=get_reg(i_regmap,FTEMP);
1386 }else{
1387 rt=get_reg(i_regmap,dops[i].rt1);
1388 }
1389 assert(rs>=0);
1390 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1391 void *restore_jump = NULL, *handler_jump = NULL;
1392 reglist|=(1<<rs);
1393 for (r = 0; r < HOST_CCREG; r++) {
1394 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1395 temp = r;
1396 break;
1397 }
1398 }
1399 if(rt>=0&&dops[i].rt1!=0)
1400 reglist&=~(1<<rt);
1401 if(temp==-1) {
1402 save_regs(reglist);
1403 regs_saved=1;
1404 temp=(rs==0)?2:0;
1405 }
1406 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1407 temp2=1;
1408 emit_readdword(&mem_rtab,temp);
1409 emit_shrimm(rs,12,temp2);
1410 emit_readdword_dualindexedx8(temp,temp2,temp2);
1411 emit_adds64(temp2,temp2,temp2);
1412 handler_jump=out;
1413 emit_jc(0);
1414 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1415 switch(type) {
1416 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1417 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1418 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1419 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1420 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1421 default: assert(0);
1422 }
1423 }
1424 if(regs_saved) {
1425 restore_jump=out;
1426 emit_jmp(0); // jump to reg restore
1427 }
1428 else
1429 emit_jmp(stubs[n].retaddr); // return address
1430 set_jump_target(handler_jump, out);
1431
1432 if(!regs_saved)
1433 save_regs(reglist);
1434 void *handler=NULL;
1435 if(type==LOADB_STUB||type==LOADBU_STUB)
1436 handler=jump_handler_read8;
1437 if(type==LOADH_STUB||type==LOADHU_STUB)
1438 handler=jump_handler_read16;
1439 if(type==LOADW_STUB)
1440 handler=jump_handler_read32;
1441 assert(handler);
1442 pass_args64(rs,temp2);
1443 int cc=get_reg(i_regmap,CCREG);
1444 if(cc<0)
1445 emit_loadreg(CCREG,2);
1446 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1447 emit_far_call(handler);
1448 // (no cycle reload after read)
1449 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
1450 loadstore_extend(type,0,rt);
1451 }
1452 if(restore_jump)
1453 set_jump_target(restore_jump, out);
1454 restore_regs(reglist);
1455 emit_jmp(stubs[n].retaddr);
1456}
1457
1458static void inline_readstub(enum stub_type type, int i, u_int addr,
1459 const signed char regmap[], int target, int adj, u_int reglist)
1460{
1461 int rs=get_reg(regmap,target);
1462 int rt=get_reg(regmap,target);
1463 if(rs<0) rs=get_reg_temp(regmap);
1464 assert(rs>=0);
1465 u_int is_dynamic=0;
1466 uintptr_t host_addr = 0;
1467 void *handler;
1468 int cc=get_reg(regmap,CCREG);
1469 //if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
1470 // return;
1471 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1472 if (handler == NULL) {
1473 if(rt<0||dops[i].rt1==0)
1474 return;
1475 if (addr != host_addr)
1476 emit_movimm_from64(addr, rs, host_addr, rs);
1477 switch(type) {
1478 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1479 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1480 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1481 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1482 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1483 default: assert(0);
1484 }
1485 return;
1486 }
1487 is_dynamic = pcsxmem_is_handler_dynamic(addr);
1488 if (is_dynamic) {
1489 if(type==LOADB_STUB||type==LOADBU_STUB)
1490 handler=jump_handler_read8;
1491 if(type==LOADH_STUB||type==LOADHU_STUB)
1492 handler=jump_handler_read16;
1493 if(type==LOADW_STUB)
1494 handler=jump_handler_read32;
1495 }
1496
1497 // call a memhandler
1498 if(rt>=0&&dops[i].rt1!=0)
1499 reglist&=~(1<<rt);
1500 save_regs(reglist);
1501 if(target==0)
1502 emit_movimm(addr,0);
1503 else if(rs!=0)
1504 emit_mov(rs,0);
1505 if(cc<0)
1506 emit_loadreg(CCREG,2);
1507 emit_addimm(cc<0?2:cc,adj,2);
1508 if(is_dynamic) {
1509 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1510 emit_adrp((void *)l1, 1);
1511 emit_addimm64(1, l1 & 0xfff, 1);
1512 }
1513 else
1514 emit_far_call(do_memhandler_pre);
1515
1516 emit_far_call(handler);
1517
1518 // (no cycle reload after read)
1519 if(rt>=0&&dops[i].rt1!=0)
1520 loadstore_extend(type, 0, rt);
1521 restore_regs(reglist);
1522}
1523
1524static void do_writestub(int n)
1525{
1526 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1527 set_jump_target(stubs[n].addr, out);
1528 enum stub_type type=stubs[n].type;
1529 int i=stubs[n].a;
1530 int rs=stubs[n].b;
1531 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1532 u_int reglist=stubs[n].e;
1533 signed char *i_regmap=i_regs->regmap;
1534 int rt,r;
1535 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
1536 rt=get_reg(i_regmap,r=FTEMP);
1537 }else{
1538 rt=get_reg(i_regmap,r=dops[i].rs2);
1539 }
1540 assert(rs>=0);
1541 assert(rt>=0);
1542 int rtmp,temp=-1,temp2,regs_saved=0;
1543 void *restore_jump = NULL, *handler_jump = NULL;
1544 int reglist2=reglist|(1<<rs)|(1<<rt);
1545 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1546 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1547 temp = rtmp;
1548 break;
1549 }
1550 }
1551 if(temp==-1) {
1552 save_regs(reglist);
1553 regs_saved=1;
1554 for(rtmp=0;rtmp<=3;rtmp++)
1555 if(rtmp!=rs&&rtmp!=rt)
1556 {temp=rtmp;break;}
1557 }
1558 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1559 temp2=3;
1560 else {
1561 host_tempreg_acquire();
1562 temp2=HOST_TEMPREG;
1563 }
1564 emit_readdword(&mem_wtab,temp);
1565 emit_shrimm(rs,12,temp2);
1566 emit_readdword_dualindexedx8(temp,temp2,temp2);
1567 emit_adds64(temp2,temp2,temp2);
1568 handler_jump=out;
1569 emit_jc(0);
1570 switch(type) {
1571 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1572 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1573 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1574 default: assert(0);
1575 }
1576 if(regs_saved) {
1577 restore_jump=out;
1578 emit_jmp(0); // jump to reg restore
1579 }
1580 else
1581 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1582 set_jump_target(handler_jump, out);
1583
1584 if(!regs_saved)
1585 save_regs(reglist);
1586 void *handler=NULL;
1587 switch(type) {
1588 case STOREB_STUB: handler=jump_handler_write8; break;
1589 case STOREH_STUB: handler=jump_handler_write16; break;
1590 case STOREW_STUB: handler=jump_handler_write32; break;
1591 default: assert(0);
1592 }
1593 assert(handler);
1594 pass_args(rs,rt);
1595 if(temp2!=3) {
1596 emit_mov64(temp2,3);
1597 host_tempreg_release();
1598 }
1599 int cc=get_reg(i_regmap,CCREG);
1600 if(cc<0)
1601 emit_loadreg(CCREG,2);
1602 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
1603 // returns new cycle_count
1604 emit_far_call(handler);
1605 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
1606 if(cc<0)
1607 emit_storereg(CCREG,2);
1608 if(restore_jump)
1609 set_jump_target(restore_jump, out);
1610 restore_regs(reglist);
1611 emit_jmp(stubs[n].retaddr);
1612}
1613
1614static void inline_writestub(enum stub_type type, int i, u_int addr,
1615 const signed char regmap[], int target, int adj, u_int reglist)
1616{
1617 int rs = get_reg_temp(regmap);
1618 int rt = get_reg(regmap,target);
1619 assert(rs >= 0);
1620 assert(rt >= 0);
1621 uintptr_t host_addr = 0;
1622 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1623 if (handler == NULL) {
1624 if (addr != host_addr)
1625 emit_movimm_from64(addr, rs, host_addr, rs);
1626 switch (type) {
1627 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1628 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1629 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1630 default: assert(0);
1631 }
1632 return;
1633 }
1634
1635 // call a memhandler
1636 save_regs(reglist);
1637 emit_writeword(rs, &address); // some handlers still need it
1638 loadstore_extend(type, rt, 0);
1639 int cc, cc_use;
1640 cc = cc_use = get_reg(regmap, CCREG);
1641 if (cc < 0)
1642 emit_loadreg(CCREG, (cc_use = 2));
1643 emit_addimm(cc_use, adj, 2);
1644
1645 emit_far_call(do_memhandler_pre);
1646 emit_far_call(handler);
1647 emit_far_call(do_memhandler_post);
1648 emit_addimm(0, -adj, cc_use);
1649 if (cc < 0)
1650 emit_storereg(CCREG, cc_use);
1651 restore_regs(reglist);
1652}
1653
1654/* Special assem */
1655
1656static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
1657{
1658 save_load_regs_all(1, reglist);
1659 cop2_do_stall_check(op, i, i_regs, 0);
1660#ifdef PCNT
1661 emit_movimm(op, 0);
1662 emit_far_call(pcnt_gte_start);
1663#endif
1664 // pointer to cop2 regs
1665 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1666}
1667
1668static void c2op_epilogue(u_int op,u_int reglist)
1669{
1670#ifdef PCNT
1671 emit_movimm(op, 0);
1672 emit_far_call(pcnt_gte_end);
1673#endif
1674 save_load_regs_all(0, reglist);
1675}
1676
1677static void c2op_assemble(int i, const struct regstat *i_regs)
1678{
1679 u_int c2op=source[i]&0x3f;
1680 u_int hr,reglist_full=0,reglist;
1681 int need_flags,need_ir;
1682 for(hr=0;hr<HOST_REGS;hr++) {
1683 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1684 }
1685 reglist=reglist_full&CALLER_SAVE_REGS;
1686
1687 if (gte_handlers[c2op]!=NULL) {
1688 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1689 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1690 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1691 source[i],gte_unneeded[i+1],need_flags,need_ir);
1692 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
1693 need_flags=0;
1694 //int shift = (source[i] >> 19) & 1;
1695 //int lm = (source[i] >> 10) & 1;
1696 switch(c2op) {
1697 default:
1698 (void)need_ir;
1699 c2op_prologue(c2op, i, i_regs, reglist);
1700 emit_movimm(source[i],1); // opcode
1701 emit_writeword(1,&psxRegs.code);
1702 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1703 break;
1704 }
1705 c2op_epilogue(c2op,reglist);
1706 }
1707}
1708
1709static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1710{
1711 //value = value & 0x7ffff000;
1712 //if (value & 0x7f87e000) value |= 0x80000000;
1713 emit_andimm(sl, 0x7fffe000, temp);
1714 emit_testimm(temp, 0xff87ffff);
1715 emit_andimm(sl, 0x7ffff000, temp);
1716 host_tempreg_acquire();
1717 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1718 emit_cmovne_reg(HOST_TEMPREG, temp);
1719 host_tempreg_release();
1720 assert(0); // testing needed
1721}
1722
1723static void do_mfc2_31_one(u_int copr,signed char temp)
1724{
1725 emit_readshword(&reg_cop2d[copr],temp);
1726 emit_bicsar_imm(temp,31,temp);
1727 emit_cmpimm(temp,0xf80);
1728 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1729 emit_andimm(temp,0xf80,temp);
1730}
1731
1732static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1733{
1734 if (temp < 0) {
1735 host_tempreg_acquire();
1736 temp = HOST_TEMPREG;
1737 }
1738 do_mfc2_31_one(9,temp);
1739 emit_shrimm(temp,7,tl);
1740 do_mfc2_31_one(10,temp);
1741 emit_orrshr_imm(temp,2,tl);
1742 do_mfc2_31_one(11,temp);
1743 emit_orrshl_imm(temp,3,tl);
1744 emit_writeword(tl,&reg_cop2d[29]);
1745
1746 if (temp == HOST_TEMPREG)
1747 host_tempreg_release();
1748}
1749
1750static void multdiv_assemble_arm64(int i, const struct regstat *i_regs)
1751{
1752 // case 0x18: MULT
1753 // case 0x19: MULTU
1754 // case 0x1A: DIV
1755 // case 0x1B: DIVU
1756 if(dops[i].rs1&&dops[i].rs2)
1757 {
1758 switch(dops[i].opcode2)
1759 {
1760 case 0x18: // MULT
1761 case 0x19: // MULTU
1762 {
1763 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
1764 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
1765 signed char hi=get_reg(i_regs->regmap,HIREG);
1766 signed char lo=get_reg(i_regs->regmap,LOREG);
1767 assert(m1>=0);
1768 assert(m2>=0);
1769 assert(hi>=0);
1770 assert(lo>=0);
1771
1772 if(dops[i].opcode2==0x18) // MULT
1773 emit_smull(m1,m2,hi);
1774 else // MULTU
1775 emit_umull(m1,m2,hi);
1776
1777 emit_mov(hi,lo);
1778 emit_shrimm64(hi,32,hi);
1779 break;
1780 }
1781 case 0x1A: // DIV
1782 case 0x1B: // DIVU
1783 {
1784 signed char numerator=get_reg(i_regs->regmap,dops[i].rs1);
1785 signed char denominator=get_reg(i_regs->regmap,dops[i].rs2);
1786 signed char quotient=get_reg(i_regs->regmap,LOREG);
1787 signed char remainder=get_reg(i_regs->regmap,HIREG);
1788 assert(numerator>=0);
1789 assert(denominator>=0);
1790 assert(quotient>=0);
1791 assert(remainder>=0);
1792
1793 if (dops[i].opcode2 == 0x1A) // DIV
1794 emit_sdiv(numerator,denominator,quotient);
1795 else // DIVU
1796 emit_udiv(numerator,denominator,quotient);
1797 emit_msub(quotient,denominator,numerator,remainder);
1798
1799 // div 0 quotient (remainder is already correct)
1800 host_tempreg_acquire();
1801 if (dops[i].opcode2 == 0x1A) // DIV
1802 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1803 else
1804 emit_movimm(~0,HOST_TEMPREG);
1805 emit_test(denominator,denominator);
1806 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1807 host_tempreg_release();
1808 break;
1809 }
1810 default:
1811 assert(0);
1812 }
1813 }
1814 else
1815 {
1816 signed char hr=get_reg(i_regs->regmap,HIREG);
1817 signed char lr=get_reg(i_regs->regmap,LOREG);
1818 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
1819 {
1820 if (dops[i].rs1) {
1821 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
1822 assert(numerator >= 0);
1823 if (hr >= 0)
1824 emit_mov(numerator,hr);
1825 if (lr >= 0) {
1826 if (dops[i].opcode2 == 0x1A) // DIV
1827 emit_sub_asrimm(0,numerator,31,lr);
1828 else
1829 emit_movimm(~0,lr);
1830 }
1831 }
1832 else {
1833 if (hr >= 0) emit_zeroreg(hr);
1834 if (lr >= 0) emit_movimm(~0,lr);
1835 }
1836 }
1837 else
1838 {
1839 // Multiply by zero is zero.
1840 if (hr >= 0) emit_zeroreg(hr);
1841 if (lr >= 0) emit_zeroreg(lr);
1842 }
1843 }
1844}
1845#define multdiv_assemble multdiv_assemble_arm64
1846
1847static void do_jump_vaddr(u_int rs)
1848{
1849 if (rs != 0)
1850 emit_mov(rs, 0);
1851 emit_far_call(ndrc_get_addr_ht);
1852 emit_jmpreg(0);
1853}
1854
1855static void do_preload_rhash(u_int r) {
1856 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1857 // register. On ARM the hash can be done with a single instruction (below)
1858}
1859
1860static void do_preload_rhtbl(u_int ht) {
1861 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1862}
1863
1864static void do_rhash(u_int rs,u_int rh) {
1865 emit_andimm(rs, 0xf8, rh);
1866}
1867
1868static void do_miniht_load(int ht, u_int rh) {
1869 emit_add64(ht, rh, ht);
1870 emit_ldst(0, 0, rh, ht, 0);
1871}
1872
1873static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1874 emit_cmp(rh, rs);
1875 void *jaddr = out;
1876 emit_jeq(0);
1877 do_jump_vaddr(rs);
1878
1879 set_jump_target(jaddr, out);
1880 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1881 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1882 emit_jmpreg(ht);
1883}
1884
1885// parsed by set_jump_target?
1886static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1887 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1888 emit_movk(return_address&0xffff,rt);
1889 add_to_linker(out,return_address,1);
1890 emit_adr(out,temp);
1891 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1892 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1893}
1894
// Make code written to [start, end) visible to instruction fetch:
// clean the data cache lines, invalidate the instruction cache lines, and
// synchronize with barriers. Either step is skipped when CTR_EL0 reports
// that the hardware does not need it.
static void clear_cache_arm64(char *start, char *end)
{
  // Don't rely on GCC's __clear_cache implementation, as it caches
  // icache/dcache cache line sizes, that can vary between cores on
  // big.LITTLE architectures.
  static size_t icache_line_size = 0xffff, dcache_line_size = 0xffff;
  const uint64_t range_end = (uint64_t)end;
  uint64_t line, ctr_el0;
  size_t isize, dsize;

  __asm__ volatile("mrs %0, ctr_el0" : "=r"(ctr_el0));
  isize = 4 << ((ctr_el0 >> 0) & 0xf);
  dsize = 4 << ((ctr_el0 >> 16) & 0xf);

  // use the global minimum cache line size
  if (icache_line_size < isize)
    isize = icache_line_size;
  icache_line_size = isize;
  if (dcache_line_size < dsize)
    dsize = dcache_line_size;
  dcache_line_size = dsize;

  /* If CTR_EL0.IDC is enabled, Data cache clean to the Point of Unification is
     not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 28)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(dsize - 1);
    while (line < range_end) {
      // use "civac" instead of "cvau", as this is the suggested workaround for
      // Cortex-A53 errata 819472, 826319, 827319 and 824069.
      __asm__ volatile("dc civac, %0" : : "r"(line) : "memory");
      line += dsize;
    }
  }
  __asm__ volatile("dsb ish" : : : "memory");

  /* If CTR_EL0.DIC is enabled, Instruction cache cleaning to the Point of
     Unification is not required for instruction to data coherence. */
  if ((ctr_el0 & (1 << 29)) == 0x0) {
    line = (uint64_t)start & ~(uint64_t)(isize - 1);
    while (line < range_end) {
      __asm__ volatile("ic ivau, %0" : : "r"(line) : "memory");
      line += isize;
    }

    __asm__ volatile("dsb ish" : : : "memory");
  }

  __asm__ volatile("isb" : : : "memory");
}
1935
1936// CPU-architecture-specific initialization
1937static void arch_init(void)
1938{
1939 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops;
1940 struct tramp_insns *ops = ndrc->tramp.ops;
1941 size_t i;
1942 assert(!(diff & 3));
1943 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1944 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++) {
1945 ops[i].ldr = 0x58000000 | imm19_rt(diff >> 2, 17); // ldr x17, [=val]
1946 ops[i].br = 0xd61f0000 | rm_rn_rd(0, 17, 0); // br x17
1947 }
1948 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
1949}
1950
1951// vim:shiftwidth=2:expandtab