drc: adjust bogus looking check
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "arm_features.h"
24
// Translation cache storage, selected at build time:
//  - BASE_ADDR_FIXED:   nothing is defined here
//  - BASE_ADDR_DYNAMIC: only a pointer is defined here
//  - otherwise:         a static, 4096-byte aligned buffer of 1 << TARGET_SIZE_2 bytes
#if defined(BASE_ADDR_FIXED)
#elif defined(BASE_ADDR_DYNAMIC)
u_char *translation_cache;
#else
u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
#endif
// NOTE(review): presumably a bitmap of cache regions awaiting an icache flush
// (32 bits per word, 1 << (TARGET_SIZE_2-17) words) - confirm against its users
static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];

// bitmask with bits 0..18 set
// NOTE(review): presumably one bit per host register x0-x18 - confirm
#define CALLER_SAVE_REGS 0x0007ffff

// shorthand for the GCC "unused" attribute
#define unused __attribute__((unused))

// defined outside of this part of the file
void do_memhandler_pre();
void do_memhandler_post();
40/* Linker */
// Patch the pc-relative instruction at addr so that it refers to target.
// Three instruction kinds are recognised: "b" (26-bit word offset),
// "b.cond" (19-bit word offset) and "adr" (21-bit byte offset split into
// immlo/immhi). Anything else trips an assert.
static void set_jump_target(void *addr, void *target)
{
  u_int *insn = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*insn & 0xFC000000) == 0x14000000) {
    // unconditional branch
    assert(offset >= -134217728LL && offset < 134217728LL);
    u_int imm26 = (offset >> 2) & 0x3ffffff;
    *insn = (*insn & 0xFC000000) | imm26;
    return;
  }

  if ((*insn & 0xff000000) == 0x54000000) {
    // Conditional branches are limited to +/- 1MB.
    // The maximum block size is 256k, so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link);
    // a workaround would be a trampoline jump via a stub at the end of the block.
    assert(offset >= -1048576LL && offset < 1048576LL);
    u_int imm19 = (offset >> 2) & 0x7ffff;
    *insn = (*insn & 0xFF00000F) | (imm19 << 5);
    return;
  }

  if ((*insn & 0x9f000000) == 0x10000000) {
    // adr, generated by do_miniht_insert
    assert(offset >= -1048576LL && offset < 1048576LL);
    u_int immlo = offset & 0x3;
    u_int immhi = (offset >> 2) & 0x7ffff;
    *insn = (*insn & 0x9F00001F) | (immlo << 29) | (immhi << 5);
    return;
  }

  assert(0); // should not happen
}
66
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// locate the jumping instruction: the third word of the stub must be an
// "adr", and the address it encodes is returned.
static void *find_extjump_insn(void *stub)
{
  int *adr_insn = (int *)stub + 2;
  int immhi, immlo, offset;

  assert((*adr_insn&0x9f000000) == 0x10000000); // adr
  immhi = (signed int)(*adr_insn<<8)>>13;  // sign-extended bits 23:5
  immlo = (*adr_insn>>29)&0x3;             // bits 30:29
  offset = (immhi<<2)|immlo;
  return adr_insn + offset / 4;
}
76
77// find where external branch is liked to using addr of it's stub:
78// get address that insn one after stub loads (dyna_linker arg1),
79// treat it as a pointer to branch insn,
80// return addr where that branch jumps to
81static void *get_pointer(void *stub)
82{
83 int *i_ptr = find_extjump_insn(stub);
84 assert((*i_ptr&0xfc000000) == 0x14000000); // b
85 return (u_char *)i_ptr+(((signed int)(*i_ptr<<6)>>6)<<2);
86}
87
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// Not implemented in this file: asserts unconditionally and
// returns NULL if assertions are compiled out.
static void *get_clean_addr(void *addr)
{
  assert(0);
  return NULL;
}
95
// Not implemented in this file: asserts unconditionally and
// returns 0 if assertions are compiled out.
static int verify_dirty(u_int *ptr)
{
  assert(0);
  return 0;
}
101
// Return nonzero unless the instruction at addr has 0x58 in its top byte,
// i.e. unless it is a 64-bit "ldr (literal)".
static int isclean(void *addr)
{
  const u_int *insn = addr;
  u_int top_byte = *insn >> 24;

  // the only place ldr (literal) is used
  return top_byte != 0x58;
}
107
// Read the 64-bit value that the "ldr Xt, <literal>" instruction at i loads.
// The signed imm19 field (bits 23:5) is a word offset relative to i.
static uint64_t get_from_ldr_literal(const u_int *i)
{
  signed int word_ofs;
  const u_int *literal;

  assert((i[0] & 0xff000000) == 0x58000000);
  word_ofs = i[0] << 8;   // move the field's sign bit up to bit 31
  word_ofs >>= 5+8;       // arithmetic shift back down, dropping bits 4:0
  literal = i + word_ofs;
  return *(uint64_t *)literal;
}
116
// Return the 16-bit immediate (bits 20:5) of the 32-bit "movz" instruction at i.
static uint64_t get_from_movz(const u_int *i)
{
  u_int insn = i[0];

  assert((insn & 0x7fe00000) == 0x52800000);
  insn >>= 5;
  return insn & 0xffff;
}
122
123// get source that block at addr was compiled from (host pointers)
124static void get_bounds(void *addr, u_char **start, u_char **end)
125{
126 const u_int *ptr = addr;
127 assert((ptr[0] & 0xff00001f) == 0x58000001); // ldr x1, source
128 assert((ptr[1] & 0xff00001f) == 0x58000002); // ldr x2, copy
129 assert((ptr[2] & 0xffe0001f) == 0x52800003); // movz w3, #slen*4
130 *start = (u_char *)get_from_ldr_literal(&ptr[0]);
131 *end = *start + get_from_movz(&ptr[2]);
132}
133
134// Allocate a specific ARM register.
135static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
136{
137 int n;
138 int dirty=0;
139
140 // see if it's already allocated (and dealloc it)
141 for(n=0;n<HOST_REGS;n++)
142 {
143 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
144 dirty=(cur->dirty>>n)&1;
145 cur->regmap[n]=-1;
146 }
147 }
148
149 cur->regmap[hr]=reg;
150 cur->dirty&=~(1<<hr);
151 cur->dirty|=dirty<<hr;
152 cur->isconst&=~(1<<hr);
153}
154
// Alloc cycle count into dedicated register:
// maps CCREG to HOST_CCREG via alloc_arm_reg.
static void alloc_cc(struct regstat *cur,int i)
{
  alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
}
160
161/* Special alloc */
162
163
164/* Assembler */
165
// Names used in debug output for host registers when accessed as 32-bit
// values, indexed by register number 0..31 (16/17 are printed as ip0/ip1).
static unused const char *regname[32] = {
  "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
  "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
  "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
  "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
};
172
// Names used in debug output for host registers when accessed as 64-bit
// values, indexed by register number 0..31.
static unused const char *regname64[32] = {
  "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
  "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
  "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
  "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
};
179
// Condition codes, numbered 0..15 in declaration order; these values are
// or-ed directly into the condition field of the emitted instructions
// (see emit_cmov_imm and the emit_j* branch emitters).
enum {
  COND_EQ, COND_NE, COND_CS, COND_CC, COND_MI, COND_PL, COND_VS, COND_VC,
  COND_HI, COND_LS, COND_GE, COND_LT, COND_GT, COND_LE, COND_AW, COND_NV
};
184
// Debug-output names for the condition codes, in the same order as the
// COND_* enum above.
static unused const char *condname[16] = {
  "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
  "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
};
189
190static void output_w32(u_int word)
191{
192 *((u_int *)out) = word;
193 out += 4;
194}
195
196static void output_w64(uint64_t dword)
197{
198 *((uint64_t *)out) = dword;
199 out+=8;
200}
201
202/*
203static u_int rm_rd(u_int rm, u_int rd)
204{
205 assert(rm < 31);
206 assert(rd < 31);
207 return (rm << 16) | rd;
208}
209*/
210
// Pack three register numbers into their instruction fields:
// rm at bit 16, rn at bit 5, rd at bit 0. Each must be below 32.
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= rm << 16;
  return fields;
}
218
// Pack a 7-bit immediate (bit 15), rt2 (bit 10), rn (bit 5) and rt (bit 0)
// into their instruction fields. rt and rt2 must be below 31, rn below 32.
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = rt;
  fields |= rn << 5;
  fields |= rt2 << 10;
  fields |= imm7 << 15;
  return fields;
}
227
228static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
229{
230 assert(imm6 <= 63);
231 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
232}
233
// Pack a 16-bit immediate (bit 5) and destination register rd (bit 0).
// rd must be below 31.
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = rd;
  fields |= imm16 << 5;
  return fields;
}
240
// Pack a 12-bit immediate (bit 10), rn (bit 5) and rd (bit 0).
// Both registers may be 0..31.
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imm12 << 10;
  return fields;
}
248
// Pack a 9-bit immediate (bit 12), rn (bit 5) and the transfer register
// (bit 0). Both registers must be below 31.
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = rd;
  fields |= rn << 5;
  fields |= imm9 << 12;
  return fields;
}
256
// Pack a 19-bit immediate (bit 5) and register rt (bit 0).
// rt must be below 31.
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = rt;
  fields |= imm19 << 5;
  return fields;
}
263
// Pack the fields shared by the bitfield and logical-immediate encodings:
// N (bit 22), immr (bit 16), imms (bit 10), rn (bit 5) and rd (bit 0).
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = rd;
  fields |= rn << 5;
  fields |= imms << 10;
  fields |= immr << 16;
  fields |= n << 22;
  return fields;
}
273
274static u_int genjmp(const u_char *addr)
275{
276 intptr_t offset = addr - out;
277 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
278 if (offset < -134217728 || offset > 134217727) {
279 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
280 abort();
281 return 0;
282 }
283 return ((u_int)offset >> 2) & 0x03ffffff;
284}
285
286static u_int genjmpcc(const u_char *addr)
287{
288 intptr_t offset = addr - out;
289 if ((uintptr_t)addr < 3) return 0;
290 if (offset < -1048576 || offset > 1048572) {
291 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
292 abort();
293 return 0;
294 }
295 return ((u_int)offset >> 2) & 0x7ffff;
296}
297
// Return nonzero if value is a non-empty run of ones starting at bit 0,
// i.e. value == 2^n - 1 for some n in 1..32.
static uint32_t is_mask(u_int value)
{
  return value && ((value + 1) & value) == 0;
}

// This function returns true if the argument contains a
// non-empty sequence of ones (possibly rotated) with the remainder zero.
// All-zeros and all-ones are both rejected: neither can be expressed as
// a logical immediate, and gen_logical_imm() would otherwise produce
// a reserved encoding (imms == 31) for 0xffffffff.
static uint32_t is_rotated_mask(u_int value)
{
  if (value == 0 || value == ~0u)
    return 0;
  // a single run of ones that does not wrap around bit 31
  if (is_mask((value - 1) | value))
    return 1;
  // a run that wraps around: then the zeros form a single non-wrapping run
  return is_mask((~value - 1) | ~value);
}
313
// Compute the immr/imms fields (with N=0, 32-bit element) of a logical
// immediate for value, which must be a single run of ones, possibly wrapping
// around bit 31, and neither all-zeros nor all-ones.
// The encoded pattern is (imms + 1) ones rotated right by immr.
static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
{
  int lzeros, tzeros, ones;
  u_int filled;
  assert(value != 0);

  // run of ones that doesn't wrap: filling the trailing zeros gives 2^n - 1
  filled = (value - 1) | value;
  if (((filled + 1) & filled) == 0) {
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros;
    // rotating right by (32 - tzeros) is rotating left by tzeros
    *immr = (32 - tzeros) & 31;
    *imms = ones - 1;
    return;
  }

  // run of ones that wraps around: look at the run of zeros instead
  value = ~value;
  filled = (value - 1) | value;
  if (((filled + 1) & filled) == 0) {
    lzeros = __builtin_clz(value);
    tzeros = __builtin_ctz(value);
    ones = 32 - lzeros - tzeros; // ones of ~value == zeros of the original
    // The original has lzeros ones at the top and tzeros ones at the bottom,
    // so the low (lzeros + tzeros) ones must be rotated right by lzeros.
    *immr = lzeros;
    *imms = 31 - ones;
    return;
  }
  assert(0);
}
337
338static void emit_mov(u_int rs, u_int rt)
339{
340 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
341 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
342}
343
344static void emit_mov64(u_int rs, u_int rt)
345{
346 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
347 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
348}
349
// "Move and set flags". Flagged as misleading by the original author and
// guarded by an unconditional assert; the code after the assert (an
// immediate-form instruction with a zero immediate) is only reached when
// assertions are compiled out.
static void emit_movs(u_int rs, u_int rt)
{
  assert(0); // misleading
  assem_debug("movs %s,%s\n", regname[rt], regname[rs]);
  output_w32(0x31000000 | imm12_rn_rd(0, rs, rt));
}
356
357static void emit_add(u_int rs1, u_int rs2, u_int rt)
358{
359 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
360 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
361}
362
363static void emit_add64(u_int rs1, u_int rs2, u_int rt)
364{
365 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
366 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
367}
368
369#pragma GCC diagnostic ignored "-Wunused-function"
370static void emit_adds(u_int rs1, u_int rs2, u_int rt)
371{
372 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
373 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
374}
375
376static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
377{
378 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
379 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
380}
381
382static void emit_neg(u_int rs, u_int rt)
383{
384 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
385 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
386}
387
388static void emit_sub(u_int rs1, u_int rs2, u_int rt)
389{
390 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
391 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
392}
393
394static void emit_movz(u_int imm, u_int rt)
395{
396 assem_debug("movz %s,#%#x\n", regname[rt], imm);
397 output_w32(0x52800000 | imm16_rd(imm, rt));
398}
399
400static void emit_movz_lsl16(u_int imm, u_int rt)
401{
402 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
403 output_w32(0x52a00000 | imm16_rd(imm, rt));
404}
405
406static void emit_movn(u_int imm, u_int rt)
407{
408 assem_debug("movn %s,#%#x\n", regname[rt], imm);
409 output_w32(0x12800000 | imm16_rd(imm, rt));
410}
411
412static void emit_movn_lsl16(u_int imm,u_int rt)
413{
414 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
415 output_w32(0x12a00000 | imm16_rd(imm, rt));
416}
417
418static void emit_movk(u_int imm,u_int rt)
419{
420 assem_debug("movk %s,#%#x\n", regname[rt], imm);
421 output_w32(0x72800000 | imm16_rd(imm, rt));
422}
423
424static void emit_movk_lsl16(u_int imm,u_int rt)
425{
426 assert(imm<65536);
427 assem_debug("movk %s, #%#x, lsl #16\n", regname[rt], imm);
428 output_w32(0x72a00000 | imm16_rd(imm, rt));
429}
430
// Set rt to zero using "movz rt, #0".
static void emit_zeroreg(u_int rt)
{
  emit_movz(0, rt);
}
435
436static void emit_movimm(u_int imm, u_int rt)
437{
438 if (imm < 65536)
439 emit_movz(imm, rt);
440 else if ((~imm) < 65536)
441 emit_movn(~imm, rt);
442 else if ((imm&0xffff) == 0)
443 emit_movz_lsl16(imm >> 16, rt);
444 else if (((~imm)&0xffff) == 0)
445 emit_movn_lsl16(~imm >> 16, rt);
446 else if (is_rotated_mask(imm)) {
447 u_int immr, imms;
448 gen_logical_imm(imm, &immr, &imms);
449 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
450 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
451 }
452 else {
453 emit_movz(imm & 0xffff, rt);
454 emit_movk_lsl16(imm >> 16, rt);
455 }
456}
457
458static void emit_readword(void *addr, u_int rt)
459{
460 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
461 if (!(offset & 3) && offset <= 16380) {
462 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
463 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
464 }
465 else
466 assert(0);
467}
468
469static void emit_readdword(void *addr, u_int rt)
470{
471 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
472 if (!(offset & 7) && offset <= 32760) {
473 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
474 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
475 }
476 else
477 assert(0);
478}
479
480static void emit_loadreg(u_int r, u_int hr)
481{
482 int is64 = 0;
483 assert(r < 64);
484 if (r == 0)
485 emit_zeroreg(hr);
486 else {
487 void *addr = &psxRegs.GPR.r[r];
488 switch (r) {
489 //case HIREG: addr = &hi; break;
490 //case LOREG: addr = &lo; break;
491 case CCREG: addr = &cycle_count; break;
492 case CSREG: addr = &Status; break;
493 case INVCP: addr = &invc_ptr; is64 = 1; break;
494 default: assert(r < 34); break;
495 }
496 if (is64)
497 emit_readdword(addr, hr);
498 else
499 emit_readword(addr, hr);
500 }
501}
502
503static void emit_writeword(u_int rt, void *addr)
504{
505 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
506 if (!(offset & 3) && offset <= 16380) {
507 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
508 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
509 }
510 else
511 assert(0);
512}
513
514static void emit_writedword(u_int rt, void *addr)
515{
516 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
517 if (!(offset & 7) && offset <= 32760) {
518 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
519 output_w32(0xf9000000 | imm12_rn_rd(offset >> 2, FP, rt));
520 }
521 else
522 assert(0);
523}
524
525static void emit_storereg(u_int r, u_int hr)
526{
527 assert(r < 64);
528 void *addr = &psxRegs.GPR.r[r];
529 switch (r) {
530 //case HIREG: addr = &hi; break;
531 //case LOREG: addr = &lo; break;
532 case CCREG: addr = &cycle_count; break;
533 default: assert(r < 34); break;
534 }
535 emit_writeword(hr, addr);
536}
537
538static void emit_test(u_int rs, u_int rt)
539{
540 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
541 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
542}
543
544static void emit_testimm(u_int rs, u_int imm)
545{
546 u_int immr, imms;
547 assem_debug("tst %s,#%#x\n", regname[rs], imm);
548 assert(is_rotated_mask(imm)); // good enough for PCSX
549 gen_logical_imm(imm, &immr, &imms);
550 output_w32(0xb9000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
551}
552
// Conditional test has no implementation here: only the debug trace is
// produced, then this asserts unconditionally.
static void emit_testeqimm(u_int rs,int imm)
{
  assem_debug("tsteq %s,$%d\n",regname[rs],imm);
  assert(0); // TODO eliminate emit_testeqimm
}
558
559static void emit_not(u_int rs,u_int rt)
560{
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
563}
564
// Conditional mvn has no implementation here: only the debug trace is
// produced, then this asserts unconditionally.
static void emit_mvnmi(u_int rs,u_int rt)
{
  assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
  assert(0); // eliminate
}
570
571static void emit_and(u_int rs1,u_int rs2,u_int rt)
572{
573 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
574 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
575}
576
577static void emit_or(u_int rs1,u_int rs2,u_int rt)
578{
579 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
580 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
581}
582
583static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
584{
585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
587}
588
589static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
590{
591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
593}
594
595static void emit_xor(u_int rs1,u_int rs2,u_int rt)
596{
597 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
598 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
599}
600
601static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
602{
603 unused const char *st = s ? "s" : "";
604 s = s ? 0x20000000 : 0;
605 is64 = is64 ? 0x80000000 : 0;
606 if (imm < 4096) {
607 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
608 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
609 }
610 else if (-imm < 4096) {
611 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
612 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
613 }
614 else if (imm < 16777216) {
615 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
616 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
617 if ((imm & 0xfff) || s) {
618 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
619 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rt, rt));
620 }
621 }
622 else if (-imm < 16777216) {
623 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
624 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
625 if ((imm & 0xfff) || s) {
626 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
627 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
628 }
629 }
630 else
631 assert(0);
632}
633
// 32-bit rt = rs + imm, without setting flags (see emit_addimm_s).
static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
{
  emit_addimm_s(0, 0, rs, imm, rt);
}
638
// 64-bit rt = rs + imm, without setting flags (see emit_addimm_s).
static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
{
  emit_addimm_s(0, 1, rs, imm, rt);
}
643
// 32-bit rt += imm using the flag-setting form (see emit_addimm_s).
// imm is a signed int, so negative values reach emit_addimm_s sign-extended.
static void emit_addimm_and_set_flags(int imm, u_int rt)
{
  emit_addimm_s(1, 0, rt, imm, rt);
}
648
649static void emit_addimm_no_flags(u_int imm,u_int rt)
650{
651 emit_addimm(rt,imm,rt);
652}
653
654static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
655{
656 const char *names[] = { "and", "orr", "eor", "ands" };
657 const char *name = names[op];
658 u_int immr, imms;
659 op = op << 29;
660 if (is_rotated_mask(imm)) {
661 gen_logical_imm(imm, &immr, &imms);
662 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
663 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
664 }
665 else {
666 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
667 host_tempreg_acquire();
668 emit_movimm(imm, HOST_TEMPREG);
669 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
670 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
671 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
672 host_tempreg_release();
673 }
674 (void)name;
675}
676
677static void emit_andimm(u_int rs, u_int imm, u_int rt)
678{
679 if (imm == 0)
680 emit_zeroreg(rt);
681 else
682 emit_logicop_imm(0, rs, imm, rt);
683}
684
685static void emit_orimm(u_int rs, u_int imm, u_int rt)
686{
687 if (imm == 0) {
688 if (rs != rt)
689 emit_mov(rs, rt);
690 }
691 else
692 emit_logicop_imm(1, rs, imm, rt);
693}
694
695static void emit_xorimm(u_int rs, u_int imm, u_int rt)
696{
697 if (imm == 0) {
698 if (rs != rt)
699 emit_mov(rs, rt);
700 }
701 else
702 emit_logicop_imm(2, rs, imm, rt);
703}
704
705static void emit_sbfm(u_int rs,u_int imm,u_int rt)
706{
707 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
708 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
709}
710
711static void emit_ubfm(u_int rs,u_int imm,u_int rt)
712{
713 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
714 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
715}
716
717static void emit_shlimm(u_int rs,u_int imm,u_int rt)
718{
719 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
720 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
721}
722
// Not implemented in this file: asserts unconditionally.
static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt)
{
  assert(0); // eliminate
}
727
728static void emit_shrimm(u_int rs,u_int imm,u_int rt)
729{
730 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
732}
733
734static void emit_sarimm(u_int rs,u_int imm,u_int rt)
735{
736 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
737 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
738}
739
740static void emit_rorimm(u_int rs,u_int imm,u_int rt)
741{
742 assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm);
743 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
744}
745
746static void emit_signextend16(u_int rs, u_int rt)
747{
748 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
749 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
750}
751
752static void emit_shl(u_int rs,u_int rshift,u_int rt)
753{
754 assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[rshift]);
755 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
756}
757
758static void emit_shr(u_int rs,u_int rshift,u_int rt)
759{
760 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
761 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
762}
763
764static void emit_sar(u_int rs,u_int rshift,u_int rt)
765{
766 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
767 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
768}
769
770static void emit_cmpimm(u_int rs, u_int imm)
771{
772 if (imm < 4096) {
773 assem_debug("cmp %s,%#x\n", regname[rs], imm);
774 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
775 }
776 else if (-imm < 4096) {
777 assem_debug("cmn %s,%#x\n", regname[rs], imm);
778 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
779 }
780 else if (imm < 16777216 && !(imm & 0xfff)) {
781 assem_debug("cmp %s,#%#x,lsl #12\n", regname[rs], imm >> 12);
782 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
783 }
784 else {
785 host_tempreg_acquire();
786 emit_movimm(imm, HOST_TEMPREG);
787 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
788 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
789 host_tempreg_release();
790 }
791}
792
793static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
794{
795 assert(imm == 0 || imm == 1);
796 assert(cond0 < 0x10);
797 assert(cond1 < 0x10);
798 if (imm) {
799 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
800 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
801 } else {
802 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
803 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
804 }
805}
806
// rt = imm (0 or 1) if the "ne" condition holds; see emit_cmov_imm.
static void emit_cmovne_imm(u_int imm,u_int rt)
{
  emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
}
811
// rt = imm (0 or 1) if the "lt" condition holds; see emit_cmov_imm.
static void emit_cmovl_imm(u_int imm,u_int rt)
{
  emit_cmov_imm(COND_LT, COND_GE, imm, rt);
}
816
// rt = imm (0 or 1) if the "cc" condition holds; see emit_cmov_imm.
static void emit_cmovb_imm(int imm,u_int rt)
{
  emit_cmov_imm(COND_CC, COND_CS, imm, rt);
}
821
// rt = imm (0 or 1) if the "mi" condition holds; see emit_cmov_imm.
static void emit_cmovs_imm(int imm,u_int rt)
{
  emit_cmov_imm(COND_MI, COND_PL, imm, rt);
}
826
827static void emit_cmovne_reg(u_int rs,u_int rt)
828{
829 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
830 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
831}
832
833static void emit_cmovl_reg(u_int rs,u_int rt)
834{
835 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
836 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
837}
838
839static void emit_cmovs_reg(u_int rs,u_int rt)
840{
841 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
842 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
843}
844
845static void emit_slti32(u_int rs,int imm,u_int rt)
846{
847 if(rs!=rt) emit_zeroreg(rt);
848 emit_cmpimm(rs,imm);
849 if(rs==rt) emit_movimm(0,rt);
850 emit_cmovl_imm(1,rt);
851}
852
853static void emit_sltiu32(u_int rs,int imm,u_int rt)
854{
855 if(rs!=rt) emit_zeroreg(rt);
856 emit_cmpimm(rs,imm);
857 if(rs==rt) emit_movimm(0,rt);
858 emit_cmovb_imm(1,rt);
859}
860
861static void emit_cmp(u_int rs,u_int rt)
862{
863 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
864 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
865}
866
867static void emit_set_gz32(u_int rs, u_int rt)
868{
869 //assem_debug("set_gz32\n");
870 emit_cmpimm(rs,1);
871 emit_movimm(1,rt);
872 emit_cmovl_imm(0,rt);
873}
874
875static void emit_set_nz32(u_int rs, u_int rt)
876{
877 //assem_debug("set_nz32\n");
878 if(rs!=rt) emit_mov(rs,rt);
879 emit_test(rs,rs);
880 emit_cmovne_imm(1,rt);
881}
882
883static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
884{
885 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
887 emit_cmp(rs1,rs2);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovl_imm(1,rt);
890}
891
892static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
893{
894 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
895 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
896 emit_cmp(rs1,rs2);
897 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
898 emit_cmovb_imm(1,rt);
899}
900
901static void emit_call(const void *a)
902{
903 intptr_t diff = (u_char *)a - out;
904 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
905 assert(!(diff & 3));
906 if (-134217728 <= diff && diff <= 134217727)
907 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
908 else
909 assert(0);
910}
911
912static void emit_jmp(const void *a)
913{
914 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
915 u_int offset = genjmp(a);
916 output_w32(0x14000000 | offset);
917}
918
919static void emit_jne(const void *a)
920{
921 assem_debug("bne %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_NE);
924}
925
926static void emit_jeq(const void *a)
927{
928 assem_debug("beq %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_EQ);
931}
932
933static void emit_js(const void *a)
934{
935 assem_debug("bmi %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_MI);
938}
939
940static void emit_jns(const void *a)
941{
942 assem_debug("bpl %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_PL);
945}
946
947static void emit_jl(const void *a)
948{
949 assem_debug("blt %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_LT);
952}
953
954static void emit_jge(const void *a)
955{
956 assem_debug("bge %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_GE);
959}
960
961static void emit_jno(const void *a)
962{
963 assem_debug("bvc %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_VC);
966}
967
968static void emit_jc(const void *a)
969{
970 assem_debug("bcs %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_CS);
973}
974
975static void emit_jcc(const void *a)
976{
977 assem_debug("bcc %p\n", a);
978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CC);
980}
981
982static void emit_jmpreg(u_int r)
983{
984 assem_debug("br %s", regname64[r]);
985 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
986}
987
988static void emit_retreg(u_int r)
989{
990 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
991 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
992}
993
// Emit a plain "ret" (return via LR).
static void emit_ret(void)
{
  emit_retreg(LR);
}
998
999static void emit_adr(void *addr, u_int rt)
1000{
1001 intptr_t offset = (u_char *)addr - out;
1002 assert(-1048576 <= offset && offset < 1048576);
1003 assem_debug("adr x%d,#%#lx\n", rt, offset);
1004 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1005}
1006
1007static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1008{
1009 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1010 assert(-256 <= offset && offset < 256);
1011 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1012}
1013
1014static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1015{
1016 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1017 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1018}
1019
1020static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1021{
1022 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1024}
1025
1026static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1027{
1028 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1030}
1031
1032static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1033{
1034 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1036}
1037
1038static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1039{
1040 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1042}
1043
1044static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1045{
1046 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1047 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1048}
1049
1050static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1051{
1052 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1053 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1054}
1055
1056static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1057{
1058 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1059 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1060}
1061
1062static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1063{
1064 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1065 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
1066}
1067
1068static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1069{
1070 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1071 assert(-256 <= offset && offset < 256);
1072 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1073}
1074
1075static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1076{
1077 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1080}
1081
1082static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1083{
1084 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1087}
1088
1089static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1090{
1091 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1094}
1095
1096static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1097{
1098 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1099 if (!(offset & 3) && offset <= 16380)
1100 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1101 else
1102 assert(0);
1103}
1104
1105static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1106{
1107 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1108 if (!(offset & 1) && offset <= 8190)
1109 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1110 else
1111 assert(0);
1112}
1113
1114static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1115{
1116 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 if ((u_int)offset < 4096)
1118 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1119 else
1120 assert(0);
1121}
1122
1123static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1124{
1125 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1126 assert(rs1<16);
1127 assert(rs2<16);
1128 assert(hi<16);
1129 assert(lo<16);
1130 assert(0);
1131}
1132
1133static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1134{
1135 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1136 assert(rs1<16);
1137 assert(rs2<16);
1138 assert(hi<16);
1139 assert(lo<16);
1140 assert(0);
1141}
1142
1143static void emit_clz(u_int rs,u_int rt)
1144{
1145 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1146 assert(0);
1147}
1148
1149// special case for checking invalid_code
1150static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
1151{
1152 host_tempreg_acquire();
1153 emit_shrimm(r, 12, HOST_TEMPREG);
1154 assem_debug("ldrb %s,[%s,%s]",regname[HOST_TEMPREG],regname64[rbase],regname64[HOST_TEMPREG]);
1155 output_w32(0x38606800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1156 emit_cmpimm(HOST_TEMPREG, imm);
1157 host_tempreg_release();
1158}
1159
1160static void emit_orrne_imm(u_int rs,int imm,u_int rt)
1161{
1162 assem_debug("orrne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
1163 assert(0);
1164}
1165
1166static void emit_andne_imm(u_int rs,int imm,u_int rt)
1167{
1168 assem_debug("andne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
1169 assert(0);
1170}
1171
1172static unused void emit_addpl_imm(u_int rs,int imm,u_int rt)
1173{
1174 assem_debug("addpl %s,%s,#%#x\n",regname[rt],regname[rs],imm);
1175 assert(0);
1176}
1177
1178static void emit_loadlp_ofs(u_int ofs, u_int rt)
1179{
1180 output_w32(0x58000000 | imm19_rt(ofs, rt));
1181}
1182
1183static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
1184{
1185 u_int op = 0xb9000000;
1186 unused const char *ldst = is_st ? "st" : "ld";
1187 unused char rp = is64 ? 'x' : 'w';
1188 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1189 is64 = is64 ? 1 : 0;
1190 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1191 ofs = (ofs >> (2+is64));
1192 if (!is_st) op |= 0x00400000;
1193 if (is64) op |= 0x40000000;
1194 output_w32(op | imm12_rn_rd(ofs, rn, rt));
1195}
1196
1197static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
1198{
1199 u_int op = 0x29000000;
1200 unused const char *ldst = is_st ? "st" : "ld";
1201 unused char rp = is64 ? 'x' : 'w';
1202 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1203 is64 = is64 ? 1 : 0;
1204 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1205 ofs = (ofs >> (2+is64));
1206 assert(-64 <= ofs && ofs <= 63);
1207 ofs &= 0x7f;
1208 if (!is_st) op |= 0x00400000;
1209 if (is64) op |= 0x80000000;
1210 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
1211}
1212
1213static void save_load_regs_all(int is_store, u_int reglist)
1214{
1215 int ofs = 0, c = 0;
1216 u_int r, pair[2];
1217 for (r = 0; reglist; r++, reglist >>= 1) {
1218 if (reglist & 1)
1219 pair[c++] = r;
1220 if (c == 2) {
1221 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1222 ofs += 8 * 2;
1223 c = 0;
1224 }
1225 }
1226 if (c) {
1227 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1228 ofs += 8;
1229 }
1230 assert(ofs <= SSP_CALLER_REGS);
1231}
1232
1233// Save registers before function call
1234static void save_regs(u_int reglist)
1235{
1236 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
1237 save_load_regs_all(1, reglist);
1238}
1239
1240// Restore registers after function call
1241static void restore_regs(u_int reglist)
1242{
1243 reglist &= CALLER_SAVE_REGS;
1244 save_load_regs_all(0, reglist);
1245}
1246
1247/* Stubs/epilogue */
1248
1249static void literal_pool(int n)
1250{
1251 (void)literals;
1252}
1253
// Intentionally empty; n is ignored.
static void literal_pool_jumpover(int n)
{
  /* nothing to emit */
}
1257
1258// parsed by get_pointer, find_extjump_insn
1259static void emit_extjump2(u_char *addr, u_int target, void *linker)
1260{
1261 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
1262
1263 emit_movz(target & 0xffff, 0);
1264 emit_movk_lsl16(target >> 16, 0);
1265
1266 // addr is in the current recompiled block (max 256k)
1267 // offset shouldn't exceed +/-1MB
1268 emit_adr(addr, 1);
1269 emit_jmp(linker);
1270}
1271
1272static void check_extjump2(void *src)
1273{
1274 u_int *ptr = src;
1275 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1276 (void)ptr;
1277}
1278
1279// put rt_val into rt, potentially making use of rs with value rs_val
1280static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
1281{
1282 int diff = rt_val - rs_val;
1283 if ((-4096 <= diff && diff < 4096)
1284 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff)))
1285 emit_addimm(rs, diff, rt);
1286 else if (is_rotated_mask(rs_val ^ rt_val))
1287 emit_xorimm(rs, rs_val ^ rt_val, rt);
1288 else
1289 emit_movimm(rt_val, rt);
1290}
1291
1292// return 1 if the above function can do it's job cheaply
1293static int is_similar_value(u_int v1, u_int v2)
1294{
1295 int diff = v1 - v2;
1296 return (-4096 <= diff && diff < 4096)
1297 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff))
1298 || is_rotated_mask(v1 ^ v2);
1299}
1300
1301// trashes r2
1302static void pass_args64(u_int a0, u_int a1)
1303{
1304 if(a0==1&&a1==0) {
1305 // must swap
1306 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1307 }
1308 else if(a0!=0&&a1==0) {
1309 emit_mov64(a1,1);
1310 if (a0>=0) emit_mov64(a0,0);
1311 }
1312 else {
1313 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1314 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1315 }
1316}
1317
1318static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1319{
1320 switch(type) {
1321 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1322 case LOADBU_STUB:
1323 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1324 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1325 case LOADHU_STUB:
1326 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1327 case LOADW_STUB:
1328 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1329 default: assert(0);
1330 }
1331}
1332
1333#include "pcsxmem.h"
1334//#include "pcsxmem_inline.c"
1335
// Emit the out-of-line read stub number n.
// The stub looks the address up in mem_rtab, then either performs the load
// directly or calls one of the jump_handler_read* routines, and finally
// jumps back to stubs[n].retaddr.
// Stub record fields used: addr (branch to patch), type, a (instruction
// index), b (host register holding the address), c (struct regstat *),
// d (cycle adjustment), e (register list), retaddr.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  // the branch recorded for this stub now lands here
  set_jump_target(stubs[n].addr, out);
  enum stub_type type = stubs[n].type;
  int i = stubs[n].a;
  int rs = stubs[n].b;
  const struct regstat *i_regs = (void *)stubs[n].c;
  u_int reglist = stubs[n].e;
  const signed char *i_regmap = i_regs->regmap;
  int rt;
  // destination: FTEMP for these instruction types, otherwise the
  // instruction's rt1
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL, *handler_jump = NULL;
  // look for a scratch register below HOST_CCREG that is neither in
  // reglist nor the address register
  reglist|=(1<<rs);
  for (r = 0; r < HOST_CCREG; r++) {
    if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
      temp = r;
      break;
    }
  }
  // the destination is overwritten anyway, so it need not be saved
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // no free register: save everything up front and take r0 (or r2 when
    // the address lives in r0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // use r1 as second scratch when it is available (saved or not in
  // reglist) and not already taken by temp or rs
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs >> 12], then doubled with flags set
  emit_readdword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readdword_dualindexedx8(temp,temp2,temp2);
  emit_adds64(temp2,temp2,temp2);
  // branch target is patched below to the handler path
  // NOTE(review): presumably taken when the doubling carries out, i.e. the
  // table entry had its top bit set - confirm against emit_jc
  handler_jump=out;
  emit_jc(0);
  // direct path: load from temp2 + rs
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
      default: assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jmp(0); // jump to reg restore
  }
  else
    emit_jmp(stubs[n].retaddr); // return address
  set_jump_target(handler_jump, out);

  // handler path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // arg0 = address, arg1 = doubled table entry
  pass_args64(rs,temp2);
  // arg2 = cycle count plus the adjustment for this instruction
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  emit_call(handler);
  // (no cycle reload after read)
  // the handler's result is taken from r0 and extended into rt
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    loadstore_extend(type,0,rt);
  }
  // the direct path joins here when registers were saved up front
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
1420
// Emit an inline read for an address that is known at compile time (addr).
// If get_direct_memhandler() returns no handler, a plain load from the
// returned host address is emitted; otherwise a call to the handler is
// emitted with the registers in reglist saved around it.
//   type    - LOAD*_STUB access kind
//   i       - instruction index (used for rt1[i])
//   regmap  - current register map
//   target  - guest register looked up for both address and result
//   adj     - cycle adjustment passed through CLOCK_ADJUST
//   reglist - host registers to preserve across a handler call
static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  // fall back to the register mapped to -1 when target has no host register
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
  u_int is_dynamic=0;
  uintptr_t host_addr = 0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
  //  return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // direct memory access; nothing to do if the result is discarded
    if(rt<0||rt1[i]==0)
      return;
    if (addr != host_addr) {
      if (host_addr >= 0x100000000ull)
        abort(); // ROREG not implemented
      // rs is taken to hold addr; turn it into host_addr
      emit_movimm_from(addr, rs, host_addr, rs);
    }
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // dynamic handlers are reached through the generic jump_handler_read*
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  // the destination is overwritten anyway, so it need not be saved
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // arg0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // arg2 = cycle count plus adjustment
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  if(is_dynamic)
    emit_readdword(&mem_rtab,1);
  else
    emit_call(do_memhandler_pre);

  emit_call(handler);

  // (no cycle reload after read)
  // result is taken from r0 and extended into rt
  if(rt>=0&&rt1[i]!=0)
    loadstore_extend(type, 0, rt);
  restore_regs(reglist);
}
1485
1486static void do_writestub(int n)
1487{
1488 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
1489 set_jump_target(stubs[n].addr, out);
1490 enum stub_type type=stubs[n].type;
1491 int i=stubs[n].a;
1492 int rs=stubs[n].b;
1493 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1494 u_int reglist=stubs[n].e;
1495 signed char *i_regmap=i_regs->regmap;
1496 int rt,r;
1497 if(itype[i]==C1LS||itype[i]==C2LS) {
1498 rt=get_reg(i_regmap,r=FTEMP);
1499 }else{
1500 rt=get_reg(i_regmap,r=rs2[i]);
1501 }
1502 assert(rs>=0);
1503 assert(rt>=0);
1504 int rtmp,temp=-1,temp2,regs_saved=0;
1505 void *restore_jump = NULL, *handler_jump = NULL;
1506 int reglist2=reglist|(1<<rs)|(1<<rt);
1507 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1508 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1509 temp = rtmp;
1510 break;
1511 }
1512 }
1513 if(temp==-1) {
1514 save_regs(reglist);
1515 regs_saved=1;
1516 for(rtmp=0;rtmp<=3;rtmp++)
1517 if(rtmp!=rs&&rtmp!=rt)
1518 {temp=rtmp;break;}
1519 }
1520 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1521 temp2=3;
1522 else {
1523 host_tempreg_acquire();
1524 temp2=HOST_TEMPREG;
1525 }
1526 emit_readdword(&mem_wtab,temp);
1527 emit_shrimm(rs,12,temp2);
1528 emit_readdword_dualindexedx8(temp,temp2,temp2);
1529 emit_adds64(temp2,temp2,temp2);
1530 handler_jump=out;
1531 emit_jc(0);
1532 switch(type) {
1533 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1534 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1535 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1536 default: assert(0);
1537 }
1538 if(regs_saved) {
1539 restore_jump=out;
1540 emit_jmp(0); // jump to reg restore
1541 }
1542 else
1543 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1544 set_jump_target(handler_jump, out);
1545
1546 // TODO FIXME: regalloc should prefer callee-saved regs
1547 if(!regs_saved)
1548 save_regs(reglist);
1549 void *handler=NULL;
1550 switch(type) {
1551 case STOREB_STUB: handler=jump_handler_write8; break;
1552 case STOREH_STUB: handler=jump_handler_write16; break;
1553 case STOREW_STUB: handler=jump_handler_write32; break;
1554 default: assert(0);
1555 }
1556 assert(handler);
1557 pass_args(rs,rt);
1558 if(temp2!=3) {
1559 emit_mov64(temp2,3);
1560 host_tempreg_release();
1561 }
1562 int cc=get_reg(i_regmap,CCREG);
1563 if(cc<0)
1564 emit_loadreg(CCREG,2);
1565 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1566 // returns new cycle_count
1567 emit_call(handler);
1568 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1569 if(cc<0)
1570 emit_storereg(CCREG,2);
1571 if(restore_jump)
1572 set_jump_target(restore_jump, out);
1573 restore_regs(reglist);
1574 emit_jmp(stubs[n].retaddr);
1575}
1576
// Emit an inline write for an address that is known at compile time (addr).
// If get_direct_memhandler() returns no handler, a plain store to the
// returned host address is emitted; otherwise a call to the handler is
// emitted, bracketed by do_memhandler_pre/do_memhandler_post, with the
// registers in reglist saved around it.
//   type    - STORE*_STUB access kind
//   i       - instruction index (not used in this function)
//   regmap  - current register map; the address register is the one mapped
//             to -1, the value register the one mapped to target
//   adj     - cycle adjustment passed through CLOCK_ADJUST
//   reglist - host registers to preserve across a handler call
static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs = get_reg(regmap,-1);
  int rt = get_reg(regmap,target);
  assert(rs >= 0);
  assert(rt >= 0);
  uintptr_t host_addr = 0;
  void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
  if (handler == NULL) {
    // direct memory access
    if (addr != host_addr) {
      if (host_addr >= 0x100000000ull)
        abort(); // ROREG not implemented
      // rs is taken to hold addr; turn it into host_addr
      emit_movimm_from(addr, rs, host_addr, rs);
    }
    switch (type) {
      case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
      case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
      case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
      default: assert(0);
    }
    return;
  }

  // call a memhandler
  save_regs(reglist);
  emit_writeword(rs, &address); // some handlers still need it
  // arg0 = value, extended to the access width
  loadstore_extend(type, rt, 0);
  // arg2 = cycle count plus adjustment; cc_use is the register the count
  // is read from and later written back to
  int cc, cc_use;
  cc = cc_use = get_reg(regmap, CCREG);
  if (cc < 0)
    emit_loadreg(CCREG, (cc_use = 2));
  emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2);

  emit_call(do_memhandler_pre);
  emit_call(handler);
  emit_call(do_memhandler_post);
  // take the count back from r0 and undo the adjustment
  emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use);
  if (cc < 0)
    emit_storereg(CCREG, cc_use);
  restore_regs(reglist);
}
1618
1619static void do_unalignedwritestub(int n)
1620{
1621 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
1622 assert(0);
1623}
1624
1625static void set_loadlp(u_int *loadl, void *lit)
1626{
1627 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1628 assert((*loadl & ~0x1f) == 0x58000000);
1629 assert((ofs & 3) == 0);
1630 assert(ofs < 0x100000);
1631 *loadl |= (ofs >> 2) << 5;
1632}
1633
1634// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1635static void do_dirty_stub_emit_args(u_int arg0)
1636{
1637 assert(slen <= MAXBLOCK);
1638 emit_loadlp_ofs(0, 1); // ldr x1, source
1639 emit_loadlp_ofs(0, 2); // ldr x2, copy
1640 emit_movz(slen*4, 3);
1641 emit_movz(arg0 & 0xffff, 0);
1642 emit_movk_lsl16(arg0 >> 16, 0);
1643}
1644
1645static void do_dirty_stub_emit_literals(u_int *loadlps)
1646{
1647 set_loadlp(&loadlps[0], out);
1648 output_w64((uintptr_t)source);
1649 set_loadlp(&loadlps[1], out);
1650 output_w64((uintptr_t)copy);
1651}
1652
1653static void *do_dirty_stub(int i)
1654{
1655 assem_debug("do_dirty_stub %x\n",start+i*4);
1656 u_int *loadlps = (void *)out;
1657 do_dirty_stub_emit_args(start + i*4);
1658 emit_call(verify_code);
1659 void *entry = out;
1660 load_regs_entry(i);
1661 if (entry == out)
1662 entry = instr_addr[i];
1663 emit_jmp(instr_addr[i]);
1664 do_dirty_stub_emit_literals(loadlps);
1665 return entry;
1666}
1667
1668static void do_dirty_stub_ds()
1669{
1670 do_dirty_stub_emit_args(start + 1);
1671 u_int *loadlps = (void *)out;
1672 emit_call(verify_code_ds);
1673 emit_jmp(out + 8*2);
1674 do_dirty_stub_emit_literals(loadlps);
1675}
1676
1677/* Special assem */
1678
#define shift_assemble shift_assemble_arm64

// Placeholder for shift_assemble: unconditionally hits assert(0).
static void shift_assemble_arm64(int i,struct regstat *i_regs)
{
  assert(0); // not implemented
}
#define loadlr_assemble loadlr_assemble_arm64

// Placeholder for loadlr_assemble: unconditionally hits assert(0).
static void loadlr_assemble_arm64(int i,struct regstat *i_regs)
{
  assert(0); // not implemented
}
1691
// Placeholder: unconditionally hits assert(0).
static void c2op_assemble(int i,struct regstat *i_regs)
{
  assert(0); // not implemented
}
1696
// Placeholder for multdiv_assemble: unconditionally hits assert(0).
static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
{
  assert(0); // not implemented
}
#define multdiv_assemble multdiv_assemble_arm64
1702
1703static void do_jump_vaddr(u_int rs)
1704{
1705 if (rs != 0)
1706 emit_mov(rs, 0);
1707 emit_call(get_addr_ht);
1708 emit_jmpreg(0);
1709}
1710
1711static void do_preload_rhash(u_int r) {
1712 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1713 // register. On ARM the hash can be done with a single instruction (below)
1714}
1715
1716static void do_preload_rhtbl(u_int ht) {
1717 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
1718}
1719
1720static void do_rhash(u_int rs,u_int rh) {
1721 emit_andimm(rs, 0xf8, rh);
1722}
1723
1724static void do_miniht_load(int ht, u_int rh) {
1725 emit_add64(ht, rh, ht);
1726 emit_ldst(0, 0, rh, ht, 0);
1727}
1728
1729static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1730 emit_cmp(rh, rs);
1731 void *jaddr = out;
1732 emit_jeq(0);
1733 do_jump_vaddr(rs);
1734
1735 set_jump_target(jaddr, out);
1736 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1737 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1738 emit_jmpreg(ht);
1739}
1740
1741// parsed by set_jump_target?
1742static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
1743 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1744 emit_movk(return_address&0xffff,rt);
1745 add_to_linker(out,return_address,1);
1746 emit_adr(out,temp);
1747 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1748 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
1749}
1750
1751static void mark_clear_cache(void *target)
1752{
1753 u_long offset = (u_char *)target - translation_cache;
1754 u_int mask = 1u << ((offset >> 12) & 31);
1755 if (!(needs_clear_cache[offset >> 17] & mask)) {
1756 char *start = (char *)((u_long)target & ~4095ul);
1757 start_tcache_write(start, start + 4096);
1758 needs_clear_cache[offset >> 17] |= mask;
1759 }
1760}
1761
1762// Clearing the cache is rather slow on ARM Linux, so mark the areas
1763// that need to be cleared, and then only clear these areas once.
1764static void do_clear_cache()
1765{
1766 int i,j;
1767 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
1768 {
1769 u_int bitmap=needs_clear_cache[i];
1770 if(bitmap) {
1771 u_char *start, *end;
1772 for(j=0;j<32;j++)
1773 {
1774 if(bitmap&(1<<j)) {
1775 start=translation_cache+i*131072+j*4096;
1776 end=start+4095;
1777 j++;
1778 while(j<32) {
1779 if(bitmap&(1<<j)) {
1780 end+=4096;
1781 j++;
1782 }else{
1783 end_tcache_write(start, end);
1784 break;
1785 }
1786 }
1787 }
1788 }
1789 needs_clear_cache[i]=0;
1790 }
1791 }
1792}
1793
// CPU-architecture-specific initialization
// (nothing to do here)
static void arch_init()
{
}
1797
1798// vim:shiftwidth=2:expandtab