drc: arm64 wip
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
23#include "arm_features.h"
24
25#if defined(BASE_ADDR_FIXED)
26#elif defined(BASE_ADDR_DYNAMIC)
27u_char *translation_cache;
28#else
29u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
30#endif
d1e4ebd9 31static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
be516ebe 32
33#define CALLER_SAVE_REGS 0x0007ffff
34
35#define unused __attribute__((unused))
36
d1e4ebd9 37void do_memhandler_pre();
38void do_memhandler_post();
be516ebe 39
40/* Linker */
d1e4ebd9 41static void set_jump_target(void *addr, void *target)
be516ebe 42{
d1e4ebd9 43 u_int *ptr = addr;
44 intptr_t offset = (u_char *)target - (u_char *)addr;
45
46 if((*ptr&0xFC000000)==0x14000000) {
47 assert(offset>=-134217728LL&&offset<134217728LL);
48 *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
49 }
50 else if((*ptr&0xff000000)==0x54000000) {
51 // Conditional branch are limited to +/- 1MB
52 // block max size is 256k so branching beyond the +/- 1MB limit
53 // should only happen when jumping to an already compiled block (see add_link)
54 // a workaround would be to do a trampoline jump via a stub at the end of the block
55 assert(offset>=-1048576LL&&offset<1048576LL);
56 *ptr=(*ptr&0xFF00000F)|(((offset>>2)&0x7ffff)<<5);
57 }
58 else if((*ptr&0x9f000000)==0x10000000) { //adr
59 // generated by do_miniht_insert
60 assert(offset>=-1048576LL&&offset<1048576LL);
61 *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
62 }
63 else
64 assert(0); // should not happen
be516ebe 65}
66
67// from a pointer to external jump stub (which was produced by emit_extjump2)
68// find where the jumping insn is
69static void *find_extjump_insn(void *stub)
70{
d1e4ebd9 71 int *ptr = (int *)stub + 2;
72 assert((*ptr&0x9f000000) == 0x10000000); // adr
73 int offset = (((signed int)(*ptr<<8)>>13)<<2)|((*ptr>>29)&0x3);
74 return ptr + offset / 4;
be516ebe 75}
76
77// find where external branch is liked to using addr of it's stub:
78// get address that insn one after stub loads (dyna_linker arg1),
79// treat it as a pointer to branch insn,
80// return addr where that branch jumps to
81static void *get_pointer(void *stub)
82{
d1e4ebd9 83 int *i_ptr = find_extjump_insn(stub);
84 assert((*i_ptr&0xfc000000) == 0x14000000); // b
85 return (u_char *)i_ptr+(((signed int)(*i_ptr<<6)>>6)<<2);
be516ebe 86}
87
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// Not implemented here yet: the body only asserts.
static void *get_clean_addr(void *addr)
{
  (void)addr;
  assert(0);
  return NULL;
}
95
// Not implemented here yet: the body only asserts and reports 0.
static int verify_dirty(u_int *ptr)
{
  (void)ptr;
  assert(0);
  return 0;
}
101
// Returns non-zero unless the first instruction at addr has 0x58 in its
// top byte, i.e. unless it is an ldr (literal).
static int isclean(void *addr)
{
  const u_int *insn = addr;
  const u_int top_byte = *insn >> 24;

  // the only place ldr (literal) is used
  return top_byte != 0x58;
}
107
108static uint64_t get_from_ldr_literal(const u_int *i)
109{
110 signed int ofs;
111 assert((i[0] & 0xff000000) == 0x58000000);
112 ofs = i[0] << 8;
113 ofs >>= 5+8;
114 return *(uint64_t *)(i + ofs);
115}
116
117static uint64_t get_from_movz(const u_int *i)
118{
119 assert((i[0] & 0x7fe00000) == 0x52800000);
120 return (i[0] >> 5) & 0xffff;
be516ebe 121}
122
// Get the source range (host pointers) that the block at addr was compiled
// from. The block is expected to begin with:
//   ldr x1, source / ldr x2, copy / movz w3, #slen*4
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  const u_int *code = addr;
  u_char *source;

  assert((code[0] & 0xff00001f) == 0x58000001); // ldr x1, source
  assert((code[1] & 0xff00001f) == 0x58000002); // ldr x2, copy
  assert((code[2] & 0xffe0001f) == 0x52800003); // movz w3, #slen*4
  source = (u_char *)get_from_ldr_literal(&code[0]);
  *start = source;
  *end = source + get_from_movz(&code[2]);
}
133
134// Allocate a specific ARM register.
135static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
136{
137 int n;
138 int dirty=0;
139
140 // see if it's already allocated (and dealloc it)
141 for(n=0;n<HOST_REGS;n++)
142 {
143 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
144 dirty=(cur->dirty>>n)&1;
145 cur->regmap[n]=-1;
146 }
147 }
148
149 cur->regmap[hr]=reg;
150 cur->dirty&=~(1<<hr);
151 cur->dirty|=dirty<<hr;
152 cur->isconst&=~(1<<hr);
153}
154
155// Alloc cycle count into dedicated register
156static void alloc_cc(struct regstat *cur,int i)
157{
158 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
159}
160
161/* Special alloc */
162
163
164/* Assembler */
165
166static unused const char *regname[32] = {
d1e4ebd9 167 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
168 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
169 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
170 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
171};
172
173static unused const char *regname64[32] = {
174 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
175 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
176 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
177 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
178};
179
// Condition codes, numbered 0..15 in the order used by the cond field.
enum {
  COND_EQ,
  COND_NE,
  COND_CS,
  COND_CC,
  COND_MI,
  COND_PL,
  COND_VS,
  COND_VC,
  COND_HI,
  COND_LS,
  COND_GE,
  COND_LT,
  COND_GT,
  COND_LE,
  COND_AW,
  COND_NV
};
184
185static unused const char *condname[16] = {
186 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
187 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 188};
189
be516ebe 190static void output_w32(u_int word)
191{
192 *((u_int *)out) = word;
193 out += 4;
194}
195
d1e4ebd9 196static void output_w64(uint64_t dword)
197{
198 *((uint64_t *)out) = dword;
199 out+=8;
200}
201
202/*
687b4580 203static u_int rm_rd(u_int rm, u_int rd)
204{
205 assert(rm < 31);
206 assert(rd < 31);
207 return (rm << 16) | rd;
208}
d1e4ebd9 209*/
687b4580 210
be516ebe 211static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
212{
d1e4ebd9 213 assert(rm < 32);
214 assert(rn < 32);
215 assert(rd < 32);
be516ebe 216 return (rm << 16) | (rn << 5) | rd;
217}
218
d1e4ebd9 219static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
220{
221 assert(imm7 < 0x80);
222 assert(rt2 < 31);
223 assert(rn < 32);
224 assert(rt < 31);
225 return (imm7 << 15) | (rt2 << 10) | (rn << 5) | rt;
226}
227
// Pack Rm/Rn/Rd via rm_rn_rd() and add a 6-bit immediate at bits 15:10.
static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
{
  u_int regs;

  assert(imm6 <= 63);
  regs = rm_rn_rd(rm, rn, rd);
  return regs | (imm6 << 10);
}
233
be516ebe 234static u_int imm16_rd(u_int imm16, u_int rd)
235{
236 assert(imm16 < 0x10000);
237 assert(rd < 31);
238 return (imm16 << 5) | rd;
239}
240
687b4580 241static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
242{
243 assert(imm12 < 0x1000);
d1e4ebd9 244 assert(rn < 32);
245 assert(rd < 32);
246 return (imm12 << 10) | (rn << 5) | rd;
247}
248
249static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
250{
251 assert(imm9 < 0x200);
687b4580 252 assert(rn < 31);
253 assert(rd < 31);
d1e4ebd9 254 return (imm9 << 12) | (rn << 5) | rd;
687b4580 255}
256
d1e4ebd9 257static u_int imm19_rt(u_int imm19, u_int rt)
258{
259 assert(imm19 < 0x80000);
260 assert(rt < 31);
261 return (imm19 << 5) | rt;
262}
263
264static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
265{
266 assert(n < 2);
267 assert(immr < 0x40);
268 assert(imms < 0x40);
269 assert(rn < 32);
270 assert(rd < 32);
271 return (n << 22) | (immr << 16) | (imms << 10) | (rn << 5) | rd;
272}
273
274static u_int genjmp(const u_char *addr)
be516ebe 275{
276 intptr_t offset = addr - out;
d1e4ebd9 277 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 278 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 279 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
280 abort();
be516ebe 281 return 0;
282 }
d1e4ebd9 283 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 284}
285
d1e4ebd9 286static u_int genjmpcc(const u_char *addr)
be516ebe 287{
288 intptr_t offset = addr - out;
d1e4ebd9 289 if ((uintptr_t)addr < 3) return 0;
be516ebe 290 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 291 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
292 abort();
293 return 0;
294 }
295 return ((u_int)offset >> 2) & 0x7ffff;
296}
297
298static uint32_t is_mask(u_int value)
299{
300 return value && ((value + 1) & value) == 0;
301}
302
303// This function returns true if the argument contains a
304// non-empty sequence of ones (possibly rotated) with the remainder zero.
305static uint32_t is_rotated_mask(u_int value)
306{
307 if (value == 0)
be516ebe 308 return 0;
d1e4ebd9 309 if (is_mask((value - 1) | value))
310 return 1;
311 return is_mask((~value - 1) | ~value);
312}
313
314static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
315{
316 int lzeros, tzeros, ones;
317 assert(value != 0);
318 if (is_mask((value - 1) | value)) {
319 lzeros = __builtin_clz(value);
320 tzeros = __builtin_ctz(value);
321 ones = 32 - lzeros - tzeros;
322 *immr = (32 - tzeros) & 31;
323 *imms = ones - 1;
324 return;
be516ebe 325 }
d1e4ebd9 326 value = ~value;
327 if (is_mask((value - 1) | value)) {
328 lzeros = __builtin_clz(value);
329 tzeros = __builtin_ctz(value);
330 ones = 32 - lzeros - tzeros;
331 *immr = 31 - tzeros;
332 *imms = 31 - ones;
333 return;
334 }
335 assert(0);
be516ebe 336}
337
338static void emit_mov(u_int rs, u_int rt)
339{
687b4580 340 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 341 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
342}
343
344static void emit_mov64(u_int rs, u_int rt)
345{
346 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
347 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 348}
349
350static void emit_movs(u_int rs, u_int rt)
351{
d1e4ebd9 352 assert(0); // misleading
687b4580 353 assem_debug("movs %s,%s\n", regname[rt], regname[rs]);
354 output_w32(0x31000000 | imm12_rn_rd(0, rs, rt));
be516ebe 355}
356
687b4580 357static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 358{
d1e4ebd9 359 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
360 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 361}
362
d1e4ebd9 363static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 364{
d1e4ebd9 365 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
366 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 367}
368
d1e4ebd9 369#pragma GCC diagnostic ignored "-Wunused-function"
370static void emit_adds(u_int rs1, u_int rs2, u_int rt)
be516ebe 371{
d1e4ebd9 372 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
373 output_w32(0x2b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 374}
375
d1e4ebd9 376static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 377{
d1e4ebd9 378 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
379 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
380}
381
382static void emit_neg(u_int rs, u_int rt)
383{
384 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
385 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 386}
387
687b4580 388static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 389{
d1e4ebd9 390 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 391 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 392}
393
d1e4ebd9 394static void emit_movz(u_int imm, u_int rt)
be516ebe 395{
d1e4ebd9 396 assem_debug("movz %s,#%#x\n", regname[rt], imm);
397 output_w32(0x52800000 | imm16_rd(imm, rt));
398}
399
400static void emit_movz_lsl16(u_int imm, u_int rt)
401{
402 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
403 output_w32(0x52a00000 | imm16_rd(imm, rt));
404}
405
406static void emit_movn(u_int imm, u_int rt)
407{
408 assem_debug("movn %s,#%#x\n", regname[rt], imm);
409 output_w32(0x12800000 | imm16_rd(imm, rt));
410}
411
412static void emit_movn_lsl16(u_int imm,u_int rt)
413{
414 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
415 output_w32(0x12a00000 | imm16_rd(imm, rt));
416}
417
418static void emit_movk(u_int imm,u_int rt)
419{
420 assem_debug("movk %s,#%#x\n", regname[rt], imm);
421 output_w32(0x72800000 | imm16_rd(imm, rt));
422}
423
424static void emit_movk_lsl16(u_int imm,u_int rt)
425{
426 assert(imm<65536);
427 assem_debug("movk %s, #%#x, lsl #16\n", regname[rt], imm);
428 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 429}
430
431static void emit_zeroreg(u_int rt)
432{
d1e4ebd9 433 emit_movz(0, rt);
be516ebe 434}
435
be516ebe 436static void emit_movimm(u_int imm, u_int rt)
437{
d1e4ebd9 438 if (imm < 65536)
439 emit_movz(imm, rt);
440 else if ((~imm) < 65536)
441 emit_movn(~imm, rt);
442 else if ((imm&0xffff) == 0)
443 emit_movz_lsl16(imm >> 16, rt);
444 else if (((~imm)&0xffff) == 0)
445 emit_movn_lsl16(~imm >> 16, rt);
446 else if (is_rotated_mask(imm)) {
447 u_int immr, imms;
448 gen_logical_imm(imm, &immr, &imms);
449 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
450 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
451 }
be516ebe 452 else {
d1e4ebd9 453 emit_movz(imm & 0xffff, rt);
454 emit_movk_lsl16(imm >> 16, rt);
be516ebe 455 }
456}
457
687b4580 458static void emit_readword(void *addr, u_int rt)
459{
460 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
461 if (!(offset & 3) && offset <= 16380) {
462 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
463 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
464 }
465 else
466 assert(0);
467}
468
d1e4ebd9 469static void emit_readdword(void *addr, u_int rt)
470{
471 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
472 if (!(offset & 7) && offset <= 32760) {
473 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
474 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
475 }
476 else
477 assert(0);
478}
479
be516ebe 480static void emit_loadreg(u_int r, u_int hr)
481{
d1e4ebd9 482 int is64 = 0;
be516ebe 483 assert(r < 64);
484 if (r == 0)
485 emit_zeroreg(hr);
486 else {
7c3a5182 487 void *addr = &psxRegs.GPR.r[r];
be516ebe 488 switch (r) {
7c3a5182 489 //case HIREG: addr = &hi; break;
490 //case LOREG: addr = &lo; break;
be516ebe 491 case CCREG: addr = &cycle_count; break;
492 case CSREG: addr = &Status; break;
d1e4ebd9 493 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 494 default: assert(r < 34); break;
be516ebe 495 }
d1e4ebd9 496 if (is64)
497 emit_readdword(addr, hr);
498 else
499 emit_readword(addr, hr);
be516ebe 500 }
501}
502
687b4580 503static void emit_writeword(u_int rt, void *addr)
504{
505 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
506 if (!(offset & 3) && offset <= 16380) {
507 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
508 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
509 }
510 else
511 assert(0);
512}
513
d1e4ebd9 514static void emit_writedword(u_int rt, void *addr)
515{
516 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
517 if (!(offset & 7) && offset <= 32760) {
518 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
519 output_w32(0xf9000000 | imm12_rn_rd(offset >> 2, FP, rt));
520 }
521 else
522 assert(0);
523}
524
687b4580 525static void emit_storereg(u_int r, u_int hr)
be516ebe 526{
527 assert(r < 64);
7c3a5182 528 void *addr = &psxRegs.GPR.r[r];
be516ebe 529 switch (r) {
7c3a5182 530 //case HIREG: addr = &hi; break;
531 //case LOREG: addr = &lo; break;
be516ebe 532 case CCREG: addr = &cycle_count; break;
7c3a5182 533 default: assert(r < 34); break;
be516ebe 534 }
687b4580 535 emit_writeword(hr, addr);
be516ebe 536}
537
538static void emit_test(u_int rs, u_int rt)
539{
d1e4ebd9 540 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
541 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 542}
543
d1e4ebd9 544static void emit_testimm(u_int rs, u_int imm)
be516ebe 545{
d1e4ebd9 546 u_int immr, imms;
687b4580 547 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 548 assert(is_rotated_mask(imm)); // good enough for PCSX
549 gen_logical_imm(imm, &immr, &imms);
550 output_w32(0xb9000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 551}
552
553static void emit_testeqimm(u_int rs,int imm)
554{
555 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
d1e4ebd9 556 assert(0); // TODO eliminate emit_testeqimm
be516ebe 557}
558
559static void emit_not(u_int rs,u_int rt)
560{
561 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 562 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 563}
564
565static void emit_mvnmi(u_int rs,u_int rt)
566{
567 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 568 assert(0); // eliminate
be516ebe 569}
570
571static void emit_and(u_int rs1,u_int rs2,u_int rt)
572{
573 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 574 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 575}
576
577static void emit_or(u_int rs1,u_int rs2,u_int rt)
578{
579 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 580 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 581}
582
583static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
584{
be516ebe 585 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 586 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 587}
588
589static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
590{
be516ebe 591 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 592 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 593}
594
595static void emit_xor(u_int rs1,u_int rs2,u_int rt)
596{
597 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 598 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 599}
600
d1e4ebd9 601static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 602{
d1e4ebd9 603 unused const char *st = s ? "s" : "";
604 s = s ? 0x20000000 : 0;
605 is64 = is64 ? 0x80000000 : 0;
687b4580 606 if (imm < 4096) {
d1e4ebd9 607 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
608 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 609 }
610 else if (-imm < 4096) {
d1e4ebd9 611 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
612 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
613 }
614 else if (imm < 16777216) {
615 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
616 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
617 if ((imm & 0xfff) || s) {
618 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
619 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rt, rt));
620 }
621 }
622 else if (-imm < 16777216) {
623 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
624 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
625 if ((imm & 0xfff) || s) {
626 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
627 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
628 }
687b4580 629 }
630 else
631 assert(0);
be516ebe 632}
633
d1e4ebd9 634static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
635{
636 emit_addimm_s(0, 0, rs, imm, rt);
637}
638
639static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
640{
641 emit_addimm_s(0, 1, rs, imm, rt);
642}
643
be516ebe 644static void emit_addimm_and_set_flags(int imm, u_int rt)
645{
d1e4ebd9 646 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 647}
648
649static void emit_addimm_no_flags(u_int imm,u_int rt)
650{
651 emit_addimm(rt,imm,rt);
652}
653
d1e4ebd9 654static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 655{
d1e4ebd9 656 const char *names[] = { "and", "orr", "eor", "ands" };
657 const char *name = names[op];
658 u_int immr, imms;
659 op = op << 29;
660 if (is_rotated_mask(imm)) {
661 gen_logical_imm(imm, &immr, &imms);
662 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
663 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
664 }
665 else {
666 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
667 host_tempreg_acquire();
668 emit_movimm(imm, HOST_TEMPREG);
669 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
670 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
671 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
672 host_tempreg_release();
673 }
674 (void)name;
be516ebe 675}
676
d1e4ebd9 677static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 678{
d1e4ebd9 679 if (imm == 0)
680 emit_zeroreg(rt);
681 else
682 emit_logicop_imm(0, rs, imm, rt);
be516ebe 683}
684
d1e4ebd9 685static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 686{
d1e4ebd9 687 if (imm == 0) {
688 if (rs != rt)
689 emit_mov(rs, rt);
690 }
691 else
692 emit_logicop_imm(1, rs, imm, rt);
be516ebe 693}
694
d1e4ebd9 695static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 696{
d1e4ebd9 697 if (imm == 0) {
698 if (rs != rt)
699 emit_mov(rs, rt);
700 }
701 else
702 emit_logicop_imm(2, rs, imm, rt);
be516ebe 703}
704
d1e4ebd9 705static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 706{
d1e4ebd9 707 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
708 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 709}
710
d1e4ebd9 711static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 712{
d1e4ebd9 713 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
714 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 715}
716
717static void emit_shlimm(u_int rs,u_int imm,u_int rt)
718{
be516ebe 719 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 720 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 721}
722
723static unused void emit_lslpls_imm(u_int rs,int imm,u_int rt)
724{
d1e4ebd9 725 assert(0); // eliminate
be516ebe 726}
727
728static void emit_shrimm(u_int rs,u_int imm,u_int rt)
729{
be516ebe 730 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 731 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 732}
733
734static void emit_sarimm(u_int rs,u_int imm,u_int rt)
735{
be516ebe 736 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 737 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 738}
739
740static void emit_rorimm(u_int rs,u_int imm,u_int rt)
741{
d1e4ebd9 742 assem_debug("ror %s,%s,#%d",regname[rt],regname[rs],imm);
743 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 744}
745
746static void emit_signextend16(u_int rs, u_int rt)
747{
748 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 749 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 750}
751
d1e4ebd9 752static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 753{
d1e4ebd9 754 assem_debug("lsl %s,%s,%s",regname[rt],regname[rs],regname[rshift]);
755 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 756}
757
d1e4ebd9 758static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 759{
d1e4ebd9 760 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
761 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 762}
763
d1e4ebd9 764static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 765{
d1e4ebd9 766 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
767 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 768}
769
d1e4ebd9 770static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 771{
d1e4ebd9 772 if (imm < 4096) {
773 assem_debug("cmp %s,%#x\n", regname[rs], imm);
774 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
775 }
776 else if (-imm < 4096) {
777 assem_debug("cmn %s,%#x\n", regname[rs], imm);
778 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
779 }
780 else if (imm < 16777216 && !(imm & 0xfff)) {
781 assem_debug("cmp %s,#%#x,lsl #12\n", regname[rs], imm >> 12);
782 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
783 }
784 else {
785 host_tempreg_acquire();
786 emit_movimm(imm, HOST_TEMPREG);
787 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
788 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
789 host_tempreg_release();
790 }
be516ebe 791}
792
d1e4ebd9 793static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 794{
d1e4ebd9 795 assert(imm == 0 || imm == 1);
796 assert(cond0 < 0x10);
797 assert(cond1 < 0x10);
798 if (imm) {
799 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
800 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
801 } else {
802 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
803 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
804 }
be516ebe 805}
806
d1e4ebd9 807static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 808{
d1e4ebd9 809 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 810}
811
d1e4ebd9 812static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 813{
d1e4ebd9 814 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 815}
816
817static void emit_cmovb_imm(int imm,u_int rt)
818{
d1e4ebd9 819 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 820}
821
822static void emit_cmovs_imm(int imm,u_int rt)
823{
d1e4ebd9 824 emit_cmov_imm(COND_MI, COND_PL, imm, rt);
be516ebe 825}
826
827static void emit_cmovne_reg(u_int rs,u_int rt)
828{
d1e4ebd9 829 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
830 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 831}
832
833static void emit_cmovl_reg(u_int rs,u_int rt)
834{
d1e4ebd9 835 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
836 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 837}
838
839static void emit_cmovs_reg(u_int rs,u_int rt)
840{
d1e4ebd9 841 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
842 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 843}
844
845static void emit_slti32(u_int rs,int imm,u_int rt)
846{
847 if(rs!=rt) emit_zeroreg(rt);
848 emit_cmpimm(rs,imm);
849 if(rs==rt) emit_movimm(0,rt);
850 emit_cmovl_imm(1,rt);
851}
852
853static void emit_sltiu32(u_int rs,int imm,u_int rt)
854{
855 if(rs!=rt) emit_zeroreg(rt);
856 emit_cmpimm(rs,imm);
857 if(rs==rt) emit_movimm(0,rt);
858 emit_cmovb_imm(1,rt);
859}
860
861static void emit_cmp(u_int rs,u_int rt)
862{
863 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 864 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 865}
866
867static void emit_set_gz32(u_int rs, u_int rt)
868{
869 //assem_debug("set_gz32\n");
870 emit_cmpimm(rs,1);
871 emit_movimm(1,rt);
872 emit_cmovl_imm(0,rt);
873}
874
875static void emit_set_nz32(u_int rs, u_int rt)
876{
877 //assem_debug("set_nz32\n");
d1e4ebd9 878 if(rs!=rt) emit_mov(rs,rt);
879 emit_test(rs,rs);
880 emit_cmovne_imm(1,rt);
be516ebe 881}
882
883static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
884{
885 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
887 emit_cmp(rs1,rs2);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovl_imm(1,rt);
890}
891
892static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
893{
894 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
895 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
896 emit_cmp(rs1,rs2);
897 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
898 emit_cmovb_imm(1,rt);
899}
900
d1e4ebd9 901static void emit_call(const void *a)
be516ebe 902{
d1e4ebd9 903 intptr_t diff = (u_char *)a - out;
904 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 905 assert(!(diff & 3));
906 if (-134217728 <= diff && diff <= 134217727)
907 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
908 else
909 assert(0);
be516ebe 910}
911
d1e4ebd9 912static void emit_jmp(const void *a)
be516ebe 913{
d1e4ebd9 914 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
915 u_int offset = genjmp(a);
916 output_w32(0x14000000 | offset);
be516ebe 917}
918
d1e4ebd9 919static void emit_jne(const void *a)
be516ebe 920{
d1e4ebd9 921 assem_debug("bne %p\n", a);
922 u_int offset = genjmpcc(a);
923 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 924}
925
7c3a5182 926static void emit_jeq(const void *a)
be516ebe 927{
d1e4ebd9 928 assem_debug("beq %p\n", a);
929 u_int offset = genjmpcc(a);
930 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 931}
932
7c3a5182 933static void emit_js(const void *a)
be516ebe 934{
d1e4ebd9 935 assem_debug("bmi %p\n", a);
936 u_int offset = genjmpcc(a);
937 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 938}
939
7c3a5182 940static void emit_jns(const void *a)
be516ebe 941{
d1e4ebd9 942 assem_debug("bpl %p\n", a);
943 u_int offset = genjmpcc(a);
944 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 945}
946
7c3a5182 947static void emit_jl(const void *a)
be516ebe 948{
d1e4ebd9 949 assem_debug("blt %p\n", a);
950 u_int offset = genjmpcc(a);
951 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 952}
953
7c3a5182 954static void emit_jge(const void *a)
be516ebe 955{
d1e4ebd9 956 assem_debug("bge %p\n", a);
957 u_int offset = genjmpcc(a);
958 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 959}
960
7c3a5182 961static void emit_jno(const void *a)
be516ebe 962{
d1e4ebd9 963 assem_debug("bvc %p\n", a);
964 u_int offset = genjmpcc(a);
965 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 966}
967
7c3a5182 968static void emit_jc(const void *a)
be516ebe 969{
d1e4ebd9 970 assem_debug("bcs %p\n", a);
971 u_int offset = genjmpcc(a);
972 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 973}
974
7c3a5182 975static void emit_jcc(const void *a)
be516ebe 976{
7c3a5182 977 assem_debug("bcc %p\n", a);
d1e4ebd9 978 u_int offset = genjmpcc(a);
979 output_w32(0x54000000 | (offset << 5) | COND_CC);
be516ebe 980}
981
982static void emit_jmpreg(u_int r)
983{
d1e4ebd9 984 assem_debug("br %s", regname64[r]);
985 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 986}
987
988static void emit_retreg(u_int r)
989{
d1e4ebd9 990 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 991 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
992}
993
994static void emit_ret(void)
995{
996 emit_retreg(LR);
997}
998
d1e4ebd9 999static void emit_adr(void *addr, u_int rt)
1000{
1001 intptr_t offset = (u_char *)addr - out;
1002 assert(-1048576 <= offset && offset < 1048576);
1003 assem_debug("adr x%d,#%#lx\n", rt, offset);
1004 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1005}
1006
be516ebe 1007static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1008{
d1e4ebd9 1009 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1010 assert(-256 <= offset && offset < 256);
1011 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1012}
1013
1014static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1015{
1016 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1017 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1018}
1019
1020static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1021{
1022 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1023 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1024}
1025
1026static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1027{
1028 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1029 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1030}
1031
1032static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1033{
1034 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1035 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1036}
1037
1038static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1039{
1040 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1041 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1042}
1043
1044static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1045{
1046 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1047 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1048}
1049
1050static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1051{
1052 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1053 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1054}
1055
1056static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1057{
1058 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1059 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1060}
1061
1062static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1063{
1064 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1065 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1066}
1067
be516ebe 1068static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1069{
d1e4ebd9 1070 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1071 assert(-256 <= offset && offset < 256);
1072 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1073}
1074
1075static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1076{
d1e4ebd9 1077 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1078 assert(-256 <= offset && offset < 256);
1079 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1080}
1081
1082static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1083{
d1e4ebd9 1084 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1085 assert(-256 <= offset && offset < 256);
1086 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1087}
1088
1089static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1090{
d1e4ebd9 1091 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1092 assert(-256 <= offset && offset < 256);
1093 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1094}
1095
be516ebe 1096static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1097{
687b4580 1098 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1099 if (!(offset & 3) && offset <= 16380)
1100 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
1101 else
1102 assert(0);
be516ebe 1103}
1104
1105static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1106{
687b4580 1107 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1108 if (!(offset & 1) && offset <= 8190)
1109 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
1110 else
1111 assert(0);
be516ebe 1112}
1113
1114static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1115{
687b4580 1116 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1117 if ((u_int)offset < 4096)
1118 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
1119 else
1120 assert(0);
be516ebe 1121}
1122
1123static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1124{
1125 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1126 assert(rs1<16);
1127 assert(rs2<16);
1128 assert(hi<16);
1129 assert(lo<16);
1130 assert(0);
1131}
1132
1133static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1134{
1135 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1136 assert(rs1<16);
1137 assert(rs2<16);
1138 assert(hi<16);
1139 assert(lo<16);
1140 assert(0);
1141}
1142
1143static void emit_clz(u_int rs,u_int rt)
1144{
1145 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1146 assert(0);
1147}
1148
be516ebe 1149// special case for checking invalid_code
d1e4ebd9 1150static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1151{
d1e4ebd9 1152 host_tempreg_acquire();
1153 emit_shrimm(r, 12, HOST_TEMPREG);
1154 assem_debug("ldrb %s,[%s,%s]",regname[HOST_TEMPREG],regname64[rbase],regname64[HOST_TEMPREG]);
1155 output_w32(0x38606800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
1156 emit_cmpimm(HOST_TEMPREG, imm);
1157 host_tempreg_release();
be516ebe 1158}
1159
1160static void emit_orrne_imm(u_int rs,int imm,u_int rt)
1161{
687b4580 1162 assem_debug("orrne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
be516ebe 1163 assert(0);
1164}
1165
1166static void emit_andne_imm(u_int rs,int imm,u_int rt)
1167{
687b4580 1168 assem_debug("andne %s,%s,#%#x\n",regname[rt],regname[rs],imm);
be516ebe 1169 assert(0);
1170}
1171
1172static unused void emit_addpl_imm(u_int rs,int imm,u_int rt)
1173{
687b4580 1174 assem_debug("addpl %s,%s,#%#x\n",regname[rt],regname[rs],imm);
be516ebe 1175 assert(0);
1176}
1177
d1e4ebd9 1178static void emit_loadlp_ofs(u_int ofs, u_int rt)
1179{
1180 output_w32(0x58000000 | imm19_rt(ofs, rt));
1181}
1182
687b4580 1183static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1184{
687b4580 1185 u_int op = 0xb9000000;
d1e4ebd9 1186 unused const char *ldst = is_st ? "st" : "ld";
1187 unused char rp = is64 ? 'x' : 'w';
687b4580 1188 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1189 is64 = is64 ? 1 : 0;
1190 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1191 ofs = (ofs >> (2+is64));
687b4580 1192 if (!is_st) op |= 0x00400000;
1193 if (is64) op |= 0x40000000;
d1e4ebd9 1194 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1195}
1196
687b4580 1197static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1198{
687b4580 1199 u_int op = 0x29000000;
d1e4ebd9 1200 unused const char *ldst = is_st ? "st" : "ld";
1201 unused char rp = is64 ? 'x' : 'w';
687b4580 1202 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1203 is64 = is64 ? 1 : 0;
1204 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1205 ofs = (ofs >> (2+is64));
1206 assert(-64 <= ofs && ofs <= 63);
1207 ofs &= 0x7f;
1208 if (!is_st) op |= 0x00400000;
1209 if (is64) op |= 0x80000000;
d1e4ebd9 1210 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1211}
1212
1213static void save_load_regs_all(int is_store, u_int reglist)
1214{
1215 int ofs = 0, c = 0;
1216 u_int r, pair[2];
1217 for (r = 0; reglist; r++, reglist >>= 1) {
1218 if (reglist & 1)
1219 pair[c++] = r;
1220 if (c == 2) {
1221 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1222 ofs += 8 * 2;
1223 c = 0;
1224 }
1225 }
1226 if (c) {
1227 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1228 ofs += 8;
1229 }
1230 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1231}
1232
1233// Save registers before function call
1234static void save_regs(u_int reglist)
1235{
1236 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1237 save_load_regs_all(1, reglist);
be516ebe 1238}
1239
1240// Restore registers after function call
1241static void restore_regs(u_int reglist)
1242{
1243 reglist &= CALLER_SAVE_REGS;
687b4580 1244 save_load_regs_all(0, reglist);
be516ebe 1245}
1246
1247/* Stubs/epilogue */
1248
1249static void literal_pool(int n)
1250{
1251 (void)literals;
1252}
1253
// Intentionally empty: nothing is emitted and n is ignored.
static void literal_pool_jumpover(int n)
{
}
1257
d1e4ebd9 1258// parsed by get_pointer, find_extjump_insn
1259static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1260{
d1e4ebd9 1261 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1262
d1e4ebd9 1263 emit_movz(target & 0xffff, 0);
1264 emit_movk_lsl16(target >> 16, 0);
1265
1266 // addr is in the current recompiled block (max 256k)
1267 // offset shouldn't exceed +/-1MB
1268 emit_adr(addr, 1);
1269 emit_jmp(linker);
be516ebe 1270}
1271
d1e4ebd9 1272static void check_extjump2(void *src)
be516ebe 1273{
d1e4ebd9 1274 u_int *ptr = src;
1275 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1276 (void)ptr;
be516ebe 1277}
1278
1279// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1280static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1281{
d1e4ebd9 1282 int diff = rt_val - rs_val;
1283 if ((-4096 <= diff && diff < 4096)
1284 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1285 emit_addimm(rs, diff, rt);
d1e4ebd9 1286 else if (is_rotated_mask(rs_val ^ rt_val))
1287 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1288 else
d1e4ebd9 1289 emit_movimm(rt_val, rt);
be516ebe 1290}
1291
d1e4ebd9 1292// return 1 if the above function can do it's job cheaply
687b4580 1293static int is_similar_value(u_int v1, u_int v2)
be516ebe 1294{
687b4580 1295 int diff = v1 - v2;
d1e4ebd9 1296 return (-4096 <= diff && diff < 4096)
1297 || (-16777216 <= diff && diff < 16777216 && !(diff & 0xfff))
1298 || is_rotated_mask(v1 ^ v2);
1299}
1300
1301// trashes r2
1302static void pass_args64(u_int a0, u_int a1)
1303{
1304 if(a0==1&&a1==0) {
1305 // must swap
1306 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1307 }
1308 else if(a0!=0&&a1==0) {
1309 emit_mov64(a1,1);
1310 if (a0>=0) emit_mov64(a0,0);
1311 }
1312 else {
1313 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1314 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1315 }
be516ebe 1316}
1317
d1e4ebd9 1318static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1319{
1320 switch(type) {
1321 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1322 case LOADBU_STUB:
1323 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1324 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1325 case LOADHU_STUB:
1326 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1327 case LOADW_STUB:
1328 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
1329 default: assert(0);
1330 }
1331}
1332
1333#include "pcsxmem.h"
be516ebe 1334//#include "pcsxmem_inline.c"
1335
1336static void do_readstub(int n)
1337{
1338 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1339 set_jump_target(stubs[n].addr, out);
1340 enum stub_type type = stubs[n].type;
1341 int i = stubs[n].a;
1342 int rs = stubs[n].b;
1343 const struct regstat *i_regs = (void *)stubs[n].c;
1344 u_int reglist = stubs[n].e;
1345 const signed char *i_regmap = i_regs->regmap;
1346 int rt;
1347 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1348 rt=get_reg(i_regmap,FTEMP);
1349 }else{
1350 rt=get_reg(i_regmap,rt1[i]);
1351 }
1352 assert(rs>=0);
1353 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1354 void *restore_jump = NULL, *handler_jump = NULL;
1355 reglist|=(1<<rs);
1356 for (r = 0; r < HOST_CCREG; r++) {
1357 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1358 temp = r;
1359 break;
1360 }
1361 }
1362 if(rt>=0&&rt1[i]!=0)
1363 reglist&=~(1<<rt);
1364 if(temp==-1) {
1365 save_regs(reglist);
1366 regs_saved=1;
1367 temp=(rs==0)?2:0;
1368 }
1369 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1370 temp2=1;
1371 emit_readdword(&mem_rtab,temp);
1372 emit_shrimm(rs,12,temp2);
1373 emit_readdword_dualindexedx8(temp,temp2,temp2);
1374 emit_adds64(temp2,temp2,temp2);
1375 handler_jump=out;
1376 emit_jc(0);
1377 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1378 switch(type) {
1379 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1380 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1381 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1382 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1383 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
1384 default: assert(0);
1385 }
1386 }
1387 if(regs_saved) {
1388 restore_jump=out;
1389 emit_jmp(0); // jump to reg restore
1390 }
1391 else
1392 emit_jmp(stubs[n].retaddr); // return address
1393 set_jump_target(handler_jump, out);
1394
1395 if(!regs_saved)
1396 save_regs(reglist);
1397 void *handler=NULL;
1398 if(type==LOADB_STUB||type==LOADBU_STUB)
1399 handler=jump_handler_read8;
1400 if(type==LOADH_STUB||type==LOADHU_STUB)
1401 handler=jump_handler_read16;
1402 if(type==LOADW_STUB)
1403 handler=jump_handler_read32;
1404 assert(handler);
1405 pass_args64(rs,temp2);
1406 int cc=get_reg(i_regmap,CCREG);
1407 if(cc<0)
1408 emit_loadreg(CCREG,2);
1409 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1410 emit_call(handler);
1411 // (no cycle reload after read)
1412 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1413 loadstore_extend(type,0,rt);
1414 }
1415 if(restore_jump)
1416 set_jump_target(restore_jump, out);
1417 restore_regs(reglist);
1418 emit_jmp(stubs[n].retaddr);
be516ebe 1419}
1420
// Emit an inline read for an address known at compile time (addr).
// If get_direct_memhandler reports no handler, the load is emitted
// directly against the returned host address; otherwise registers are
// saved and the handler is called, with the result taken from r0.
1421static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1422{
d1e4ebd9 1423 int rs=get_reg(regmap,target);
1424 int rt=get_reg(regmap,target);
// fall back to the register mapped to -1 when target has no host register
1425 if(rs<0) rs=get_reg(regmap,-1);
1426 assert(rs>=0);
1427 u_int is_dynamic=0;
1428 uintptr_t host_addr = 0;
1429 void *handler;
1430 int cc=get_reg(regmap,CCREG);
1431 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1432 // return;
1433 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1434 if (handler == NULL) {
// direct access; nothing to emit when there is no destination register or
// the destination is guest register 0
1435 if(rt<0||rt1[i]==0)
1436 return;
// rs holds addr; rewrite it to host_addr when the two differ
1437 if (addr != host_addr) {
1438 if (host_addr >= 0x100000000ull)
1439 abort(); // ROREG not implemented
1440 emit_movimm_from(addr, rs, host_addr, rs);
1441 }
1442 switch(type) {
1443 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1444 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1445 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1446 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1447 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1448 default: assert(0);
1449 }
1450 return;
1451 }
// for "dynamic" handlers the generic jump_handler_read* routine is called
// instead of the handler returned above
1452 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1453 if(is_dynamic) {
1454 if(type==LOADB_STUB||type==LOADBU_STUB)
1455 handler=jump_handler_read8;
1456 if(type==LOADH_STUB||type==LOADHU_STUB)
1457 handler=jump_handler_read16;
1458 if(type==LOADW_STUB)
1459 handler=jump_handler_read32;
1460 }
1461
1462 // call a memhandler
// the destination register is overwritten, so it is not saved/restored
1463 if(rt>=0&&rt1[i]!=0)
1464 reglist&=~(1<<rt);
1465 save_regs(reglist);
// r0 = address: the constant when target is 0, otherwise the value in rs
1466 if(target==0)
1467 emit_movimm(addr,0);
1468 else if(rs!=0)
1469 emit_mov(rs,0);
// r2 = cycle count plus the adjustment for this instruction
1470 if(cc<0)
1471 emit_loadreg(CCREG,2);
1472 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
// dynamic handlers get the mem_rtab value in r1; others run
// do_memhandler_pre first
1473 if(is_dynamic)
1474 emit_readdword(&mem_rtab,1);
1475 else
1476 emit_call(do_memhandler_pre);
1477
1478 emit_call(handler);
1479
1480 // (no cycle reload after read)
1481 if(rt>=0&&rt1[i]!=0)
1482 loadstore_extend(type, 0, rt);
1483 restore_regs(reglist);
be516ebe 1484}
1485
1486static void do_writestub(int n)
1487{
1488 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1489 set_jump_target(stubs[n].addr, out);
1490 enum stub_type type=stubs[n].type;
1491 int i=stubs[n].a;
1492 int rs=stubs[n].b;
1493 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1494 u_int reglist=stubs[n].e;
1495 signed char *i_regmap=i_regs->regmap;
1496 int rt,r;
1497 if(itype[i]==C1LS||itype[i]==C2LS) {
1498 rt=get_reg(i_regmap,r=FTEMP);
1499 }else{
1500 rt=get_reg(i_regmap,r=rs2[i]);
1501 }
1502 assert(rs>=0);
1503 assert(rt>=0);
1504 int rtmp,temp=-1,temp2,regs_saved=0;
1505 void *restore_jump = NULL, *handler_jump = NULL;
1506 int reglist2=reglist|(1<<rs)|(1<<rt);
1507 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1508 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1509 temp = rtmp;
1510 break;
1511 }
1512 }
1513 if(temp==-1) {
1514 save_regs(reglist);
1515 regs_saved=1;
1516 for(rtmp=0;rtmp<=3;rtmp++)
1517 if(rtmp!=rs&&rtmp!=rt)
1518 {temp=rtmp;break;}
1519 }
1520 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1521 temp2=3;
1522 else {
1523 host_tempreg_acquire();
1524 temp2=HOST_TEMPREG;
1525 }
1526 emit_readdword(&mem_wtab,temp);
1527 emit_shrimm(rs,12,temp2);
1528 emit_readdword_dualindexedx8(temp,temp2,temp2);
1529 emit_adds64(temp2,temp2,temp2);
1530 handler_jump=out;
1531 emit_jc(0);
1532 switch(type) {
1533 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1534 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1535 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1536 default: assert(0);
1537 }
1538 if(regs_saved) {
1539 restore_jump=out;
1540 emit_jmp(0); // jump to reg restore
1541 }
1542 else
1543 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1544 set_jump_target(handler_jump, out);
1545
1546 // TODO FIXME: regalloc should prefer callee-saved regs
1547 if(!regs_saved)
1548 save_regs(reglist);
1549 void *handler=NULL;
1550 switch(type) {
1551 case STOREB_STUB: handler=jump_handler_write8; break;
1552 case STOREH_STUB: handler=jump_handler_write16; break;
1553 case STOREW_STUB: handler=jump_handler_write32; break;
1554 default: assert(0);
1555 }
1556 assert(handler);
1557 pass_args(rs,rt);
1558 if(temp2!=3) {
1559 emit_mov64(temp2,3);
1560 host_tempreg_release();
1561 }
1562 int cc=get_reg(i_regmap,CCREG);
1563 if(cc<0)
1564 emit_loadreg(CCREG,2);
1565 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1566 // returns new cycle_count
1567 emit_call(handler);
1568 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1569 if(cc<0)
1570 emit_storereg(CCREG,2);
1571 if(restore_jump)
1572 set_jump_target(restore_jump, out);
1573 restore_regs(reglist);
1574 emit_jmp(stubs[n].retaddr);
be516ebe 1575}
1576
// Emit an inline write for an address known at compile time (addr).
// If get_direct_memhandler reports no handler, the store is emitted
// directly against the returned host address; otherwise registers are
// saved and the handler is called between do_memhandler_pre and
// do_memhandler_post, after which the cycle count is written back.
1577static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1578{
// rs: register mapped to -1 (used below as the address register);
// rt: register holding the value to store
687b4580 1579 int rs = get_reg(regmap,-1);
1580 int rt = get_reg(regmap,target);
1581 assert(rs >= 0);
1582 assert(rt >= 0);
1583 uintptr_t host_addr = 0;
1584 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1585 if (handler == NULL) {
// direct access: rewrite rs from addr to host_addr when the two differ
d1e4ebd9 1586 if (addr != host_addr) {
1587 if (host_addr >= 0x100000000ull)
1588 abort(); // ROREG not implemented
687b4580 1589 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1590 }
1591 switch (type) {
687b4580 1592 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1593 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1594 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1595 default: assert(0);
1596 }
1597 return;
1598 }
1599
1600 // call a memhandler
1601 save_regs(reglist);
687b4580 1602 emit_writeword(rs, &address); // some handlers still need it
// r0 = value to store, extended according to the access type
d1e4ebd9 1603 loadstore_extend(type, rt, 0);
1604 int cc, cc_use;
1605 cc = cc_use = get_reg(regmap, CCREG);
// when the cycle count has no host register it is loaded into r2
1606 if (cc < 0)
1607 emit_loadreg(CCREG, (cc_use = 2));
1608 emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2);
1609
1610 emit_call(do_memhandler_pre);
687b4580 1611 emit_call(handler);
d1e4ebd9 1612 emit_call(do_memhandler_post);
// r0 after the calls is taken as the new cycle count; undo the adjustment
// and put it back where the cycle count lives
1613 emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use);
1614 if (cc < 0)
1615 emit_storereg(CCREG, cc_use);
687b4580 1616 restore_regs(reglist);
be516ebe 1617}
1618
1619static void do_unalignedwritestub(int n)
1620{
1621 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
1622 assert(0);
1623}
1624
d1e4ebd9 1625static void set_loadlp(u_int *loadl, void *lit)
be516ebe 1626{
d1e4ebd9 1627 uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
1628 assert((*loadl & ~0x1f) == 0x58000000);
1629 assert((ofs & 3) == 0);
1630 assert(ofs < 0x100000);
1631 *loadl |= (ofs >> 2) << 5;
1632}
1633
1634// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1635static void do_dirty_stub_emit_args(u_int arg0)
1636{
1637 assert(slen <= MAXBLOCK);
1638 emit_loadlp_ofs(0, 1); // ldr x1, source
1639 emit_loadlp_ofs(0, 2); // ldr x2, copy
1640 emit_movz(slen*4, 3);
1641 emit_movz(arg0 & 0xffff, 0);
1642 emit_movk_lsl16(arg0 >> 16, 0);
1643}
1644
1645static void do_dirty_stub_emit_literals(u_int *loadlps)
1646{
1647 set_loadlp(&loadlps[0], out);
1648 output_w64((uintptr_t)source);
1649 set_loadlp(&loadlps[1], out);
1650 output_w64((uintptr_t)copy);
be516ebe 1651}
1652
d1e4ebd9 1653static void *do_dirty_stub(int i)
be516ebe 1654{
1655 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1656 u_int *loadlps = (void *)out;
1657 do_dirty_stub_emit_args(start + i*4);
1658 emit_call(verify_code);
1659 void *entry = out;
be516ebe 1660 load_regs_entry(i);
d1e4ebd9 1661 if (entry == out)
1662 entry = instr_addr[i];
1663 emit_jmp(instr_addr[i]);
1664 do_dirty_stub_emit_literals(loadlps);
1665 return entry;
be516ebe 1666}
1667
1668static void do_dirty_stub_ds()
1669{
d1e4ebd9 1670 do_dirty_stub_emit_args(start + 1);
1671 u_int *loadlps = (void *)out;
1672 emit_call(verify_code_ds);
1673 emit_jmp(out + 8*2);
1674 do_dirty_stub_emit_literals(loadlps);
be516ebe 1675}
1676
1677/* Special assem */
1678
1679#define shift_assemble shift_assemble_arm64
1680
// Placeholder: always asserts; both parameters are ignored.
static void shift_assemble_arm64(int i, struct regstat *i_regs)
{
  assert(0); // not implemented
}
1685#define loadlr_assemble loadlr_assemble_arm64
1686
// Placeholder: always asserts; both parameters are ignored.
static void loadlr_assemble_arm64(int i, struct regstat *i_regs)
{
  assert(0); // not implemented
}
1691
// Placeholder: always asserts; both parameters are ignored.
static void c2op_assemble(int i, struct regstat *i_regs)
{
  assert(0); // not implemented
}
1696
// Placeholder: always asserts; both parameters are ignored.
static void multdiv_assemble_arm64(int i, struct regstat *i_regs)
{
  assert(0); // not implemented
}
1701#define multdiv_assemble multdiv_assemble_arm64
1702
d1e4ebd9 1703static void do_jump_vaddr(u_int rs)
1704{
1705 if (rs != 0)
1706 emit_mov(rs, 0);
1707 emit_call(get_addr_ht);
1708 emit_jmpreg(0);
1709}
1710
be516ebe 1711static void do_preload_rhash(u_int r) {
1712 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1713 // register. On ARM the hash can be done with a single instruction (below)
1714}
1715
1716static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1717 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1718}
1719
1720static void do_rhash(u_int rs,u_int rh) {
1721 emit_andimm(rs, 0xf8, rh);
1722}
1723
d1e4ebd9 1724static void do_miniht_load(int ht, u_int rh) {
1725 emit_add64(ht, rh, ht);
1726 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1727}
1728
d1e4ebd9 1729static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1730 emit_cmp(rh, rs);
1731 void *jaddr = out;
1732 emit_jeq(0);
1733 do_jump_vaddr(rs);
1734
1735 set_jump_target(jaddr, out);
1736 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
1737 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
1738 emit_jmpreg(ht);
be516ebe 1739}
1740
d1e4ebd9 1741// parsed by set_jump_target?
be516ebe 1742static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 1743 emit_movz_lsl16((return_address>>16)&0xffff,rt);
1744 emit_movk(return_address&0xffff,rt);
1745 add_to_linker(out,return_address,1);
1746 emit_adr(out,temp);
1747 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
1748 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 1749}
1750
1751static void mark_clear_cache(void *target)
1752{
1753 u_long offset = (u_char *)target - translation_cache;
1754 u_int mask = 1u << ((offset >> 12) & 31);
1755 if (!(needs_clear_cache[offset >> 17] & mask)) {
1756 char *start = (char *)((u_long)target & ~4095ul);
1757 start_tcache_write(start, start + 4096);
1758 needs_clear_cache[offset >> 17] |= mask;
1759 }
1760}
1761
1762// Clearing the cache is rather slow on ARM Linux, so mark the areas
1763// that need to be cleared, and then only clear these areas once.
1764static void do_clear_cache()
1765{
1766 int i,j;
1767 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
1768 {
1769 u_int bitmap=needs_clear_cache[i];
1770 if(bitmap) {
1771 u_char *start, *end;
1772 for(j=0;j<32;j++)
1773 {
1774 if(bitmap&(1<<j)) {
1775 start=translation_cache+i*131072+j*4096;
1776 end=start+4095;
1777 j++;
1778 while(j<32) {
1779 if(bitmap&(1<<j)) {
1780 end+=4096;
1781 j++;
1782 }else{
1783 end_tcache_write(start, end);
1784 break;
1785 }
1786 }
1787 }
1788 }
1789 needs_clear_cache[i]=0;
1790 }
1791 }
1792}
1793
// CPU-architecture-specific initialization.
// Nothing to do here.
static void arch_init()
{
}
1797
1798// vim:shiftwidth=2:expandtab