drc: something works on arm64
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm64.c
CommitLineData
be516ebe 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm64.c *
3 * Copyright (C) 2009-2011 Ari64 *
d1e4ebd9 4 * Copyright (C) 2009-2018 Gillou68310 *
5 * Copyright (C) 2021 notaz *
be516ebe 6 * *
7 * This program is free software; you can redistribute it and/or modify *
8 * it under the terms of the GNU General Public License as published by *
9 * the Free Software Foundation; either version 2 of the License, or *
10 * (at your option) any later version. *
11 * *
12 * This program is distributed in the hope that it will be useful, *
13 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
14 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
15 * GNU General Public License for more details. *
16 * *
17 * You should have received a copy of the GNU General Public License *
18 * along with this program; if not, write to the *
19 * Free Software Foundation, Inc., *
20 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
21 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
22
3968e69e 23#include "pcnt.h"
be516ebe 24#include "arm_features.h"
25
26#if defined(BASE_ADDR_FIXED)
27#elif defined(BASE_ADDR_DYNAMIC)
28u_char *translation_cache;
29#else
30u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
31#endif
d1e4ebd9 32static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
be516ebe 33
/* bit mask with bits 0..18 set
 * NOTE(review): presumably one bit per caller-saved host register - confirm */
#define CALLER_SAVE_REGS 0x0007ffff

/* silences "defined but not used" warnings on helpers and tables below */
#define unused __attribute__((unused))

/* memory handler wrappers, defined outside of this file */
void do_memhandler_pre();
void do_memhandler_post();
be516ebe 40
41/* Linker */
/*
 * Patch the pc-relative instruction at addr so that it refers to target.
 *
 * Supported instructions: b, b.cond, cbz/cbnz and adr.  Anything else
 * aborts.  The displacement must fit the immediate field of the
 * instruction being patched (asserted).
 */
static void set_jump_target(void *addr, void *target)
{
  u_int *ptr = addr;
  intptr_t offset = (u_char *)target - (u_char *)addr;

  if ((*ptr&0xFC000000) == 0x14000000) { // b
    assert(offset>=-134217728LL&&offset<134217728LL);
    *ptr=(*ptr&0xFC000000)|((offset>>2)&0x3ffffff);
  }
  else if ((*ptr&0xff000000) == 0x54000000 // b.cond
        || (*ptr&0x7e000000) == 0x34000000) { // cbz/cbnz
    // Conditional branch are limited to +/- 1MB
    // block max size is 256k so branching beyond the +/- 1MB limit
    // should only happen when jumping to an already compiled block (see add_link)
    // a workaround would be to do a trampoline jump via a stub at the end of the block
    assert(-1048576 <= offset && offset < 1048576);
    // Keep bits 4:0 intact: they hold the condition for b.cond (bit 4 is
    // zero there) and the whole 5-bit Rt for cbz/cbnz.  A 4-bit mask would
    // silently turn w16..w30 into w0..w14.
    *ptr=(*ptr&0xFF00001F)|(((offset>>2)&0x7ffff)<<5);
  }
  else if((*ptr&0x9f000000)==0x10000000) { // adr
    // generated by do_miniht_insert
    assert(offset>=-1048576LL&&offset<1048576LL);
    *ptr=(*ptr&0x9F00001F)|(offset&0x3)<<29|((offset>>2)&0x7ffff)<<5;
  }
  else
    abort(); // should not happen
}
68
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// find the instruction that jumps to it.
// The third word of the stub must be an adr; the address it computes is
// returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr = (int *)stub + 2;
  const uint32_t insn = (uint32_t)*ptr;

  assert((insn&0x9f000000) == 0x10000000); // adr
  // immhi lives in bits 23:5, immlo in bits 30:29.  The left shift is done
  // on an unsigned value (shifting a signed int into the sign bit is
  // undefined); the arithmetic right shift then sign-extends immhi.
  const int32_t immhi = (int32_t)(insn << 8) >> 13;
  const int32_t offset = immhi * 4 + (int32_t)((insn >> 29) & 0x3);
  return ptr + offset / 4;
}
78
// Find where an external branch is linked to, using the address of its stub:
// get the address that the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// return the address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *i_ptr = find_extjump_insn(stub);
  // decode on an unsigned copy: left-shifting a signed int into the sign
  // bit is undefined behaviour
  const uint32_t insn = (uint32_t)*i_ptr;

  if ((insn&0xfc000000) == 0x14000000) // b: imm26 in bits 25:0
    return i_ptr + ((int32_t)(insn << 6) >> 6);
  if ((insn&0xff000000) == 0x54000000 // b.cond
      || (insn&0x7e000000) == 0x34000000) // cbz/cbnz: imm19 in bits 23:5
    return i_ptr + ((int32_t)(insn << 8) >> 13);
  assert(0);
  return NULL;
}
94
be516ebe 95// Allocate a specific ARM register.
96static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
97{
98 int n;
99 int dirty=0;
100
101 // see if it's already allocated (and dealloc it)
102 for(n=0;n<HOST_REGS;n++)
103 {
104 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
105 dirty=(cur->dirty>>n)&1;
106 cur->regmap[n]=-1;
107 }
108 }
109
110 cur->regmap[hr]=reg;
111 cur->dirty&=~(1<<hr);
112 cur->dirty|=dirty<<hr;
113 cur->isconst&=~(1<<hr);
114}
115
116// Alloc cycle count into dedicated register
117static void alloc_cc(struct regstat *cur,int i)
118{
119 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
120}
121
122/* Special alloc */
123
124
125/* Assembler */
126
127static unused const char *regname[32] = {
d1e4ebd9 128 "w0", "w1", "w2", "w3", "w4", "w5", "w6", "w7",
129 "w8", "w9", "w10", "w11", "w12", "w13", "w14", "w15",
130 "ip0", "ip1", "w18", "w19", "w20", "w21", "w22", "w23",
131 "w24", "w25", "w26", "w27", "w28", "wfp", "wlr", "wsp"
132};
133
134static unused const char *regname64[32] = {
135 "x0", "x1", "x2", "x3", "x4", "x5", "x6", "x7",
136 "x8", "x9", "x10", "x11", "x12", "x13", "x14", "x15",
137 "ip0", "ip1", "x18", "x19", "x20", "x21", "x22", "x23",
138 "x24", "x25", "x26", "x27", "x28", "fp", "lr", "sp"
139};
140
/* condition codes; the value is what goes into an instruction's cond field */
enum {
  COND_EQ = 0,
  COND_NE = 1,
  COND_CS = 2,
  COND_CC = 3,
  COND_MI = 4,
  COND_PL = 5,
  COND_VS = 6,
  COND_VC = 7,
  COND_HI = 8,
  COND_LS = 9,
  COND_GE = 10,
  COND_LT = 11,
  COND_GT = 12,
  COND_LE = 13,
  COND_AW = 14,
  COND_NV = 15
};
145
146static unused const char *condname[16] = {
147 "eq", "ne", "cs", "cc", "mi", "pl", "vs", "vc",
148 "hi", "ls", "ge", "lt", "gt", "le", "aw", "nv"
be516ebe 149};
150
be516ebe 151static void output_w32(u_int word)
152{
153 *((u_int *)out) = word;
154 out += 4;
155}
156
d1e4ebd9 157static void output_w64(uint64_t dword)
158{
159 *((uint64_t *)out) = dword;
160 out+=8;
161}
162
163/*
687b4580 164static u_int rm_rd(u_int rm, u_int rd)
165{
166 assert(rm < 31);
167 assert(rd < 31);
168 return (rm << 16) | rd;
169}
d1e4ebd9 170*/
687b4580 171
/* pack Rn (bits 9:5) and Rd (bits 4:0); register 31 is rejected */
static u_int rn_rd(u_int rn, u_int rd)
{
  u_int fields;

  assert(rn < 31);
  assert(rd < 31);
  fields = rn << 5;
  fields |= rd;
  return fields;
}
178
/* pack Rm (bits 20:16), Rn (bits 9:5) and Rd (bits 4:0) */
static u_int rm_rn_rd(u_int rm, u_int rn, u_int rd)
{
  u_int fields;

  assert(rm < 32);
  assert(rn < 32);
  assert(rd < 32);
  fields = rm << 16;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
186
3968e69e 187static u_int rm_ra_rn_rd(u_int rm, u_int ra, u_int rn, u_int rd)
188{
189 assert(ra < 32);
190 return rm_rn_rd(rm, rn, rd) | (ra << 10);
191}
192
/* pack imm7 (bits 21:15), Rt2 (bits 14:10), Rn (bits 9:5) and Rt (bits 4:0) */
static u_int imm7_rt2_rn_rt(u_int imm7, u_int rt2, u_int rn, u_int rt)
{
  u_int fields;

  assert(imm7 < 0x80);
  assert(rt2 < 31);
  assert(rn < 32);
  assert(rt < 31);
  fields = imm7 << 15;
  fields |= rt2 << 10;
  fields |= rn << 5;
  fields |= rt;
  return fields;
}
201
687b4580 202static u_int rm_imm6_rn_rd(u_int rm, u_int imm6, u_int rn, u_int rd)
203{
204 assert(imm6 <= 63);
205 return rm_rn_rd(rm, rn, rd) | (imm6 << 10);
206}
207
/* pack imm16 (bits 20:5) and Rd (bits 4:0); register 31 is rejected */
static u_int imm16_rd(u_int imm16, u_int rd)
{
  u_int fields;

  assert(imm16 < 0x10000);
  assert(rd < 31);
  fields = imm16 << 5;
  fields |= rd;
  return fields;
}
214
/* pack imm12 (bits 21:10), Rn (bits 9:5) and Rd (bits 4:0) */
static u_int imm12_rn_rd(u_int imm12, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm12 < 0x1000);
  assert(rn < 32);
  assert(rd < 32);
  fields = imm12 << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
222
/* pack imm9 (bits 20:12), Rn (bits 9:5) and Rt (bits 4:0); register 31 is rejected */
static u_int imm9_rn_rt(u_int imm9, u_int rn, u_int rd)
{
  u_int fields;

  assert(imm9 < 0x200);
  assert(rn < 31);
  assert(rd < 31);
  fields = imm9 << 12;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
230
/* pack imm19 (bits 23:5) and Rt (bits 4:0); register 31 is rejected */
static u_int imm19_rt(u_int imm19, u_int rt)
{
  u_int fields;

  assert(imm19 < 0x80000);
  assert(rt < 31);
  fields = imm19 << 5;
  fields |= rt;
  return fields;
}
237
/* pack N (bit 22), immr (bits 21:16), imms (bits 15:10), Rn (bits 9:5), Rd (bits 4:0) */
static u_int n_immr_imms_rn_rd(u_int n, u_int immr, u_int imms, u_int rn, u_int rd)
{
  u_int fields;

  assert(n < 2);
  assert(immr < 0x40);
  assert(imms < 0x40);
  assert(rn < 32);
  assert(rd < 32);
  fields = n << 22;
  fields |= immr << 16;
  fields |= imms << 10;
  fields |= rn << 5;
  fields |= rd;
  return fields;
}
247
248static u_int genjmp(const u_char *addr)
be516ebe 249{
250 intptr_t offset = addr - out;
d1e4ebd9 251 if ((uintptr_t)addr < 3) return 0; // a branch that will be patched later
be516ebe 252 if (offset < -134217728 || offset > 134217727) {
d1e4ebd9 253 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
254 abort();
be516ebe 255 return 0;
256 }
d1e4ebd9 257 return ((u_int)offset >> 2) & 0x03ffffff;
be516ebe 258}
259
d1e4ebd9 260static u_int genjmpcc(const u_char *addr)
be516ebe 261{
262 intptr_t offset = addr - out;
d1e4ebd9 263 if ((uintptr_t)addr < 3) return 0;
be516ebe 264 if (offset < -1048576 || offset > 1048572) {
d1e4ebd9 265 SysPrintf("%s: out of range: %p %lx\n", __func__, addr, offset);
266 abort();
267 return 0;
268 }
269 return ((u_int)offset >> 2) & 0x7ffff;
270}
271
/* true when value is a non-empty run of ones starting at bit 0 (1, 3, 7, ...) */
static uint32_t is_mask(u_int value)
{
  if (value == 0)
    return 0;
  return (value & (value + 1)) == 0;
}
276
277// This function returns true if the argument contains a
278// non-empty sequence of ones (possibly rotated) with the remainder zero.
279static uint32_t is_rotated_mask(u_int value)
280{
3968e69e 281 if (value == 0 || value == ~0)
be516ebe 282 return 0;
d1e4ebd9 283 if (is_mask((value - 1) | value))
284 return 1;
285 return is_mask((~value - 1) | ~value);
286}
287
288static void gen_logical_imm(u_int value, u_int *immr, u_int *imms)
289{
290 int lzeros, tzeros, ones;
291 assert(value != 0);
292 if (is_mask((value - 1) | value)) {
293 lzeros = __builtin_clz(value);
294 tzeros = __builtin_ctz(value);
295 ones = 32 - lzeros - tzeros;
296 *immr = (32 - tzeros) & 31;
297 *imms = ones - 1;
298 return;
be516ebe 299 }
d1e4ebd9 300 value = ~value;
301 if (is_mask((value - 1) | value)) {
302 lzeros = __builtin_clz(value);
303 tzeros = __builtin_ctz(value);
304 ones = 32 - lzeros - tzeros;
3968e69e 305 *immr = lzeros;
d1e4ebd9 306 *imms = 31 - ones;
307 return;
308 }
3968e69e 309 abort();
be516ebe 310}
311
312static void emit_mov(u_int rs, u_int rt)
313{
687b4580 314 assem_debug("mov %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 315 output_w32(0x2a000000 | rm_rn_rd(rs, WZR, rt));
316}
317
318static void emit_mov64(u_int rs, u_int rt)
319{
320 assem_debug("mov %s,%s\n", regname64[rt], regname64[rs]);
321 output_w32(0xaa000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 322}
323
687b4580 324static void emit_add(u_int rs1, u_int rs2, u_int rt)
be516ebe 325{
d1e4ebd9 326 assem_debug("add %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
327 output_w32(0x0b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 328}
329
d1e4ebd9 330static void emit_add64(u_int rs1, u_int rs2, u_int rt)
be516ebe 331{
d1e4ebd9 332 assem_debug("add %s,%s,%s\n", regname64[rt], regname64[rs1], regname64[rs2]);
333 output_w32(0x8b000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 334}
335
d1e4ebd9 336static void emit_adds64(u_int rs1, u_int rs2, u_int rt)
be516ebe 337{
3968e69e 338 assem_debug("adds %s,%s,%s\n",regname64[rt],regname64[rs1],regname64[rs2]);
d1e4ebd9 339 output_w32(0xab000000 | rm_rn_rd(rs2, rs1, rt));
340}
341
342static void emit_neg(u_int rs, u_int rt)
343{
344 assem_debug("neg %s,%s\n",regname[rt],regname[rs]);
345 output_w32(0x4b000000 | rm_rn_rd(rs, WZR, rt));
be516ebe 346}
347
687b4580 348static void emit_sub(u_int rs1, u_int rs2, u_int rt)
be516ebe 349{
d1e4ebd9 350 assem_debug("sub %s,%s,%s\n", regname[rt], regname[rs1], regname[rs2]);
687b4580 351 output_w32(0x4b000000 | rm_imm6_rn_rd(rs2, 0, rs1, rt));
be516ebe 352}
353
3968e69e 354static void emit_sub_asrimm(u_int rs1, u_int rs2, u_int shift, u_int rt)
355{
356 assem_debug("sub %s,%s,%s,asr #%u\n",regname[rt],regname[rs1],regname[rs2],shift);
357 output_w32(0x4b800000 | rm_imm6_rn_rd(rs2, shift, rs1, rt));
358}
359
d1e4ebd9 360static void emit_movz(u_int imm, u_int rt)
be516ebe 361{
d1e4ebd9 362 assem_debug("movz %s,#%#x\n", regname[rt], imm);
363 output_w32(0x52800000 | imm16_rd(imm, rt));
364}
365
366static void emit_movz_lsl16(u_int imm, u_int rt)
367{
368 assem_debug("movz %s,#%#x,lsl #16\n", regname[rt], imm);
369 output_w32(0x52a00000 | imm16_rd(imm, rt));
370}
371
372static void emit_movn(u_int imm, u_int rt)
373{
374 assem_debug("movn %s,#%#x\n", regname[rt], imm);
375 output_w32(0x12800000 | imm16_rd(imm, rt));
376}
377
378static void emit_movn_lsl16(u_int imm,u_int rt)
379{
380 assem_debug("movn %s,#%#x,lsl #16\n", regname[rt], imm);
381 output_w32(0x12a00000 | imm16_rd(imm, rt));
382}
383
384static void emit_movk(u_int imm,u_int rt)
385{
386 assem_debug("movk %s,#%#x\n", regname[rt], imm);
387 output_w32(0x72800000 | imm16_rd(imm, rt));
388}
389
390static void emit_movk_lsl16(u_int imm,u_int rt)
391{
392 assert(imm<65536);
3968e69e 393 assem_debug("movk %s,#%#x,lsl #16\n", regname[rt], imm);
d1e4ebd9 394 output_w32(0x72a00000 | imm16_rd(imm, rt));
be516ebe 395}
396
397static void emit_zeroreg(u_int rt)
398{
d1e4ebd9 399 emit_movz(0, rt);
be516ebe 400}
401
be516ebe 402static void emit_movimm(u_int imm, u_int rt)
403{
d1e4ebd9 404 if (imm < 65536)
405 emit_movz(imm, rt);
406 else if ((~imm) < 65536)
407 emit_movn(~imm, rt);
408 else if ((imm&0xffff) == 0)
409 emit_movz_lsl16(imm >> 16, rt);
410 else if (((~imm)&0xffff) == 0)
411 emit_movn_lsl16(~imm >> 16, rt);
412 else if (is_rotated_mask(imm)) {
413 u_int immr, imms;
414 gen_logical_imm(imm, &immr, &imms);
415 assem_debug("orr %s,wzr,#%#x\n", regname[rt], imm);
416 output_w32(0x32000000 | n_immr_imms_rn_rd(0, immr, imms, WZR, rt));
417 }
be516ebe 418 else {
d1e4ebd9 419 emit_movz(imm & 0xffff, rt);
420 emit_movk_lsl16(imm >> 16, rt);
be516ebe 421 }
422}
423
687b4580 424static void emit_readword(void *addr, u_int rt)
425{
426 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
427 if (!(offset & 3) && offset <= 16380) {
428 assem_debug("ldr %s,[x%d+%#lx]\n", regname[rt], FP, offset);
429 output_w32(0xb9400000 | imm12_rn_rd(offset >> 2, FP, rt));
430 }
431 else
3968e69e 432 abort();
687b4580 433}
434
d1e4ebd9 435static void emit_readdword(void *addr, u_int rt)
436{
437 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
438 if (!(offset & 7) && offset <= 32760) {
439 assem_debug("ldr %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
440 output_w32(0xf9400000 | imm12_rn_rd(offset >> 3, FP, rt));
441 }
3968e69e 442 else
443 abort();
444}
445
446static void emit_readshword(void *addr, u_int rt)
447{
448 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
449 if (!(offset & 1) && offset <= 8190) {
450 assem_debug("ldrsh %s,[x%d+%#lx]\n", regname[rt], FP, offset);
451 output_w32(0x79c00000 | imm12_rn_rd(offset >> 1, FP, rt));
452 }
d1e4ebd9 453 else
454 assert(0);
455}
456
be516ebe 457static void emit_loadreg(u_int r, u_int hr)
458{
d1e4ebd9 459 int is64 = 0;
be516ebe 460 assert(r < 64);
461 if (r == 0)
462 emit_zeroreg(hr);
463 else {
7c3a5182 464 void *addr = &psxRegs.GPR.r[r];
be516ebe 465 switch (r) {
7c3a5182 466 //case HIREG: addr = &hi; break;
467 //case LOREG: addr = &lo; break;
be516ebe 468 case CCREG: addr = &cycle_count; break;
469 case CSREG: addr = &Status; break;
d1e4ebd9 470 case INVCP: addr = &invc_ptr; is64 = 1; break;
7c3a5182 471 default: assert(r < 34); break;
be516ebe 472 }
d1e4ebd9 473 if (is64)
474 emit_readdword(addr, hr);
475 else
476 emit_readword(addr, hr);
be516ebe 477 }
478}
479
687b4580 480static void emit_writeword(u_int rt, void *addr)
481{
482 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
483 if (!(offset & 3) && offset <= 16380) {
484 assem_debug("str %s,[x%d+%#lx]\n", regname[rt], FP, offset);
485 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, FP, rt));
486 }
487 else
488 assert(0);
489}
490
d1e4ebd9 491static void emit_writedword(u_int rt, void *addr)
492{
493 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
494 if (!(offset & 7) && offset <= 32760) {
495 assem_debug("str %s,[x%d+%#lx]\n", regname64[rt], FP, offset);
3968e69e 496 output_w32(0xf9000000 | imm12_rn_rd(offset >> 3, FP, rt));
d1e4ebd9 497 }
498 else
3968e69e 499 abort();
d1e4ebd9 500}
501
687b4580 502static void emit_storereg(u_int r, u_int hr)
be516ebe 503{
504 assert(r < 64);
7c3a5182 505 void *addr = &psxRegs.GPR.r[r];
be516ebe 506 switch (r) {
7c3a5182 507 //case HIREG: addr = &hi; break;
508 //case LOREG: addr = &lo; break;
be516ebe 509 case CCREG: addr = &cycle_count; break;
7c3a5182 510 default: assert(r < 34); break;
be516ebe 511 }
687b4580 512 emit_writeword(hr, addr);
be516ebe 513}
514
515static void emit_test(u_int rs, u_int rt)
516{
d1e4ebd9 517 assem_debug("tst %s,%s\n", regname[rs], regname[rt]);
518 output_w32(0x6a000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 519}
520
d1e4ebd9 521static void emit_testimm(u_int rs, u_int imm)
be516ebe 522{
d1e4ebd9 523 u_int immr, imms;
687b4580 524 assem_debug("tst %s,#%#x\n", regname[rs], imm);
d1e4ebd9 525 assert(is_rotated_mask(imm)); // good enough for PCSX
526 gen_logical_imm(imm, &immr, &imms);
3968e69e 527 output_w32(0x72000000 | n_immr_imms_rn_rd(0, immr, imms, rs, WZR));
be516ebe 528}
529
530static void emit_not(u_int rs,u_int rt)
531{
532 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
d1e4ebd9 533 output_w32(0x2a200000 | rm_rn_rd(rs, WZR, rt));
be516ebe 534}
535
be516ebe 536static void emit_and(u_int rs1,u_int rs2,u_int rt)
537{
538 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 539 output_w32(0x0a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 540}
541
542static void emit_or(u_int rs1,u_int rs2,u_int rt)
543{
544 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 545 output_w32(0x2a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 546}
547
3968e69e 548static void emit_bic(u_int rs1,u_int rs2,u_int rt)
549{
550 assem_debug("bic %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
551 output_w32(0x0a200000 | rm_rn_rd(rs2, rs1, rt));
552}
553
be516ebe 554static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
555{
be516ebe 556 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 557 output_w32(0x2a000000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 558}
559
560static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
561{
be516ebe 562 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
d1e4ebd9 563 output_w32(0x2a400000 | rm_imm6_rn_rd(rs, imm, rt, rt));
be516ebe 564}
565
3968e69e 566static void emit_bicsar_imm(u_int rs,u_int imm,u_int rt)
567{
568 assem_debug("bic %s,%s,%s,asr #%d\n",regname[rt],regname[rt],regname[rs],imm);
569 output_w32(0x0aa00000 | rm_imm6_rn_rd(rs, imm, rt, rt));
570}
571
be516ebe 572static void emit_xor(u_int rs1,u_int rs2,u_int rt)
573{
574 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
d1e4ebd9 575 output_w32(0x4a000000 | rm_rn_rd(rs2, rs1, rt));
be516ebe 576}
577
3968e69e 578static void emit_xorsar_imm(u_int rs1, u_int rs2, u_int imm, u_int rt)
579{
580 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
581 output_w32(0x4a800000 | rm_imm6_rn_rd(rs2, imm, rs1, rt));
582}
583
d1e4ebd9 584static void emit_addimm_s(u_int s, u_int is64, u_int rs, uintptr_t imm, u_int rt)
be516ebe 585{
d1e4ebd9 586 unused const char *st = s ? "s" : "";
587 s = s ? 0x20000000 : 0;
588 is64 = is64 ? 0x80000000 : 0;
687b4580 589 if (imm < 4096) {
d1e4ebd9 590 assem_debug("add%s %s,%s,%#lx\n", st, regname[rt], regname[rs], imm);
591 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm, rs, rt));
687b4580 592 }
593 else if (-imm < 4096) {
3968e69e 594 assem_debug("sub%s %s,%s,%#lx\n", st, regname[rt], regname[rs], -imm);
d1e4ebd9 595 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm, rs, rt));
596 }
597 else if (imm < 16777216) {
598 assem_debug("add %s,%s,#%#lx\n",regname[rt],regname[rt],imm&0xfff000);
599 output_w32(0x11400000 | is64 | imm12_rn_rd(imm >> 12, rs, rt));
600 if ((imm & 0xfff) || s) {
601 assem_debug("add%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],imm&0xfff);
3968e69e 602 output_w32(0x11000000 | is64 | s | imm12_rn_rd(imm & 0xfff, rt, rt));
d1e4ebd9 603 }
604 }
605 else if (-imm < 16777216) {
606 assem_debug("sub %s,%s,#%#lx\n",regname[rt],regname[rt],-imm&0xfff000);
607 output_w32(0x51400000 | is64 | imm12_rn_rd(-imm >> 12, rs, rt));
608 if ((imm & 0xfff) || s) {
609 assem_debug("sub%s %s,%s,#%#lx\n",st,regname[rt],regname[rs],-imm&0xfff);
610 output_w32(0x51000000 | is64 | s | imm12_rn_rd(-imm & 0xfff, rt, rt));
611 }
687b4580 612 }
613 else
3968e69e 614 abort();
be516ebe 615}
616
d1e4ebd9 617static void emit_addimm(u_int rs, uintptr_t imm, u_int rt)
618{
619 emit_addimm_s(0, 0, rs, imm, rt);
620}
621
622static void emit_addimm64(u_int rs, uintptr_t imm, u_int rt)
623{
624 emit_addimm_s(0, 1, rs, imm, rt);
625}
626
be516ebe 627static void emit_addimm_and_set_flags(int imm, u_int rt)
628{
d1e4ebd9 629 emit_addimm_s(1, 0, rt, imm, rt);
be516ebe 630}
631
632static void emit_addimm_no_flags(u_int imm,u_int rt)
633{
634 emit_addimm(rt,imm,rt);
635}
636
d1e4ebd9 637static void emit_logicop_imm(u_int op, u_int rs, u_int imm, u_int rt)
be516ebe 638{
d1e4ebd9 639 const char *names[] = { "and", "orr", "eor", "ands" };
640 const char *name = names[op];
641 u_int immr, imms;
642 op = op << 29;
643 if (is_rotated_mask(imm)) {
644 gen_logical_imm(imm, &immr, &imms);
645 assem_debug("%s %s,%s,#%#x\n", name, regname[rt], regname[rs], imm);
646 output_w32(op | 0x12000000 | n_immr_imms_rn_rd(0, immr, imms, rs, rt));
647 }
648 else {
649 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
650 host_tempreg_acquire();
651 emit_movimm(imm, HOST_TEMPREG);
652 assem_debug("%s %s,%s,%s\n", name, regname[rt], regname[rs], regname[HOST_TEMPREG]);
653 output_w32(op | 0x0a000000 | rm_rn_rd(HOST_TEMPREG, rs, rt));
654 if (rs == HOST_TEMPREG || rt != HOST_TEMPREG)
655 host_tempreg_release();
656 }
657 (void)name;
be516ebe 658}
659
d1e4ebd9 660static void emit_andimm(u_int rs, u_int imm, u_int rt)
be516ebe 661{
d1e4ebd9 662 if (imm == 0)
663 emit_zeroreg(rt);
664 else
665 emit_logicop_imm(0, rs, imm, rt);
be516ebe 666}
667
d1e4ebd9 668static void emit_orimm(u_int rs, u_int imm, u_int rt)
be516ebe 669{
d1e4ebd9 670 if (imm == 0) {
671 if (rs != rt)
672 emit_mov(rs, rt);
673 }
674 else
675 emit_logicop_imm(1, rs, imm, rt);
be516ebe 676}
677
d1e4ebd9 678static void emit_xorimm(u_int rs, u_int imm, u_int rt)
be516ebe 679{
d1e4ebd9 680 if (imm == 0) {
681 if (rs != rt)
682 emit_mov(rs, rt);
683 }
684 else
685 emit_logicop_imm(2, rs, imm, rt);
be516ebe 686}
687
d1e4ebd9 688static void emit_sbfm(u_int rs,u_int imm,u_int rt)
be516ebe 689{
d1e4ebd9 690 assem_debug("sbfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 692}
693
d1e4ebd9 694static void emit_ubfm(u_int rs,u_int imm,u_int rt)
be516ebe 695{
d1e4ebd9 696 assem_debug("ubfm %s,%s,#0,#%d\n",regname[rt],regname[rs],imm);
697 output_w32(0x53000000 | n_immr_imms_rn_rd(0, 0, imm, rs, rt));
be516ebe 698}
699
700static void emit_shlimm(u_int rs,u_int imm,u_int rt)
701{
be516ebe 702 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 703 output_w32(0x53000000 | n_immr_imms_rn_rd(0, (31-imm)+1, 31-imm, rs, rt));
be516ebe 704}
705
3968e69e 706static void emit_shrimm(u_int rs,u_int imm,u_int rt)
be516ebe 707{
3968e69e 708 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
709 output_w32(0x53000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 710}
711
3968e69e 712static void emit_shrimm64(u_int rs,u_int imm,u_int rt)
be516ebe 713{
be516ebe 714 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
3968e69e 715 output_w32(0xd3400000 | n_immr_imms_rn_rd(0, imm, 63, rs, rt));
be516ebe 716}
717
718static void emit_sarimm(u_int rs,u_int imm,u_int rt)
719{
be516ebe 720 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 721 output_w32(0x13000000 | n_immr_imms_rn_rd(0, imm, 31, rs, rt));
be516ebe 722}
723
724static void emit_rorimm(u_int rs,u_int imm,u_int rt)
725{
3968e69e 726 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
d1e4ebd9 727 output_w32(0x13800000 | rm_imm6_rn_rd(rs, imm, rs, rt));
be516ebe 728}
729
730static void emit_signextend16(u_int rs, u_int rt)
731{
732 assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
d1e4ebd9 733 output_w32(0x13000000 | n_immr_imms_rn_rd(0, 0, 15, rs, rt));
be516ebe 734}
735
d1e4ebd9 736static void emit_shl(u_int rs,u_int rshift,u_int rt)
be516ebe 737{
3968e69e 738 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
d1e4ebd9 739 output_w32(0x1ac02000 | rm_rn_rd(rshift, rs, rt));
be516ebe 740}
741
d1e4ebd9 742static void emit_shr(u_int rs,u_int rshift,u_int rt)
be516ebe 743{
d1e4ebd9 744 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
745 output_w32(0x1ac02400 | rm_rn_rd(rshift, rs, rt));
be516ebe 746}
747
d1e4ebd9 748static void emit_sar(u_int rs,u_int rshift,u_int rt)
be516ebe 749{
d1e4ebd9 750 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[rshift]);
751 output_w32(0x1ac02800 | rm_rn_rd(rshift, rs, rt));
be516ebe 752}
753
d1e4ebd9 754static void emit_cmpimm(u_int rs, u_int imm)
be516ebe 755{
d1e4ebd9 756 if (imm < 4096) {
757 assem_debug("cmp %s,%#x\n", regname[rs], imm);
758 output_w32(0x71000000 | imm12_rn_rd(imm, rs, WZR));
759 }
760 else if (-imm < 4096) {
761 assem_debug("cmn %s,%#x\n", regname[rs], imm);
762 output_w32(0x31000000 | imm12_rn_rd(-imm, rs, WZR));
763 }
764 else if (imm < 16777216 && !(imm & 0xfff)) {
3968e69e 765 assem_debug("cmp %s,#%#x\n", regname[rs], imm);
d1e4ebd9 766 output_w32(0x71400000 | imm12_rn_rd(imm >> 12, rs, WZR));
767 }
768 else {
769 host_tempreg_acquire();
770 emit_movimm(imm, HOST_TEMPREG);
771 assem_debug("cmp %s,%s\n", regname[rs], regname[HOST_TEMPREG]);
772 output_w32(0x6b000000 | rm_rn_rd(HOST_TEMPREG, rs, WZR));
773 host_tempreg_release();
774 }
be516ebe 775}
776
d1e4ebd9 777static void emit_cmov_imm(u_int cond0, u_int cond1, u_int imm, u_int rt)
be516ebe 778{
d1e4ebd9 779 assert(imm == 0 || imm == 1);
780 assert(cond0 < 0x10);
781 assert(cond1 < 0x10);
782 if (imm) {
783 assem_debug("csinc %s,%s,%s,%s\n",regname[rt],regname[rt],regname[WZR],condname[cond1]);
784 output_w32(0x1a800400 | (cond1 << 12) | rm_rn_rd(WZR, rt, rt));
785 } else {
786 assem_debug("csel %s,%s,%s,%s\n",regname[rt],regname[WZR],regname[rt],condname[cond0]);
787 output_w32(0x1a800000 | (cond0 << 12) | rm_rn_rd(rt, WZR, rt));
788 }
be516ebe 789}
790
d1e4ebd9 791static void emit_cmovne_imm(u_int imm,u_int rt)
be516ebe 792{
d1e4ebd9 793 emit_cmov_imm(COND_NE, COND_EQ, imm, rt);
be516ebe 794}
795
d1e4ebd9 796static void emit_cmovl_imm(u_int imm,u_int rt)
be516ebe 797{
d1e4ebd9 798 emit_cmov_imm(COND_LT, COND_GE, imm, rt);
be516ebe 799}
800
801static void emit_cmovb_imm(int imm,u_int rt)
802{
d1e4ebd9 803 emit_cmov_imm(COND_CC, COND_CS, imm, rt);
be516ebe 804}
805
3968e69e 806static void emit_cmoveq_reg(u_int rs,u_int rt)
be516ebe 807{
3968e69e 808 assem_debug("csel %s,%s,%s,eq\n",regname[rt],regname[rs],regname[rt]);
809 output_w32(0x1a800000 | (COND_EQ << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 810}
811
812static void emit_cmovne_reg(u_int rs,u_int rt)
813{
d1e4ebd9 814 assem_debug("csel %s,%s,%s,ne\n",regname[rt],regname[rs],regname[rt]);
815 output_w32(0x1a800000 | (COND_NE << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 816}
817
818static void emit_cmovl_reg(u_int rs,u_int rt)
819{
d1e4ebd9 820 assem_debug("csel %s,%s,%s,lt\n",regname[rt],regname[rs],regname[rt]);
821 output_w32(0x1a800000 | (COND_LT << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 822}
823
824static void emit_cmovs_reg(u_int rs,u_int rt)
825{
d1e4ebd9 826 assem_debug("csel %s,%s,%s,mi\n",regname[rt],regname[rs],regname[rt]);
827 output_w32(0x1a800000 | (COND_MI << 12) | rm_rn_rd(rt, rs, rt));
be516ebe 828}
829
3968e69e 830static void emit_csinvle_reg(u_int rs1,u_int rs2,u_int rt)
831{
832 assem_debug("csinv %s,%s,%s,le\n",regname[rt],regname[rs1],regname[rs2]);
833 output_w32(0x5a800000 | (COND_LE << 12) | rm_rn_rd(rs2, rs1, rt));
834}
835
be516ebe 836static void emit_slti32(u_int rs,int imm,u_int rt)
837{
838 if(rs!=rt) emit_zeroreg(rt);
839 emit_cmpimm(rs,imm);
840 if(rs==rt) emit_movimm(0,rt);
841 emit_cmovl_imm(1,rt);
842}
843
844static void emit_sltiu32(u_int rs,int imm,u_int rt)
845{
846 if(rs!=rt) emit_zeroreg(rt);
847 emit_cmpimm(rs,imm);
848 if(rs==rt) emit_movimm(0,rt);
849 emit_cmovb_imm(1,rt);
850}
851
852static void emit_cmp(u_int rs,u_int rt)
853{
854 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
d1e4ebd9 855 output_w32(0x6b000000 | rm_rn_rd(rt, rs, WZR));
be516ebe 856}
857
858static void emit_set_gz32(u_int rs, u_int rt)
859{
860 //assem_debug("set_gz32\n");
861 emit_cmpimm(rs,1);
862 emit_movimm(1,rt);
863 emit_cmovl_imm(0,rt);
864}
865
866static void emit_set_nz32(u_int rs, u_int rt)
867{
868 //assem_debug("set_nz32\n");
d1e4ebd9 869 if(rs!=rt) emit_mov(rs,rt);
870 emit_test(rs,rs);
871 emit_cmovne_imm(1,rt);
be516ebe 872}
873
874static void emit_set_if_less32(u_int rs1, u_int rs2, u_int rt)
875{
876 //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
877 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
878 emit_cmp(rs1,rs2);
879 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
880 emit_cmovl_imm(1,rt);
881}
882
883static void emit_set_if_carry32(u_int rs1, u_int rs2, u_int rt)
884{
885 //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
886 if(rs1!=rt&&rs2!=rt) emit_zeroreg(rt);
887 emit_cmp(rs1,rs2);
888 if(rs1==rt||rs2==rt) emit_movimm(0,rt);
889 emit_cmovb_imm(1,rt);
890}
891
d1e4ebd9 892static void emit_call(const void *a)
be516ebe 893{
d1e4ebd9 894 intptr_t diff = (u_char *)a - out;
895 assem_debug("bl %p (%p+%lx)%s\n", a, out, diff, func_name(a));
687b4580 896 assert(!(diff & 3));
897 if (-134217728 <= diff && diff <= 134217727)
898 output_w32(0x94000000 | ((diff >> 2) & 0x03ffffff));
899 else
3968e69e 900 abort();
be516ebe 901}
902
d1e4ebd9 903static void emit_jmp(const void *a)
be516ebe 904{
d1e4ebd9 905 assem_debug("b %p (%p+%lx)%s\n", a, out, (u_char *)a - out, func_name(a));
906 u_int offset = genjmp(a);
907 output_w32(0x14000000 | offset);
be516ebe 908}
909
d1e4ebd9 910static void emit_jne(const void *a)
be516ebe 911{
d1e4ebd9 912 assem_debug("bne %p\n", a);
913 u_int offset = genjmpcc(a);
914 output_w32(0x54000000 | (offset << 5) | COND_NE);
be516ebe 915}
916
7c3a5182 917static void emit_jeq(const void *a)
be516ebe 918{
d1e4ebd9 919 assem_debug("beq %p\n", a);
920 u_int offset = genjmpcc(a);
921 output_w32(0x54000000 | (offset << 5) | COND_EQ);
be516ebe 922}
923
7c3a5182 924static void emit_js(const void *a)
be516ebe 925{
d1e4ebd9 926 assem_debug("bmi %p\n", a);
927 u_int offset = genjmpcc(a);
928 output_w32(0x54000000 | (offset << 5) | COND_MI);
be516ebe 929}
930
7c3a5182 931static void emit_jns(const void *a)
be516ebe 932{
d1e4ebd9 933 assem_debug("bpl %p\n", a);
934 u_int offset = genjmpcc(a);
935 output_w32(0x54000000 | (offset << 5) | COND_PL);
be516ebe 936}
937
7c3a5182 938static void emit_jl(const void *a)
be516ebe 939{
d1e4ebd9 940 assem_debug("blt %p\n", a);
941 u_int offset = genjmpcc(a);
942 output_w32(0x54000000 | (offset << 5) | COND_LT);
be516ebe 943}
944
7c3a5182 945static void emit_jge(const void *a)
be516ebe 946{
d1e4ebd9 947 assem_debug("bge %p\n", a);
948 u_int offset = genjmpcc(a);
949 output_w32(0x54000000 | (offset << 5) | COND_GE);
be516ebe 950}
951
7c3a5182 952static void emit_jno(const void *a)
be516ebe 953{
d1e4ebd9 954 assem_debug("bvc %p\n", a);
955 u_int offset = genjmpcc(a);
956 output_w32(0x54000000 | (offset << 5) | COND_VC);
be516ebe 957}
958
7c3a5182 959static void emit_jc(const void *a)
be516ebe 960{
d1e4ebd9 961 assem_debug("bcs %p\n", a);
962 u_int offset = genjmpcc(a);
963 output_w32(0x54000000 | (offset << 5) | COND_CS);
be516ebe 964}
965
3968e69e 966static void emit_cb(u_int isnz, u_int is64, const void *a, u_int r)
be516ebe 967{
3968e69e 968 assem_debug("cb%sz %s,%p\n", isnz?"n":"", is64?regname64[r]:regname[r], a);
d1e4ebd9 969 u_int offset = genjmpcc(a);
3968e69e 970 is64 = is64 ? 0x80000000 : 0;
971 isnz = isnz ? 0x01000000 : 0;
972 output_w32(0x34000000 | is64 | isnz | imm19_rt(offset, r));
973}
974
975static void emit_cbz(const void *a, u_int r)
976{
977 emit_cb(0, 0, a, r);
be516ebe 978}
979
980static void emit_jmpreg(u_int r)
981{
3968e69e 982 assem_debug("br %s\n", regname64[r]);
d1e4ebd9 983 output_w32(0xd61f0000 | rm_rn_rd(0, r, 0));
be516ebe 984}
985
986static void emit_retreg(u_int r)
987{
d1e4ebd9 988 assem_debug("ret %s\n", r == LR ? "" : regname64[r]);
be516ebe 989 output_w32(0xd65f0000 | rm_rn_rd(0, r, 0));
990}
991
992static void emit_ret(void)
993{
994 emit_retreg(LR);
995}
996
d1e4ebd9 997static void emit_adr(void *addr, u_int rt)
998{
999 intptr_t offset = (u_char *)addr - out;
1000 assert(-1048576 <= offset && offset < 1048576);
3968e69e 1001 assert(rt < 31);
d1e4ebd9 1002 assem_debug("adr x%d,#%#lx\n", rt, offset);
1003 output_w32(0x10000000 | ((offset&0x3) << 29) | (((offset>>2)&0x7ffff) << 5) | rt);
1004}
1005
3968e69e 1006static void emit_adrp(void *addr, u_int rt)
1007{
1008 intptr_t offset = ((intptr_t)addr & ~0xfffl) - ((intptr_t)out & ~0xfffl);
1009 assert(-4294967296l <= offset && offset < 4294967296l);
1010 assert(rt < 31);
1011 offset >>= 12;
1012 assem_debug("adrp %s,#%#lx(000)\n",regname64[rt],offset);
1013 output_w32(0x90000000 | ((offset&0x3)<<29) | (((offset>>2)&0x7ffff)<<5) | rt);
1014}
1015
be516ebe 1016static void emit_readword_indexed(int offset, u_int rs, u_int rt)
1017{
d1e4ebd9 1018 assem_debug("ldur %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1019 assert(-256 <= offset && offset < 256);
1020 output_w32(0xb8400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
1021}
1022
1023static void emit_strb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1024{
1025 assem_debug("strb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1026 output_w32(0x38204800 | rm_rn_rd(rs2, rs1, rt));
1027}
1028
1029static void emit_strh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1030{
1031 assem_debug("strh %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1032 output_w32(0x78204800 | rm_rn_rd(rs2, rs1, rt));
1033}
1034
1035static void emit_str_dualindexed(u_int rs1, u_int rs2, u_int rt)
1036{
1037 assem_debug("str %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1038 output_w32(0xb8204800 | rm_rn_rd(rs2, rs1, rt));
1039}
1040
1041static void emit_readdword_dualindexedx8(u_int rs1, u_int rs2, u_int rt)
1042{
1043 assem_debug("ldr %s, [%s,%s, uxtw #3]\n",regname64[rt],regname64[rs1],regname[rs2]);
1044 output_w32(0xf8605800 | rm_rn_rd(rs2, rs1, rt));
1045}
1046
1047static void emit_ldrb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1048{
1049 assem_debug("ldrb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1050 output_w32(0x38604800 | rm_rn_rd(rs2, rs1, rt));
1051}
1052
1053static void emit_ldrsb_dualindexed(u_int rs1, u_int rs2, u_int rt)
1054{
1055 assem_debug("ldrsb %s, [%s,%s]\n",regname[rt],regname64[rs1],regname[rs2]);
1056 output_w32(0x38a04800 | rm_rn_rd(rs2, rs1, rt));
1057}
1058
1059static void emit_ldrh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1060{
1061 assem_debug("ldrh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1062 output_w32(0x78604800 | rm_rn_rd(rs2, rs1, rt));
1063}
1064
1065static void emit_ldrsh_dualindexed(u_int rs1, u_int rs2, u_int rt)
1066{
1067 assem_debug("ldrsh %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1068 output_w32(0x78a04800 | rm_rn_rd(rs2, rs1, rt));
1069}
1070
1071static void emit_ldr_dualindexed(u_int rs1, u_int rs2, u_int rt)
1072{
1073 assem_debug("ldr %s, [%s,%s, uxtw]\n",regname[rt],regname64[rs1],regname[rs2]);
1074 output_w32(0xb8604800 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1075}
1076
be516ebe 1077static void emit_movsbl_indexed(int offset, u_int rs, u_int rt)
1078{
d1e4ebd9 1079 assem_debug("ldursb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1080 assert(-256 <= offset && offset < 256);
1081 output_w32(0x38c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1082}
1083
1084static void emit_movswl_indexed(int offset, u_int rs, u_int rt)
1085{
d1e4ebd9 1086 assem_debug("ldursh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1087 assert(-256 <= offset && offset < 256);
1088 output_w32(0x78c00000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1089}
1090
1091static void emit_movzbl_indexed(int offset, u_int rs, u_int rt)
1092{
d1e4ebd9 1093 assem_debug("ldurb %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1094 assert(-256 <= offset && offset < 256);
1095 output_w32(0x38400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1096}
1097
1098static void emit_movzwl_indexed(int offset, u_int rs, u_int rt)
1099{
d1e4ebd9 1100 assem_debug("ldurh %s,[%s+%#x]\n",regname[rt],regname64[rs],offset);
1101 assert(-256 <= offset && offset < 256);
1102 output_w32(0x78400000 | imm9_rn_rt(offset&0x1ff, rs, rt));
be516ebe 1103}
1104
be516ebe 1105static void emit_writeword_indexed(u_int rt, int offset, u_int rs)
1106{
3968e69e 1107 if (!(offset & 3) && (u_int)offset <= 16380) {
1108 assem_debug("str %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
687b4580 1109 output_w32(0xb9000000 | imm12_rn_rd(offset >> 2, rs, rt));
3968e69e 1110 }
1111 else if (-256 <= offset && offset < 256) {
1112 assem_debug("stur %s,[%s+%#x]\n", regname[rt], regname[rs], offset);
1113 output_w32(0xb8000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1114 }
687b4580 1115 else
1116 assert(0);
be516ebe 1117}
1118
1119static void emit_writehword_indexed(u_int rt, int offset, u_int rs)
1120{
3968e69e 1121 if (!(offset & 1) && (u_int)offset <= 8190) {
1122 assem_debug("strh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1123 output_w32(0x79000000 | imm12_rn_rd(offset >> 1, rs, rt));
3968e69e 1124 }
1125 else if (-256 <= offset && offset < 256) {
1126 assem_debug("sturh %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1127 output_w32(0x78000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1128 }
687b4580 1129 else
1130 assert(0);
be516ebe 1131}
1132
1133static void emit_writebyte_indexed(u_int rt, int offset, u_int rs)
1134{
3968e69e 1135 if ((u_int)offset < 4096) {
1136 assem_debug("strb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
687b4580 1137 output_w32(0x39000000 | imm12_rn_rd(offset, rs, rt));
3968e69e 1138 }
1139 else if (-256 <= offset && offset < 256) {
1140 assem_debug("sturb %s,[%s+%#x]\n", regname[rt], regname64[rs], offset);
1141 output_w32(0x38000000 | imm9_rn_rt(offset & 0x1ff, rs, rt));
1142 }
687b4580 1143 else
1144 assert(0);
be516ebe 1145}
1146
3968e69e 1147static void emit_umull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1148{
3968e69e 1149 assem_debug("umull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1150 output_w32(0x9ba00000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
be516ebe 1151}
1152
3968e69e 1153static void emit_smull(u_int rs1, u_int rs2, u_int rt)
be516ebe 1154{
3968e69e 1155 assem_debug("smull %s,%s,%s\n",regname64[rt],regname[rs1],regname[rs2]);
1156 output_w32(0x9b200000 | rm_ra_rn_rd(rs2, WZR, rs1, rt));
1157}
1158
1159static void emit_msub(u_int rs1, u_int rs2, u_int rs3, u_int rt)
1160{
1161 assem_debug("msub %s,%s,%s,%s\n",regname[rt],regname[rs1],regname[rs2],regname[rs3]);
1162 output_w32(0x1b008000 | rm_ra_rn_rd(rs2, rs3, rs1, rt));
1163}
1164
1165static void emit_sdiv(u_int rs1, u_int rs2, u_int rt)
1166{
1167 assem_debug("sdiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0x1ac00c00 | rm_rn_rd(rs2, rs1, rt));
be516ebe 1169}
1170
3968e69e 1171static void emit_udiv(u_int rs1, u_int rs2, u_int rt)
1172{
1173 assem_debug("udiv %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x1ac00800 | rm_rn_rd(rs2, rs1, rt));
1175}
1176
1177static void emit_clz(u_int rs, u_int rt)
be516ebe 1178{
1179 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
3968e69e 1180 output_w32(0x5ac01000 | rn_rd(rs, rt));
be516ebe 1181}
1182
be516ebe 1183// special case for checking invalid_code
d1e4ebd9 1184static void emit_cmpmem_indexedsr12_reg(u_int rbase, u_int r, u_int imm)
be516ebe 1185{
d1e4ebd9 1186 host_tempreg_acquire();
1187 emit_shrimm(r, 12, HOST_TEMPREG);
3968e69e 1188 assem_debug("ldrb %s,[%s,%s,uxtw]\n",regname[HOST_TEMPREG],regname64[rbase],regname[HOST_TEMPREG]);
1189 output_w32(0x38604800 | rm_rn_rd(HOST_TEMPREG, rbase, HOST_TEMPREG));
d1e4ebd9 1190 emit_cmpimm(HOST_TEMPREG, imm);
1191 host_tempreg_release();
be516ebe 1192}
1193
3968e69e 1194// special for loadlr_assemble, rs2 is destroyed
1195static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1196{
3968e69e 1197 emit_shl(rs2, shift, rs2);
1198 emit_bic(rs1, rs2, rt);
be516ebe 1199}
1200
3968e69e 1201static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
be516ebe 1202{
3968e69e 1203 emit_shr(rs2, shift, rs2);
1204 emit_bic(rs1, rs2, rt);
be516ebe 1205}
1206
d1e4ebd9 1207static void emit_loadlp_ofs(u_int ofs, u_int rt)
1208{
1209 output_w32(0x58000000 | imm19_rt(ofs, rt));
1210}
1211
687b4580 1212static void emit_ldst(int is_st, int is64, u_int rt, u_int rn, u_int ofs)
be516ebe 1213{
687b4580 1214 u_int op = 0xb9000000;
d1e4ebd9 1215 unused const char *ldst = is_st ? "st" : "ld";
1216 unused char rp = is64 ? 'x' : 'w';
687b4580 1217 assem_debug("%sr %c%d,[x%d,#%#x]\n", ldst, rp, rt, rn, ofs);
1218 is64 = is64 ? 1 : 0;
1219 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1220 ofs = (ofs >> (2+is64));
687b4580 1221 if (!is_st) op |= 0x00400000;
1222 if (is64) op |= 0x40000000;
d1e4ebd9 1223 output_w32(op | imm12_rn_rd(ofs, rn, rt));
be516ebe 1224}
1225
687b4580 1226static void emit_ldstp(int is_st, int is64, u_int rt1, u_int rt2, u_int rn, int ofs)
be516ebe 1227{
687b4580 1228 u_int op = 0x29000000;
d1e4ebd9 1229 unused const char *ldst = is_st ? "st" : "ld";
1230 unused char rp = is64 ? 'x' : 'w';
687b4580 1231 assem_debug("%sp %c%d,%c%d,[x%d,#%#x]\n", ldst, rp, rt1, rp, rt2, rn, ofs);
1232 is64 = is64 ? 1 : 0;
1233 assert((ofs & ((1 << (2+is64)) - 1)) == 0);
1234 ofs = (ofs >> (2+is64));
1235 assert(-64 <= ofs && ofs <= 63);
1236 ofs &= 0x7f;
1237 if (!is_st) op |= 0x00400000;
1238 if (is64) op |= 0x80000000;
d1e4ebd9 1239 output_w32(op | imm7_rt2_rn_rt(ofs, rt2, rn, rt1));
687b4580 1240}
1241
1242static void save_load_regs_all(int is_store, u_int reglist)
1243{
1244 int ofs = 0, c = 0;
1245 u_int r, pair[2];
1246 for (r = 0; reglist; r++, reglist >>= 1) {
1247 if (reglist & 1)
1248 pair[c++] = r;
1249 if (c == 2) {
1250 emit_ldstp(is_store, 1, pair[0], pair[1], SP, SSP_CALLEE_REGS + ofs);
1251 ofs += 8 * 2;
1252 c = 0;
1253 }
1254 }
1255 if (c) {
1256 emit_ldst(is_store, 1, pair[0], SP, SSP_CALLEE_REGS + ofs);
1257 ofs += 8;
1258 }
1259 assert(ofs <= SSP_CALLER_REGS);
be516ebe 1260}
1261
1262// Save registers before function call
1263static void save_regs(u_int reglist)
1264{
1265 reglist &= CALLER_SAVE_REGS; // only save the caller-save registers
687b4580 1266 save_load_regs_all(1, reglist);
be516ebe 1267}
1268
1269// Restore registers after function call
1270static void restore_regs(u_int reglist)
1271{
1272 reglist &= CALLER_SAVE_REGS;
687b4580 1273 save_load_regs_all(0, reglist);
be516ebe 1274}
1275
1276/* Stubs/epilogue */
1277
1278static void literal_pool(int n)
1279{
1280 (void)literals;
1281}
1282
// Emits nothing on this backend; n is ignored.
static void literal_pool_jumpover(int n)
{
  (void)n;
}
1286
d1e4ebd9 1287// parsed by get_pointer, find_extjump_insn
1288static void emit_extjump2(u_char *addr, u_int target, void *linker)
be516ebe 1289{
d1e4ebd9 1290 assert(((addr[3]&0xfc)==0x14) || ((addr[3]&0xff)==0x54)); // b or b.cond
be516ebe 1291
d1e4ebd9 1292 emit_movz(target & 0xffff, 0);
1293 emit_movk_lsl16(target >> 16, 0);
1294
1295 // addr is in the current recompiled block (max 256k)
1296 // offset shouldn't exceed +/-1MB
1297 emit_adr(addr, 1);
1298 emit_jmp(linker);
be516ebe 1299}
1300
d1e4ebd9 1301static void check_extjump2(void *src)
be516ebe 1302{
d1e4ebd9 1303 u_int *ptr = src;
1304 assert((ptr[0] & 0xffe0001f) == 0x52800000); // movz r0, #val
1305 (void)ptr;
be516ebe 1306}
1307
1308// put rt_val into rt, potentially making use of rs with value rs_val
d1e4ebd9 1309static void emit_movimm_from(u_int rs_val, u_int rs, u_int rt_val, u_int rt)
be516ebe 1310{
d1e4ebd9 1311 int diff = rt_val - rs_val;
3968e69e 1312 if ((-4096 < diff && diff < 4096)
1313 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff)))
687b4580 1314 emit_addimm(rs, diff, rt);
3968e69e 1315 else if (rt_val == ~rs_val)
1316 emit_not(rs, rt);
d1e4ebd9 1317 else if (is_rotated_mask(rs_val ^ rt_val))
1318 emit_xorimm(rs, rs_val ^ rt_val, rt);
687b4580 1319 else
d1e4ebd9 1320 emit_movimm(rt_val, rt);
be516ebe 1321}
1322
d1e4ebd9 1323// return 1 if the above function can do it's job cheaply
687b4580 1324static int is_similar_value(u_int v1, u_int v2)
be516ebe 1325{
687b4580 1326 int diff = v1 - v2;
3968e69e 1327 return (-4096 < diff && diff < 4096)
1328 || (-16777216 < diff && diff < 16777216 && !(diff & 0xfff))
1329 || v1 == ~v2
d1e4ebd9 1330 || is_rotated_mask(v1 ^ v2);
1331}
1332
1333// trashes r2
1334static void pass_args64(u_int a0, u_int a1)
1335{
1336 if(a0==1&&a1==0) {
1337 // must swap
1338 emit_mov64(a0,2); emit_mov64(a1,1); emit_mov64(2,0);
1339 }
1340 else if(a0!=0&&a1==0) {
1341 emit_mov64(a1,1);
1342 if (a0>=0) emit_mov64(a0,0);
1343 }
1344 else {
1345 if(a0>=0&&a0!=0) emit_mov64(a0,0);
1346 if(a1>=0&&a1!=1) emit_mov64(a1,1);
1347 }
be516ebe 1348}
1349
d1e4ebd9 1350static void loadstore_extend(enum stub_type type, u_int rs, u_int rt)
1351{
1352 switch(type) {
1353 case LOADB_STUB: emit_sbfm(rs, 7, rt); break;
1354 case LOADBU_STUB:
1355 case STOREB_STUB: emit_ubfm(rs, 7, rt); break;
1356 case LOADH_STUB: emit_sbfm(rs, 15, rt); break;
1357 case LOADHU_STUB:
1358 case STOREH_STUB: emit_ubfm(rs, 15, rt); break;
1359 case LOADW_STUB:
1360 case STOREW_STUB: if (rs != rt) emit_mov(rs, rt); break;
3968e69e 1361 default: assert(0);
d1e4ebd9 1362 }
1363}
1364
1365#include "pcsxmem.h"
be516ebe 1366//#include "pcsxmem_inline.c"
1367
1368static void do_readstub(int n)
1369{
1370 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
d1e4ebd9 1371 set_jump_target(stubs[n].addr, out);
1372 enum stub_type type = stubs[n].type;
1373 int i = stubs[n].a;
1374 int rs = stubs[n].b;
1375 const struct regstat *i_regs = (void *)stubs[n].c;
1376 u_int reglist = stubs[n].e;
1377 const signed char *i_regmap = i_regs->regmap;
1378 int rt;
1379 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
1380 rt=get_reg(i_regmap,FTEMP);
1381 }else{
1382 rt=get_reg(i_regmap,rt1[i]);
1383 }
1384 assert(rs>=0);
1385 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1386 void *restore_jump = NULL, *handler_jump = NULL;
1387 reglist|=(1<<rs);
1388 for (r = 0; r < HOST_CCREG; r++) {
1389 if (r != EXCLUDE_REG && ((1 << r) & reglist) == 0) {
1390 temp = r;
1391 break;
1392 }
1393 }
1394 if(rt>=0&&rt1[i]!=0)
1395 reglist&=~(1<<rt);
1396 if(temp==-1) {
1397 save_regs(reglist);
1398 regs_saved=1;
1399 temp=(rs==0)?2:0;
1400 }
1401 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1402 temp2=1;
1403 emit_readdword(&mem_rtab,temp);
1404 emit_shrimm(rs,12,temp2);
1405 emit_readdword_dualindexedx8(temp,temp2,temp2);
1406 emit_adds64(temp2,temp2,temp2);
1407 handler_jump=out;
1408 emit_jc(0);
1409 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1410 switch(type) {
1411 case LOADB_STUB: emit_ldrsb_dualindexed(temp2,rs,rt); break;
1412 case LOADBU_STUB: emit_ldrb_dualindexed(temp2,rs,rt); break;
1413 case LOADH_STUB: emit_ldrsh_dualindexed(temp2,rs,rt); break;
1414 case LOADHU_STUB: emit_ldrh_dualindexed(temp2,rs,rt); break;
1415 case LOADW_STUB: emit_ldr_dualindexed(temp2,rs,rt); break;
3968e69e 1416 default: assert(0);
d1e4ebd9 1417 }
1418 }
1419 if(regs_saved) {
1420 restore_jump=out;
1421 emit_jmp(0); // jump to reg restore
1422 }
1423 else
1424 emit_jmp(stubs[n].retaddr); // return address
1425 set_jump_target(handler_jump, out);
1426
1427 if(!regs_saved)
1428 save_regs(reglist);
1429 void *handler=NULL;
1430 if(type==LOADB_STUB||type==LOADBU_STUB)
1431 handler=jump_handler_read8;
1432 if(type==LOADH_STUB||type==LOADHU_STUB)
1433 handler=jump_handler_read16;
1434 if(type==LOADW_STUB)
1435 handler=jump_handler_read32;
1436 assert(handler);
1437 pass_args64(rs,temp2);
1438 int cc=get_reg(i_regmap,CCREG);
1439 if(cc<0)
1440 emit_loadreg(CCREG,2);
1441 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1442 emit_call(handler);
1443 // (no cycle reload after read)
1444 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1445 loadstore_extend(type,0,rt);
1446 }
1447 if(restore_jump)
1448 set_jump_target(restore_jump, out);
1449 restore_regs(reglist);
1450 emit_jmp(stubs[n].retaddr);
be516ebe 1451}
1452
1453static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1454{
d1e4ebd9 1455 int rs=get_reg(regmap,target);
1456 int rt=get_reg(regmap,target);
1457 if(rs<0) rs=get_reg(regmap,-1);
1458 assert(rs>=0);
1459 u_int is_dynamic=0;
1460 uintptr_t host_addr = 0;
1461 void *handler;
1462 int cc=get_reg(regmap,CCREG);
1463 //if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1464 // return;
1465 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1466 if (handler == NULL) {
1467 if(rt<0||rt1[i]==0)
1468 return;
1469 if (addr != host_addr) {
1470 if (host_addr >= 0x100000000ull)
1471 abort(); // ROREG not implemented
1472 emit_movimm_from(addr, rs, host_addr, rs);
1473 }
1474 switch(type) {
1475 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1476 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1477 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1478 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1479 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1480 default: assert(0);
1481 }
1482 return;
1483 }
1484 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1485 if(is_dynamic) {
1486 if(type==LOADB_STUB||type==LOADBU_STUB)
1487 handler=jump_handler_read8;
1488 if(type==LOADH_STUB||type==LOADHU_STUB)
1489 handler=jump_handler_read16;
1490 if(type==LOADW_STUB)
1491 handler=jump_handler_read32;
1492 }
1493
1494 // call a memhandler
1495 if(rt>=0&&rt1[i]!=0)
1496 reglist&=~(1<<rt);
1497 save_regs(reglist);
1498 if(target==0)
1499 emit_movimm(addr,0);
1500 else if(rs!=0)
1501 emit_mov(rs,0);
1502 if(cc<0)
1503 emit_loadreg(CCREG,2);
1504 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3968e69e 1505 if(is_dynamic) {
1506 uintptr_t l1 = ((uintptr_t *)mem_rtab)[addr>>12] << 1;
1507 emit_adrp((void *)l1, 1);
1508 emit_addimm64(1, l1 & 0xfff, 1);
1509 }
d1e4ebd9 1510 else
1511 emit_call(do_memhandler_pre);
1512
1513 emit_call(handler);
1514
1515 // (no cycle reload after read)
1516 if(rt>=0&&rt1[i]!=0)
1517 loadstore_extend(type, 0, rt);
1518 restore_regs(reglist);
be516ebe 1519}
1520
1521static void do_writestub(int n)
1522{
1523 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
d1e4ebd9 1524 set_jump_target(stubs[n].addr, out);
1525 enum stub_type type=stubs[n].type;
1526 int i=stubs[n].a;
1527 int rs=stubs[n].b;
1528 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1529 u_int reglist=stubs[n].e;
1530 signed char *i_regmap=i_regs->regmap;
1531 int rt,r;
1532 if(itype[i]==C1LS||itype[i]==C2LS) {
1533 rt=get_reg(i_regmap,r=FTEMP);
1534 }else{
1535 rt=get_reg(i_regmap,r=rs2[i]);
1536 }
1537 assert(rs>=0);
1538 assert(rt>=0);
1539 int rtmp,temp=-1,temp2,regs_saved=0;
1540 void *restore_jump = NULL, *handler_jump = NULL;
1541 int reglist2=reglist|(1<<rs)|(1<<rt);
1542 for (rtmp = 0; rtmp < HOST_CCREG; rtmp++) {
1543 if (rtmp != EXCLUDE_REG && ((1 << rtmp) & reglist) == 0) {
1544 temp = rtmp;
1545 break;
1546 }
1547 }
1548 if(temp==-1) {
1549 save_regs(reglist);
1550 regs_saved=1;
1551 for(rtmp=0;rtmp<=3;rtmp++)
1552 if(rtmp!=rs&&rtmp!=rt)
1553 {temp=rtmp;break;}
1554 }
1555 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1556 temp2=3;
1557 else {
1558 host_tempreg_acquire();
1559 temp2=HOST_TEMPREG;
1560 }
1561 emit_readdword(&mem_wtab,temp);
1562 emit_shrimm(rs,12,temp2);
1563 emit_readdword_dualindexedx8(temp,temp2,temp2);
1564 emit_adds64(temp2,temp2,temp2);
1565 handler_jump=out;
1566 emit_jc(0);
1567 switch(type) {
1568 case STOREB_STUB: emit_strb_dualindexed(temp2,rs,rt); break;
1569 case STOREH_STUB: emit_strh_dualindexed(temp2,rs,rt); break;
1570 case STOREW_STUB: emit_str_dualindexed(temp2,rs,rt); break;
1571 default: assert(0);
1572 }
1573 if(regs_saved) {
1574 restore_jump=out;
1575 emit_jmp(0); // jump to reg restore
1576 }
1577 else
1578 emit_jmp(stubs[n].retaddr); // return address (invcode check)
1579 set_jump_target(handler_jump, out);
1580
1581 // TODO FIXME: regalloc should prefer callee-saved regs
1582 if(!regs_saved)
1583 save_regs(reglist);
1584 void *handler=NULL;
1585 switch(type) {
1586 case STOREB_STUB: handler=jump_handler_write8; break;
1587 case STOREH_STUB: handler=jump_handler_write16; break;
1588 case STOREW_STUB: handler=jump_handler_write32; break;
3968e69e 1589 default: assert(0);
d1e4ebd9 1590 }
1591 assert(handler);
1592 pass_args(rs,rt);
1593 if(temp2!=3) {
1594 emit_mov64(temp2,3);
1595 host_tempreg_release();
1596 }
1597 int cc=get_reg(i_regmap,CCREG);
1598 if(cc<0)
1599 emit_loadreg(CCREG,2);
1600 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
1601 // returns new cycle_count
1602 emit_call(handler);
1603 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
1604 if(cc<0)
1605 emit_storereg(CCREG,2);
1606 if(restore_jump)
1607 set_jump_target(restore_jump, out);
1608 restore_regs(reglist);
1609 emit_jmp(stubs[n].retaddr);
be516ebe 1610}
1611
1612static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
1613{
687b4580 1614 int rs = get_reg(regmap,-1);
1615 int rt = get_reg(regmap,target);
1616 assert(rs >= 0);
1617 assert(rt >= 0);
1618 uintptr_t host_addr = 0;
1619 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1620 if (handler == NULL) {
d1e4ebd9 1621 if (addr != host_addr) {
1622 if (host_addr >= 0x100000000ull)
1623 abort(); // ROREG not implemented
687b4580 1624 emit_movimm_from(addr, rs, host_addr, rs);
d1e4ebd9 1625 }
1626 switch (type) {
687b4580 1627 case STOREB_STUB: emit_writebyte_indexed(rt, 0, rs); break;
1628 case STOREH_STUB: emit_writehword_indexed(rt, 0, rs); break;
1629 case STOREW_STUB: emit_writeword_indexed(rt, 0, rs); break;
1630 default: assert(0);
1631 }
1632 return;
1633 }
1634
1635 // call a memhandler
1636 save_regs(reglist);
687b4580 1637 emit_writeword(rs, &address); // some handlers still need it
d1e4ebd9 1638 loadstore_extend(type, rt, 0);
1639 int cc, cc_use;
1640 cc = cc_use = get_reg(regmap, CCREG);
1641 if (cc < 0)
1642 emit_loadreg(CCREG, (cc_use = 2));
1643 emit_addimm(cc_use, CLOCK_ADJUST(adj+1), 2);
1644
1645 emit_call(do_memhandler_pre);
687b4580 1646 emit_call(handler);
d1e4ebd9 1647 emit_call(do_memhandler_post);
1648 emit_addimm(0, -CLOCK_ADJUST(adj+1), cc_use);
1649 if (cc < 0)
1650 emit_storereg(CCREG, cc_use);
687b4580 1651 restore_regs(reglist);
be516ebe 1652}
1653
// Compare size bytes of source against copy.
// Returns the memcmp() result: 0 when the two ranges are identical.
static int verify_code_arm64(const void *source, const void *copy, u_int size)
{
  // debugging aid:
  //printf("%s %p,%#x = %d\n", __func__, source, size, ret);
  return memcmp(source, copy, size);
}
1660
1661// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
1662static void do_dirty_stub_base(u_int vaddr)
1663{
1664 assert(slen <= MAXBLOCK);
1665 emit_loadlp_ofs(0, 0); // ldr x1, source
1666 emit_loadlp_ofs(0, 1); // ldr x2, copy
1667 emit_movz(slen*4, 2);
1668 emit_call(verify_code_arm64);
1669 void *jmp = out;
1670 emit_cbz(0, 0);
1671 emit_movz(vaddr & 0xffff, 0);
1672 emit_movk_lsl16(vaddr >> 16, 0);
1673 emit_call(get_addr);
1674 emit_jmpreg(0);
1675 set_jump_target(jmp, out);
1676}
1677
// Assert that ptr points at a dirty-check stub by checking the
// instructions at indices 0, 1, 2 and 8.
static void assert_dirty_stub(const u_int *ptr)
{
  // the two literal loads and the length
  assert((ptr[0] & 0xff00001f) == 0x58000000); // ldr x0, source
  assert((ptr[1] & 0xff00001f) == 0x58000001); // ldr x1, copy
  assert((ptr[2] & 0xffe0001f) == 0x52800002); // movz w2, #slen*4

  // the indirect jump that closes the sequence
  assert( ptr[8]               == 0xd61f0000); // br x0
}
1685
// Patch the "ldr (literal)" instruction at loadl so that it loads from lit.
// The instruction must still have an all-zero offset field, and lit must
// be word aligned and less than 1MB past the instruction.
static void set_loadlp(u_int *loadl, void *lit)
{
  uintptr_t ofs = (u_char *)lit - (u_char *)loadl;
  u_int imm19;

  assert((*loadl & ~0x1f) == 0x58000000);
  assert((ofs & 3) == 0);
  assert(ofs < 0x100000);
  imm19 = (u_int)(ofs >> 2); // byte distance -> words
  *loadl = *loadl | (imm19 << 5);
}
1694
d1e4ebd9 1695static void do_dirty_stub_emit_literals(u_int *loadlps)
1696{
1697 set_loadlp(&loadlps[0], out);
1698 output_w64((uintptr_t)source);
1699 set_loadlp(&loadlps[1], out);
1700 output_w64((uintptr_t)copy);
be516ebe 1701}
1702
d1e4ebd9 1703static void *do_dirty_stub(int i)
be516ebe 1704{
1705 assem_debug("do_dirty_stub %x\n",start+i*4);
d1e4ebd9 1706 u_int *loadlps = (void *)out;
3968e69e 1707 do_dirty_stub_base(start + i*4);
d1e4ebd9 1708 void *entry = out;
be516ebe 1709 load_regs_entry(i);
d1e4ebd9 1710 if (entry == out)
1711 entry = instr_addr[i];
1712 emit_jmp(instr_addr[i]);
1713 do_dirty_stub_emit_literals(loadlps);
1714 return entry;
be516ebe 1715}
1716
3968e69e 1717static void do_dirty_stub_ds(void)
be516ebe 1718{
d1e4ebd9 1719 u_int *loadlps = (void *)out;
3968e69e 1720 do_dirty_stub_base(start + 1);
1721 void *lit_jumpover = out;
d1e4ebd9 1722 emit_jmp(out + 8*2);
1723 do_dirty_stub_emit_literals(loadlps);
3968e69e 1724 set_jump_target(lit_jumpover, out);
be516ebe 1725}
1726
// Return the 64-bit literal referenced by the "ldr (literal)" instruction
// at i. The instruction's signed 19-bit offset field (bits 23:5) counts
// 32-bit words relative to the instruction itself.
static uint64_t get_from_ldr_literal(const u_int *i)
{
  uint64_t literal;
  int32_t ofs;

  assert((i[0] & 0xff000000) == 0x58000000);
  // Extract and sign-extend the 19-bit field explicitly rather than relying
  // on implementation-defined signed conversion and right shift.
  ofs = (int32_t)((i[0] >> 5) & 0x7ffff);
  if (ofs & 0x40000)
    ofs -= 0x80000;
  // memcpy: the literal sits in an instruction stream that is only
  // guaranteed to be 4-byte aligned, and this keeps the access const-correct.
  memcpy(&literal, i + ofs, sizeof(literal));
  return literal;
}
be516ebe 1735
// Return the 16-bit immediate (bits 20:5) of the "movz" instruction at i.
static uint64_t get_from_movz(const u_int *i)
{
  u_int imm16;

  assert((i[0] & 0x7fe00000) == 0x52800000);
  imm16 = (i[0] >> 5) & 0xffff;
  return imm16;
}
be516ebe 1741
3968e69e 1742// Find the "clean" entry point from a "dirty" entry point
1743// by skipping past the call to verify_code
1744static void *get_clean_addr(u_int *addr)
be516ebe 1745{
3968e69e 1746 assert_dirty_stub(addr);
1747 return addr + 9;
be516ebe 1748}
be516ebe 1749
3968e69e 1750static int verify_dirty(const u_int *ptr)
be516ebe 1751{
3968e69e 1752 const void *source, *copy;
1753 u_int len;
1754 assert_dirty_stub(ptr);
1755 source = (void *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1756 copy = (void *)get_from_ldr_literal(&ptr[1]); // ldr x1, copy
1757 len = get_from_movz(&ptr[2]); // movz w3, #slen*4
1758 return !memcmp(source, copy, len);
1759}
1760
1761static int isclean(void *addr)
1762{
1763 const u_int *ptr = addr;
1764 if ((*ptr >> 24) == 0x58) { // the only place ldr (literal) is used
1765 assert_dirty_stub(ptr);
1766 return 0;
1767 }
1768 return 1;
1769}
1770
1771// get source that block at addr was compiled from (host pointers)
1772static void get_bounds(void *addr, u_char **start, u_char **end)
1773{
1774 const u_int *ptr = addr;
1775 assert_dirty_stub(ptr);
1776 *start = (u_char *)get_from_ldr_literal(&ptr[0]); // ldr x1, source
1777 *end = *start + get_from_movz(&ptr[2]); // movz w3, #slen*4
1778}
1779
1780/* Special assem */
1781
1782static void c2op_prologue(u_int op,u_int reglist)
1783{
1784 save_load_regs_all(1, reglist);
1785#ifdef PCNT
1786 emit_movimm(op, 0);
1787 emit_call(pcnt_gte_start);
1788#endif
1789 // pointer to cop2 regs
1790 emit_addimm64(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0);
1791}
1792
1793static void c2op_epilogue(u_int op,u_int reglist)
1794{
1795#ifdef PCNT
1796 emit_movimm(op, 0);
1797 emit_call(pcnt_gte_end);
1798#endif
1799 save_load_regs_all(0, reglist);
be516ebe 1800}
1801
1802static void c2op_assemble(int i,struct regstat *i_regs)
1803{
3968e69e 1804 u_int c2op=source[i]&0x3f;
1805 u_int hr,reglist_full=0,reglist;
1806 int need_flags,need_ir;
1807 for(hr=0;hr<HOST_REGS;hr++) {
1808 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
1809 }
1810 reglist=reglist_full&CALLER_SAVE_REGS;
1811
1812 if (gte_handlers[c2op]!=NULL) {
1813 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
1814 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
1815 assem_debug("gte op %08x, unneeded %016lx, need_flags %d, need_ir %d\n",
1816 source[i],gte_unneeded[i+1],need_flags,need_ir);
1817 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
1818 need_flags=0;
1819 //int shift = (source[i] >> 19) & 1;
1820 //int lm = (source[i] >> 10) & 1;
1821 switch(c2op) {
1822 default:
1823 (void)need_ir;
1824 c2op_prologue(c2op,reglist);
1825 emit_movimm(source[i],1); // opcode
1826 emit_writeword(1,&psxRegs.code);
1827 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
1828 break;
1829 }
1830 c2op_epilogue(c2op,reglist);
1831 }
1832}
1833
1834static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
1835{
1836 //value = value & 0x7ffff000;
1837 //if (value & 0x7f87e000) value |= 0x80000000;
1838 emit_andimm(sl, 0x7fffe000, temp);
1839 emit_testimm(temp, 0xff87ffff);
1840 emit_andimm(sl, 0x7ffff000, temp);
1841 host_tempreg_acquire();
1842 emit_orimm(temp, 0x80000000, HOST_TEMPREG);
1843 emit_cmovne_reg(HOST_TEMPREG, temp);
1844 host_tempreg_release();
1845 assert(0); // testing needed
1846}
1847
1848static void do_mfc2_31_one(u_int copr,signed char temp)
1849{
1850 emit_readshword(&reg_cop2d[copr],temp);
1851 emit_bicsar_imm(temp,31,temp);
1852 emit_cmpimm(temp,0xf80);
1853 emit_csinvle_reg(temp,WZR,temp); // if (temp > 0xf80) temp = ~0;
1854 emit_andimm(temp,0xf80,temp);
1855}
1856
1857static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
1858{
1859 if (temp < 0) {
1860 host_tempreg_acquire();
1861 temp = HOST_TEMPREG;
1862 }
1863 do_mfc2_31_one(9,temp);
1864 emit_shrimm(temp,7,tl);
1865 do_mfc2_31_one(10,temp);
1866 emit_orrshr_imm(temp,2,tl);
1867 do_mfc2_31_one(11,temp);
1868 emit_orrshl_imm(temp,3,tl);
1869 emit_writeword(tl,&reg_cop2d[29]);
1870
1871 if (temp == HOST_TEMPREG)
1872 host_tempreg_release();
be516ebe 1873}
1874
1875static void multdiv_assemble_arm64(int i,struct regstat *i_regs)
1876{
3968e69e 1877 // case 0x18: MULT
1878 // case 0x19: MULTU
1879 // case 0x1A: DIV
1880 // case 0x1B: DIVU
1881 if(rs1[i]&&rs2[i])
1882 {
1883 switch(opcode2[i])
1884 {
1885 case 0x18: // MULT
1886 case 0x19: // MULTU
1887 {
1888 signed char m1=get_reg(i_regs->regmap,rs1[i]);
1889 signed char m2=get_reg(i_regs->regmap,rs2[i]);
1890 signed char hi=get_reg(i_regs->regmap,HIREG);
1891 signed char lo=get_reg(i_regs->regmap,LOREG);
1892 assert(m1>=0);
1893 assert(m2>=0);
1894 assert(hi>=0);
1895 assert(lo>=0);
1896
1897 if(opcode2[i]==0x18) // MULT
1898 emit_smull(m1,m2,hi);
1899 else // MULTU
1900 emit_umull(m1,m2,hi);
1901
1902 emit_mov(hi,lo);
1903 emit_shrimm64(hi,32,hi);
1904 break;
1905 }
1906 case 0x1A: // DIV
1907 case 0x1B: // DIVU
1908 {
1909 signed char numerator=get_reg(i_regs->regmap,rs1[i]);
1910 signed char denominator=get_reg(i_regs->regmap,rs2[i]);
1911 signed char quotient=get_reg(i_regs->regmap,LOREG);
1912 signed char remainder=get_reg(i_regs->regmap,HIREG);
1913 assert(numerator>=0);
1914 assert(denominator>=0);
1915 assert(quotient>=0);
1916 assert(remainder>=0);
1917
1918 if (opcode2[i] == 0x1A) // DIV
1919 emit_sdiv(numerator,denominator,quotient);
1920 else // DIVU
1921 emit_udiv(numerator,denominator,quotient);
1922 emit_msub(quotient,denominator,numerator,remainder);
1923
1924 // div 0 quotient (remainder is already correct)
1925 host_tempreg_acquire();
1926 if (opcode2[i] == 0x1A) // DIV
1927 emit_sub_asrimm(0,numerator,31,HOST_TEMPREG);
1928 else
1929 emit_movimm(~0,HOST_TEMPREG);
1930 emit_test(denominator,denominator);
1931 emit_cmoveq_reg(HOST_TEMPREG,quotient);
1932 host_tempreg_release();
1933 break;
1934 }
1935 default:
1936 assert(0);
1937 }
1938 }
1939 else
1940 {
1941 signed char hr=get_reg(i_regs->regmap,HIREG);
1942 signed char lr=get_reg(i_regs->regmap,LOREG);
1943 if ((opcode2[i]==0x1A || opcode2[i]==0x1B) && rs2[i]==0) // div 0
1944 {
1945 if (rs1[i]) {
1946 signed char numerator = get_reg(i_regs->regmap, rs1[i]);
1947 assert(numerator >= 0);
1948 if (hr >= 0)
1949 emit_mov(numerator,hr);
1950 if (lr >= 0) {
1951 if (opcode2[i] == 0x1A) // DIV
1952 emit_sub_asrimm(0,numerator,31,lr);
1953 else
1954 emit_movimm(~0,lr);
1955 }
1956 }
1957 else {
1958 if (hr >= 0) emit_zeroreg(hr);
1959 if (lr >= 0) emit_movimm(~0,lr);
1960 }
1961 }
1962 else
1963 {
1964 // Multiply by zero is zero.
1965 if (hr >= 0) emit_zeroreg(hr);
1966 if (lr >= 0) emit_zeroreg(lr);
1967 }
1968 }
be516ebe 1969}
1970#define multdiv_assemble multdiv_assemble_arm64
1971
d1e4ebd9 1972static void do_jump_vaddr(u_int rs)
1973{
1974 if (rs != 0)
1975 emit_mov(rs, 0);
1976 emit_call(get_addr_ht);
1977 emit_jmpreg(0);
1978}
1979
be516ebe 1980static void do_preload_rhash(u_int r) {
1981 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
1982 // register. On ARM the hash can be done with a single instruction (below)
1983}
1984
1985static void do_preload_rhtbl(u_int ht) {
d1e4ebd9 1986 emit_addimm64(FP, (u_char *)&mini_ht - (u_char *)&dynarec_local, ht);
be516ebe 1987}
1988
1989static void do_rhash(u_int rs,u_int rh) {
1990 emit_andimm(rs, 0xf8, rh);
1991}
1992
d1e4ebd9 1993static void do_miniht_load(int ht, u_int rh) {
1994 emit_add64(ht, rh, ht);
1995 emit_ldst(0, 0, rh, ht, 0);
be516ebe 1996}
1997
d1e4ebd9 1998static void do_miniht_jump(u_int rs, u_int rh, u_int ht) {
1999 emit_cmp(rh, rs);
2000 void *jaddr = out;
2001 emit_jeq(0);
2002 do_jump_vaddr(rs);
2003
2004 set_jump_target(jaddr, out);
2005 assem_debug("ldr %s,[%s,#8]\n",regname64[ht], regname64[ht]);
2006 output_w32(0xf9400000 | imm12_rn_rd(8 >> 3, ht, ht));
2007 emit_jmpreg(ht);
be516ebe 2008}
2009
d1e4ebd9 2010// parsed by set_jump_target?
be516ebe 2011static void do_miniht_insert(u_int return_address,u_int rt,int temp) {
d1e4ebd9 2012 emit_movz_lsl16((return_address>>16)&0xffff,rt);
2013 emit_movk(return_address&0xffff,rt);
2014 add_to_linker(out,return_address,1);
2015 emit_adr(out,temp);
2016 emit_writedword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
2017 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
be516ebe 2018}
2019
2020static void mark_clear_cache(void *target)
2021{
2022 u_long offset = (u_char *)target - translation_cache;
2023 u_int mask = 1u << ((offset >> 12) & 31);
2024 if (!(needs_clear_cache[offset >> 17] & mask)) {
2025 char *start = (char *)((u_long)target & ~4095ul);
2026 start_tcache_write(start, start + 4096);
2027 needs_clear_cache[offset >> 17] |= mask;
2028 }
2029}
2030
2031// Clearing the cache is rather slow on ARM Linux, so mark the areas
2032// that need to be cleared, and then only clear these areas once.
2033static void do_clear_cache()
2034{
2035 int i,j;
2036 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2037 {
2038 u_int bitmap=needs_clear_cache[i];
2039 if(bitmap) {
2040 u_char *start, *end;
2041 for(j=0;j<32;j++)
2042 {
2043 if(bitmap&(1<<j)) {
2044 start=translation_cache+i*131072+j*4096;
2045 end=start+4095;
2046 j++;
2047 while(j<32) {
2048 if(bitmap&(1<<j)) {
2049 end+=4096;
2050 j++;
2051 }else{
2052 end_tcache_write(start, end);
2053 break;
2054 }
2055 }
2056 }
2057 }
2058 needs_clear_cache[i]=0;
2059 }
2060 }
2061}
2062
2063// CPU-architecture-specific initialization
static void arch_init()
{
  // Nothing to initialize for this target.
}
2066
2067// vim:shiftwidth=2:expandtab