frontend: update libpicofe, fix missed callbacks
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
de0ed23d 22#include <strings.h> // ffs
6c0eefaf 23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
555d3b51 31#ifdef TC_WRITE_OFFSET
32#error "not implemented"
33#endif
34
dd114d7d 35#ifdef DRC_DBG
36#pragma GCC diagnostic ignored "-Wunused-function"
37#pragma GCC diagnostic ignored "-Wunused-variable"
38#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
39#endif
40
/* Entry points declared here; their definitions are not in this part of the
 * file (presumably the assembly linkage code - confirm). */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr_rN handlers indexed by ARM register number N.
 * Slots 11 and 13-15 (fp, sp, lr, pc in regname[]) have no handler
 * and are left null.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 75
/* Declared here; definitions are not in this part of the file. */
void invalidate_addr_r0();
void invalidate_addr_r1();
void invalidate_addr_r2();
void invalidate_addr_r3();
void invalidate_addr_r4();
void invalidate_addr_r5();
void invalidate_addr_r6();
void invalidate_addr_r7();
void invalidate_addr_r8();
void invalidate_addr_r9();
void invalidate_addr_r10();
void invalidate_addr_r12();

/*
 * invalidate_addr_rN handlers indexed by ARM register number N.
 * Slots 11 and 13-15 (fp, sp, lr, pc in regname[]) have no handler
 * and are left null.
 */
const void *invalidate_addr_reg[16] = {
  [0]  = invalidate_addr_r0,
  [1]  = invalidate_addr_r1,
  [2]  = invalidate_addr_r2,
  [3]  = invalidate_addr_r3,
  [4]  = invalidate_addr_r4,
  [5]  = invalidate_addr_r5,
  [6]  = invalidate_addr_r6,
  [7]  = invalidate_addr_r7,
  [8]  = invalidate_addr_r8,
  [9]  = invalidate_addr_r9,
  [10] = invalidate_addr_r10,
  [12] = invalidate_addr_r12,
};
0bbd1454 107
57871462 108/* Linker */
109
555d3b51 110static void set_jump_target_far1(u_int *insn, void *target)
111{
112 u_int ni = *insn & 0xff000000;
113 ni |= (((u_int)target - (u_int)insn - 8u) << 6) >> 8;
114 assert((ni & 0x0e000000) == 0x0a000000);
115 *insn = ni;
116}
117
df4dc2b1 118static void set_jump_target(void *addr, void *target_)
57871462 119{
555d3b51 120 const u_int target = (u_int)target_;
121 const u_char *ptr = addr;
122 u_int *ptr2 = (u_int *)ptr;
57871462 123 if(ptr[3]==0xe2) {
124 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 125 assert(((uintptr_t)addr&3)==0);
57871462 126 assert((target&3)==0);
127 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 128 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 129 }
130 else if(ptr[3]==0x72) {
131 // generated by emit_jno_unlikely
132 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 133 assert(((uintptr_t)addr&3)==0);
57871462 134 assert((target&3)==0);
135 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
136 }
137 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 138 assert(((uintptr_t)addr&3)==0);
57871462 139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
141 }
142 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144 else {
555d3b51 145 set_jump_target_far1(ptr2, target_);
57871462 146 }
147}
148
149// This optionally copies the instruction from the target of the branch into
150// the space before the branch. Works, but the difference in speed is
151// usually insignificant.
#if 0
/*
 * Disabled variant of the branch patcher. When copy is set and the target
 * instruction is eligible, the target instruction is copied into the word
 * before the branch (which must be the 0xe28dd000 placeholder) and the
 * branch is pointed one instruction past the target.
 */
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;

  assert(!copy || insn[-1] == 0xe28dd000);

  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target-(u_int)insn-8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target-(u_int)insn-8)<<6)>>8);
}
#endif
57871462 183
184/* Literal pool */
e2b5e7aa 185static void add_literal(int addr,int val)
57871462 186{
15776b68 187 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 188 literals[literalcount][0]=addr;
189 literals[literalcount][1]=val;
9f51b4b9 190 literalcount++;
191}
57871462 192
d148d265 193// from a pointer to external jump stub (which was produced by emit_extjump2)
194// find where the jumping insn is
195static void *find_extjump_insn(void *stub)
57871462 196{
197 int *ptr=(int *)(stub+4);
d148d265 198 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 199 u_int offset=*ptr&0xfff;
d148d265 200 void **l_ptr=(void *)ptr+offset+8;
201 return *l_ptr;
57871462 202}
203
57871462 204// Allocate a specific ARM register.
e2b5e7aa 205static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 206{
207 int n;
f776eb14 208 int dirty=0;
9f51b4b9 209
57871462 210 // see if it's already allocated (and dealloc it)
211 for(n=0;n<HOST_REGS;n++)
212 {
f776eb14 213 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
214 dirty=(cur->dirty>>n)&1;
215 cur->regmap[n]=-1;
216 }
57871462 217 }
9f51b4b9 218
90f98e7c 219 assert(n == hr || cur->regmap[hr] < 0 || !((cur->noevict >> hr) & 1));
220 cur->regmap[hr] = reg;
221 cur->dirty &= ~(1 << hr);
222 cur->dirty |= dirty << hr;
223 cur->isconst &= ~(1u << hr);
224 cur->noevict |= 1u << hr;
57871462 225}
226
227// Alloc cycle count into dedicated register
90f98e7c 228static void alloc_cc(struct regstat *cur, int i)
57871462 229{
90f98e7c 230 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
231}
232
233static void alloc_cc_optional(struct regstat *cur, int i)
234{
235 if (cur->regmap[HOST_CCREG] < 0) {
236 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
237 cur->noevict &= ~(1u << HOST_CCREG);
238 }
57871462 239}
240
57871462 241/* Assembler */
242
0b1633d7 243static attr_unused char regname[16][4] = {
57871462 244 "r0",
245 "r1",
246 "r2",
247 "r3",
248 "r4",
249 "r5",
250 "r6",
251 "r7",
252 "r8",
253 "r9",
254 "r10",
255 "fp",
256 "r12",
257 "sp",
258 "lr",
259 "pc"};
260
e2b5e7aa 261static void output_w32(u_int word)
57871462 262{
263 *((u_int *)out)=word;
264 out+=4;
265}
e2b5e7aa 266
267static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 268{
269 assert(rd<16);
270 assert(rn<16);
271 assert(rm<16);
272 return((rn<<16)|(rd<<12)|rm);
273}
e2b5e7aa 274
275static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 276{
277 assert(rd<16);
278 assert(rn<16);
279 assert(imm<256);
280 assert((shift&1)==0);
281 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
282}
e2b5e7aa 283
// Try to express imm as an 8-bit value with an even rotation (the 12-bit
// immediate operand format). On success stores the 12-bit field in *encoded
// and returns 1; otherwise returns 0 with *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 299
300static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 301{
302 u_int ret=genimm(imm,encoded);
303 assert(ret);
581335b0 304 (void)ret;
cfbd3c6e 305}
e2b5e7aa 306
307static u_int genjmp(u_int addr)
57871462 308{
7c3a5182 309 if (addr < 3) return 0; // a branch that will be patched later
310 int offset = addr-(int)out-8;
311 if (offset < -33554432 || offset >= 33554432) {
312 SysPrintf("genjmp: out of range: %08x\n", offset);
313 abort();
e80343e2 314 return 0;
315 }
57871462 316 return ((u_int)offset>>2)&0xffffff;
317}
318
0b1633d7 319static attr_unused void emit_breakpoint(void)
d1e4ebd9 320{
321 assem_debug("bkpt #0\n");
322 //output_w32(0xe1200070);
323 output_w32(0xe7f001f0);
324}
325
e2b5e7aa 326static void emit_mov(int rs,int rt)
57871462 327{
328 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
329 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
330}
331
e2b5e7aa 332static void emit_movs(int rs,int rt)
57871462 333{
334 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
335 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
336}
337
e2b5e7aa 338static void emit_add(int rs1,int rs2,int rt)
57871462 339{
340 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
341 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
342}
343
39b71d9a 344static void emit_adds(int rs1,int rs2,int rt)
345{
346 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
347 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
348}
349#define emit_adds_ptr emit_adds
350
e2b5e7aa 351static void emit_adcs(int rs1,int rs2,int rt)
57871462 352{
353 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
354 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
355}
356
e2b5e7aa 357static void emit_neg(int rs, int rt)
57871462 358{
359 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
360 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
361}
362
a5cd72d0 363static void emit_negs(int rs, int rt)
364{
365 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
366 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
367}
368
e2b5e7aa 369static void emit_sub(int rs1,int rs2,int rt)
57871462 370{
371 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
372 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
373}
374
a5cd72d0 375static void emit_subs(int rs1,int rs2,int rt)
376{
377 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
378 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
379}
380
e2b5e7aa 381static void emit_zeroreg(int rt)
57871462 382{
383 assem_debug("mov %s,#0\n",regname[rt]);
384 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
385}
386
e2b5e7aa 387static void emit_loadlp(u_int imm,u_int rt)
790ee18e 388{
389 add_literal((int)out,imm);
390 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
391 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
392}
e2b5e7aa 393
33788798 394#ifdef HAVE_ARMV7
e2b5e7aa 395static void emit_movw(u_int imm,u_int rt)
790ee18e 396{
397 assert(imm<65536);
398 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
399 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
400}
e2b5e7aa 401
402static void emit_movt(u_int imm,u_int rt)
790ee18e 403{
404 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
405 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
406}
33788798 407#endif
e2b5e7aa 408
409static void emit_movimm(u_int imm,u_int rt)
790ee18e 410{
411 u_int armval;
412 if(genimm(imm,&armval)) {
413 assem_debug("mov %s,#%d\n",regname[rt],imm);
414 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
415 }else if(genimm(~imm,&armval)) {
416 assem_debug("mvn %s,#%d\n",regname[rt],imm);
417 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
418 }else if(imm<65536) {
665f33e1 419 #ifndef HAVE_ARMV7
790ee18e 420 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
421 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
422 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
423 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
424 #else
425 emit_movw(imm,rt);
426 #endif
427 }else{
665f33e1 428 #ifndef HAVE_ARMV7
790ee18e 429 emit_loadlp(imm,rt);
430 #else
431 emit_movw(imm&0x0000FFFF,rt);
432 emit_movt(imm&0xFFFF0000,rt);
433 #endif
434 }
435}
e2b5e7aa 436
437static void emit_pcreladdr(u_int rt)
790ee18e 438{
439 assem_debug("add %s,pc,#?\n",regname[rt]);
440 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
441}
442
e2b5e7aa 443static void emit_loadreg(int r, int hr)
57871462 444{
53358c1d 445 assert(hr != EXCLUDE_REG);
446 if (r == 0)
57871462 447 emit_zeroreg(hr);
448 else {
33788798 449 void *addr;
7c3a5182 450 switch (r) {
451 //case HIREG: addr = &hi; break;
452 //case LOREG: addr = &lo; break;
33788798 453 case CCREG: addr = &cycle_count; break;
33788798 454 case INVCP: addr = &invc_ptr; break;
455 case ROREG: addr = &ram_offset; break;
456 default:
457 assert(r < 34);
458 addr = &psxRegs.GPR.r[r];
459 break;
7c3a5182 460 }
33788798 461 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 462 assert(offset<4096);
6cc8d23c 463 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 464 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
465 }
466}
e2b5e7aa 467
468static void emit_storereg(int r, int hr)
57871462 469{
53358c1d 470 assert(hr != EXCLUDE_REG);
a7864494 471 void *addr;
7c3a5182 472 switch (r) {
473 //case HIREG: addr = &hi; break;
474 //case LOREG: addr = &lo; break;
a7864494 475 case CCREG: addr = &cycle_count; break;
476 default: assert(r < 34u); addr = &psxRegs.GPR.r[r]; break;
7c3a5182 477 }
a7864494 478 uintptr_t offset = (char *)addr - (char *)&dynarec_local;
479 assert(offset < 4096u);
6cc8d23c 480 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 481 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
482}
483
e2b5e7aa 484static void emit_test(int rs, int rt)
57871462 485{
486 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
487 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
488}
489
e2b5e7aa 490static void emit_testimm(int rs,int imm)
57871462 491{
492 u_int armval;
5a05d80c 493 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 494 genimm_checked(imm,&armval);
57871462 495 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
496}
497
e2b5e7aa 498static void emit_testeqimm(int rs,int imm)
b9b61529 499{
500 u_int armval;
501 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 502 genimm_checked(imm,&armval);
b9b61529 503 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
504}
505
e2b5e7aa 506static void emit_not(int rs,int rt)
57871462 507{
508 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
509 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
510}
511
a5cd72d0 512static void emit_mvneq(int rs,int rt)
513{
514 assem_debug("mvneq %s,%s\n",regname[rt],regname[rs]);
515 output_w32(0x01e00000|rd_rn_rm(rt,0,rs));
516}
517
e2b5e7aa 518static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 519{
520 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
521 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
522}
523
e2b5e7aa 524static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 525{
526 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
528}
e2b5e7aa 529
e2b5e7aa 530static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 531{
532 assert(rs<16);
533 assert(rt<16);
534 assert(imm<32);
535 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
536 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
537}
538
e2b5e7aa 539static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 540{
541 assert(rs<16);
542 assert(rt<16);
543 assert(imm<32);
544 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
545 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
546}
547
e2b5e7aa 548static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 549{
550 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
551 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
552}
553
3968e69e 554static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
555{
556 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
557 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
558}
559
e2b5e7aa 560static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 561{
562 assert(rs<16);
563 assert(rt<16);
564 if(imm!=0) {
57871462 565 u_int armval;
566 if(genimm(imm,&armval)) {
567 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
568 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
569 }else if(genimm(-imm,&armval)) {
8a0a8423 570 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 571 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 572 #ifdef HAVE_ARMV7
573 }else if(rt!=rs&&(u_int)imm<65536) {
574 emit_movw(imm&0x0000ffff,rt);
575 emit_add(rs,rt,rt);
576 }else if(rt!=rs&&(u_int)-imm<65536) {
577 emit_movw(-imm&0x0000ffff,rt);
578 emit_sub(rs,rt,rt);
579 #endif
580 }else if((u_int)-imm<65536) {
57871462 581 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
582 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
583 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
584 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 585 }else {
586 do {
587 int shift = (ffs(imm) - 1) & ~1;
588 int imm8 = imm & (0xff << shift);
589 genimm_checked(imm8,&armval);
590 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
591 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
592 rs = rt;
593 imm &= ~imm8;
594 }
595 while (imm != 0);
57871462 596 }
597 }
598 else if(rs!=rt) emit_mov(rs,rt);
599}
600
bc7c5acb 601static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
602{
603 emit_addimm(rs, imm, rt);
604}
605
a5cd72d0 606static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
57871462 607{
608 assert(imm>-65536&&imm<65536);
609 u_int armval;
a5cd72d0 610 if (genimm(imm, &armval)) {
611 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rs],imm);
612 output_w32(0xe2900000|rd_rn_rm(rt,rs,0)|armval);
613 } else if (genimm(-imm, &armval)) {
614 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rs],imm);
615 output_w32(0xe2500000|rd_rn_rm(rt,rs,0)|armval);
616 } else if (rs != rt) {
617 emit_movimm(imm, rt);
618 emit_adds(rs, rt, rt);
619 } else if (imm < 0) {
620 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
57871462 621 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
a5cd72d0 622 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
57871462 623 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
a5cd72d0 624 } else {
625 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
57871462 626 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
a5cd72d0 627 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
57871462 628 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
629 }
630}
e2b5e7aa 631
a5cd72d0 632static void emit_addimm_and_set_flags(int imm, u_int rt)
633{
634 emit_addimm_and_set_flags3(rt, imm, rt);
635}
636
e2b5e7aa 637static void emit_addnop(u_int r)
57871462 638{
639 assert(r<16);
640 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
641 output_w32(0xe2800000|rd_rn_rm(r,r,0));
642}
643
e2b5e7aa 644static void emit_andimm(int rs,int imm,int rt)
57871462 645{
646 u_int armval;
790ee18e 647 if(imm==0) {
648 emit_zeroreg(rt);
649 }else if(genimm(imm,&armval)) {
57871462 650 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
651 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
652 }else if(genimm(~imm,&armval)) {
653 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(imm==65535) {
332a4533 656 #ifndef HAVE_ARMV6
57871462 657 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
658 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
659 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
660 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
661 #else
662 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
663 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
664 #endif
665 }else{
666 assert(imm>0&&imm<65535);
665f33e1 667 #ifndef HAVE_ARMV7
57871462 668 assem_debug("mov r14,#%d\n",imm&0xFF00);
669 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
670 assem_debug("add r14,r14,#%d\n",imm&0xFF);
671 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
672 #else
673 emit_movw(imm,HOST_TEMPREG);
674 #endif
675 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
676 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
677 }
678}
679
e2b5e7aa 680static void emit_orimm(int rs,int imm,int rt)
57871462 681{
682 u_int armval;
790ee18e 683 if(imm==0) {
684 if(rs!=rt) emit_mov(rs,rt);
685 }else if(genimm(imm,&armval)) {
57871462 686 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
687 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
688 }else{
689 assert(imm>0&&imm<65536);
690 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
691 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
692 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
693 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
694 }
695}
696
e2b5e7aa 697static void emit_xorimm(int rs,int imm,int rt)
57871462 698{
57871462 699 u_int armval;
790ee18e 700 if(imm==0) {
701 if(rs!=rt) emit_mov(rs,rt);
702 }else if(genimm(imm,&armval)) {
57871462 703 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
704 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
705 }else{
514ed0d9 706 assert(imm>0&&imm<65536);
57871462 707 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
708 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
709 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
710 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
711 }
712}
713
e2b5e7aa 714static void emit_shlimm(int rs,u_int imm,int rt)
57871462 715{
716 assert(imm>0);
717 assert(imm<32);
718 //if(imm==1) ...
719 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
720 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
721}
722
e2b5e7aa 723static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 724{
725 assert(imm>0);
726 assert(imm<32);
727 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
728 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
729}
730
0b1633d7 731static attr_unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 732{
733 assert(imm>0);
734 assert(imm<32);
735 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
736 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
737}
738
e2b5e7aa 739static void emit_shrimm(int rs,u_int imm,int rt)
57871462 740{
741 assert(imm>0);
742 assert(imm<32);
743 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
744 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
745}
746
e2b5e7aa 747static void emit_sarimm(int rs,u_int imm,int rt)
57871462 748{
749 assert(imm>0);
750 assert(imm<32);
751 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
752 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
753}
754
e2b5e7aa 755static void emit_rorimm(int rs,u_int imm,int rt)
57871462 756{
757 assert(imm>0);
758 assert(imm<32);
759 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
760 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
761}
762
// rt = sign-extended low 16 bits of rs: sxth on ARMv6+, otherwise a
// shift-left / arithmetic-shift-right pair.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
773
// rt = sign-extended low 8 bits of rs: sxtb on ARMv6+, otherwise a
// shift-left / arithmetic-shift-right pair.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
784
e2b5e7aa 785static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 786{
787 assert(rs<16);
788 assert(rt<16);
789 assert(shift<16);
790 //if(imm==1) ...
791 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
792 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
793}
e2b5e7aa 794
795static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 796{
797 assert(rs<16);
798 assert(rt<16);
799 assert(shift<16);
800 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
801 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
802}
e2b5e7aa 803
804static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 805{
806 assert(rs<16);
807 assert(rt<16);
808 assert(shift<16);
809 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
810 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
811}
57871462 812
0b1633d7 813static attr_unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 814{
815 assert(rs<16);
816 assert(rt<16);
817 assert(shift<16);
818 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
819 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
820}
e2b5e7aa 821
0b1633d7 822static attr_unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 823{
824 assert(rs<16);
825 assert(rt<16);
826 assert(shift<16);
827 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
828 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
829}
830
e2b5e7aa 831static void emit_cmpimm(int rs,int imm)
57871462 832{
833 u_int armval;
834 if(genimm(imm,&armval)) {
5a05d80c 835 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 836 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
837 }else if(genimm(-imm,&armval)) {
5a05d80c 838 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 839 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
840 }else if(imm>0) {
841 assert(imm<65536);
57871462 842 emit_movimm(imm,HOST_TEMPREG);
57871462 843 assem_debug("cmp %s,r14\n",regname[rs]);
844 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
845 }else{
846 assert(imm>-65536);
57871462 847 emit_movimm(-imm,HOST_TEMPREG);
57871462 848 assem_debug("cmn %s,r14\n",regname[rs]);
849 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
850 }
851}
852
e2b5e7aa 853static void emit_cmovne_imm(int imm,int rt)
57871462 854{
855 assem_debug("movne %s,#%d\n",regname[rt],imm);
856 u_int armval;
cfbd3c6e 857 genimm_checked(imm,&armval);
57871462 858 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
859}
e2b5e7aa 860
861static void emit_cmovl_imm(int imm,int rt)
57871462 862{
863 assem_debug("movlt %s,#%d\n",regname[rt],imm);
864 u_int armval;
cfbd3c6e 865 genimm_checked(imm,&armval);
57871462 866 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
867}
e2b5e7aa 868
869static void emit_cmovb_imm(int imm,int rt)
57871462 870{
871 assem_debug("movcc %s,#%d\n",regname[rt],imm);
872 u_int armval;
cfbd3c6e 873 genimm_checked(imm,&armval);
57871462 874 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
875}
e2b5e7aa 876
3968e69e 877static void emit_cmovae_imm(int imm,int rt)
878{
879 assem_debug("movcs %s,#%d\n",regname[rt],imm);
880 u_int armval;
881 genimm_checked(imm,&armval);
882 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
883}
884
9c997d19 885static void emit_cmovs_imm(int imm,int rt)
886{
887 assem_debug("movmi %s,#%d\n",regname[rt],imm);
888 u_int armval;
889 genimm_checked(imm,&armval);
890 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
891}
892
0b1633d7 893static attr_unused void emit_cmovne_reg(int rs,int rt)
57871462 894{
895 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
896 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
897}
e2b5e7aa 898
899static void emit_cmovl_reg(int rs,int rt)
57871462 900{
901 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
903}
e2b5e7aa 904
e3c6bdb5 905static void emit_cmovb_reg(int rs,int rt)
906{
907 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
909}
910
e2b5e7aa 911static void emit_cmovs_reg(int rs,int rt)
57871462 912{
913 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
914 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
915}
916
// rt = (rs < imm) ? 1 : 0, signed compare.
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 924
// rt = (rs < imm) ? 1 : 0, unsigned compare.
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 932
e2b5e7aa 933static void emit_cmp(int rs,int rt)
57871462 934{
935 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
936 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
937}
e2b5e7aa 938
882a08fc 939static void emit_cmpcs(int rs,int rt)
940{
941 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
942 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
943}
944
// rt = (rs > 0) ? 1 : 0, signed: compare with 1, preset 1, clear if less.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold = 1;

  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 952
// rt = (rs != 0) ? 1 : 0. The flag-setting move (or tst when rt aliases rs)
// leaves zero in rt for rs == 0; movne then overwrites it with 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 960
// rt = (rs1 < rs2) ? 1 : 0, signed compare.
// When rt aliases an input the zeroing has to wait until after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 969
// rt = (rs1 < rs2) ? 1 : 0, unsigned compare.
// When rt aliases an input the zeroing has to wait until after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 978
2a014d73 979static int can_jump_or_call(const void *a)
980{
981 intptr_t offset = (u_char *)a - out - 8;
982 return (-33554432 <= offset && offset < 33554432);
983}
984
643aeae3 985static void emit_call(const void *a_)
57871462 986{
643aeae3 987 int a = (int)a_;
14c9acee 988 assem_debug("bl %p%s\n", log_addr(a), func_name(a_));
57871462 989 u_int offset=genjmp(a);
990 output_w32(0xeb000000|offset);
991}
e2b5e7aa 992
b14b6a8f 993static void emit_jmp(const void *a_)
57871462 994{
b14b6a8f 995 int a = (int)a_;
14c9acee 996 assem_debug("b %p%s\n", log_addr(a_), func_name(a_));
57871462 997 u_int offset=genjmp(a);
998 output_w32(0xea000000|offset);
999}
e2b5e7aa 1000
643aeae3 1001static void emit_jne(const void *a_)
57871462 1002{
643aeae3 1003 int a = (int)a_;
14c9acee 1004 assem_debug("bne %p\n", log_addr(a_));
57871462 1005 u_int offset=genjmp(a);
1006 output_w32(0x1a000000|offset);
1007}
e2b5e7aa 1008
7c3a5182 1009static void emit_jeq(const void *a_)
57871462 1010{
7c3a5182 1011 int a = (int)a_;
14c9acee 1012 assem_debug("beq %p\n", log_addr(a_));
57871462 1013 u_int offset=genjmp(a);
1014 output_w32(0x0a000000|offset);
1015}
e2b5e7aa 1016
7c3a5182 1017static void emit_js(const void *a_)
57871462 1018{
7c3a5182 1019 int a = (int)a_;
14c9acee 1020 assem_debug("bmi %p\n", log_addr(a_));
57871462 1021 u_int offset=genjmp(a);
1022 output_w32(0x4a000000|offset);
1023}
e2b5e7aa 1024
7c3a5182 1025static void emit_jns(const void *a_)
57871462 1026{
7c3a5182 1027 int a = (int)a_;
14c9acee 1028 assem_debug("bpl %p\n", log_addr(a_));
57871462 1029 u_int offset=genjmp(a);
1030 output_w32(0x5a000000|offset);
1031}
e2b5e7aa 1032
7c3a5182 1033static void emit_jl(const void *a_)
57871462 1034{
7c3a5182 1035 int a = (int)a_;
14c9acee 1036 assem_debug("blt %p\n", log_addr(a_));
57871462 1037 u_int offset=genjmp(a);
1038 output_w32(0xba000000|offset);
1039}
e2b5e7aa 1040
7c3a5182 1041static void emit_jge(const void *a_)
57871462 1042{
7c3a5182 1043 int a = (int)a_;
14c9acee 1044 assem_debug("bge %p\n", log_addr(a_));
57871462 1045 u_int offset=genjmp(a);
1046 output_w32(0xaa000000|offset);
1047}
e2b5e7aa 1048
a5cd72d0 1049static void emit_jo(const void *a_)
1050{
1051 int a = (int)a_;
14c9acee 1052 assem_debug("bvs %p\n", log_addr(a_));
a5cd72d0 1053 u_int offset=genjmp(a);
1054 output_w32(0x6a000000|offset);
1055}
1056
7c3a5182 1057static void emit_jno(const void *a_)
57871462 1058{
7c3a5182 1059 int a = (int)a_;
14c9acee 1060 assem_debug("bvc %p\n", log_addr(a_));
57871462 1061 u_int offset=genjmp(a);
1062 output_w32(0x7a000000|offset);
1063}
e2b5e7aa 1064
7c3a5182 1065static void emit_jc(const void *a_)
57871462 1066{
7c3a5182 1067 int a = (int)a_;
14c9acee 1068 assem_debug("bcs %p\n", log_addr(a_));
57871462 1069 u_int offset=genjmp(a);
1070 output_w32(0x2a000000|offset);
1071}
e2b5e7aa 1072
7c3a5182 1073static void emit_jcc(const void *a_)
57871462 1074{
b14b6a8f 1075 int a = (int)a_;
14c9acee 1076 assem_debug("bcc %p\n", log_addr(a_));
57871462 1077 u_int offset=genjmp(a);
1078 output_w32(0x3a000000|offset);
1079}
1080
9b495f6e 1081static void *emit_cbz(int rs, const void *a)
1082{
1083 void *ret;
1084 emit_test(rs, rs);
1085 ret = out;
1086 emit_jeq(a);
1087 return ret;
1088}
1089
0b1633d7 1090static attr_unused void emit_callreg(u_int r)
57871462 1091{
c6c3b1b3 1092 assert(r<15);
1093 assem_debug("blx %s\n",regname[r]);
1094 output_w32(0xe12fff30|r);
57871462 1095}
e2b5e7aa 1096
1097static void emit_jmpreg(u_int r)
57871462 1098{
1099 assem_debug("mov pc,%s\n",regname[r]);
1100 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1101}
1102
be516ebe 1103static void emit_ret(void)
1104{
1105 emit_jmpreg(14);
1106}
1107
e2b5e7aa 1108static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1109{
1110 assert(offset>-4096&&offset<4096);
1111 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1112 if(offset>=0) {
1113 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1114 }else{
1115 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1116 }
1117}
e2b5e7aa 1118
1119static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1120{
1121 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1122 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1123}
39b71d9a 1124#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1125
1126static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1127{
1128 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1130}
e2b5e7aa 1131
1132static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1133{
1134 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1136}
e2b5e7aa 1137
37387d8b 1138static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1142}
1143
e2b5e7aa 1144static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1145{
1146 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1148}
e2b5e7aa 1149
37387d8b 1150static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1151{
1152 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1153 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1154}
1155
e2b5e7aa 1156static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1157{
1158 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1159 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1160}
e2b5e7aa 1161
37387d8b 1162static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1163{
1164 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1165 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1166}
1167
e2b5e7aa 1168static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1169{
1170 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1172}
e2b5e7aa 1173
37387d8b 1174static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1175{
1176 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1178}
1179
e2b5e7aa 1180static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1181{
1182 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1183 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1184}
1185
1186static void emit_str_dualindexed(int rs1, int rs2, int rt)
1187{
1188 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1190}
1191
1192static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1193{
1194 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1196}
1197
1198static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1199{
1200 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1201 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1202}
e2b5e7aa 1203
e2b5e7aa 1204static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1205{
1206 assert(offset>-256&&offset<256);
1207 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1208 if(offset>=0) {
1209 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1210 }else{
1211 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1212 }
1213}
e2b5e7aa 1214
e2b5e7aa 1215static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1216{
1217 assert(offset>-256&&offset<256);
1218 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1219 if(offset>=0) {
1220 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1221 }else{
1222 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1223 }
1224}
e2b5e7aa 1225
1226static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1227{
1228 assert(offset>-4096&&offset<4096);
1229 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1230 if(offset>=0) {
1231 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1232 }else{
1233 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1234 }
1235}
e2b5e7aa 1236
e2b5e7aa 1237static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1238{
1239 assert(offset>-256&&offset<256);
1240 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1241 if(offset>=0) {
1242 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1243 }else{
1244 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1245 }
1246}
e2b5e7aa 1247
054175e9 1248static void emit_ldrd(int offset, int rs, int rt)
1249{
1250 assert(offset>-256&&offset<256);
1251 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1252 if(offset>=0) {
1253 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1254 }else{
1255 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1256 }
1257}
e2b5e7aa 1258
643aeae3 1259static void emit_readword(void *addr, int rt)
57871462 1260{
643aeae3 1261 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1262 assert(offset<4096);
a5cd72d0 1263 assem_debug("ldr %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
57871462 1264 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1265}
39b71d9a 1266#define emit_readptr emit_readword
e2b5e7aa 1267
e2b5e7aa 1268static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1269{
1270 assert(offset>-4096&&offset<4096);
1271 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1272 if(offset>=0) {
1273 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1274 }else{
1275 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1276 }
1277}
e2b5e7aa 1278
e2b5e7aa 1279static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1280{
1281 assert(offset>-256&&offset<256);
1282 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1283 if(offset>=0) {
1284 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1285 }else{
1286 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1287 }
1288}
e2b5e7aa 1289
1290static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1291{
1292 assert(offset>-4096&&offset<4096);
1293 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1294 if(offset>=0) {
1295 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1296 }else{
1297 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1298 }
1299}
e2b5e7aa 1300
e2b5e7aa 1301static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1302{
1303 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1304 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1305}
e2b5e7aa 1306
1307static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1308{
1309 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1310 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1311}
e2b5e7aa 1312
1313static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1314{
1315 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1316 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1317}
e2b5e7aa 1318
643aeae3 1319static void emit_writeword(int rt, void *addr)
57871462 1320{
643aeae3 1321 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1322 assert(offset<4096);
a5cd72d0 1323 assem_debug("str %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
57871462 1324 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1325}
e2b5e7aa 1326
e2b5e7aa 1327static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1328{
1329 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1330 assert(rs1<16);
1331 assert(rs2<16);
1332 assert(hi<16);
1333 assert(lo<16);
1334 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1335}
e2b5e7aa 1336
1337static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1338{
1339 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1340 assert(rs1<16);
1341 assert(rs2<16);
1342 assert(hi<16);
1343 assert(lo<16);
1344 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1345}
1346
e2b5e7aa 1347static void emit_clz(int rs,int rt)
57871462 1348{
1349 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1350 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1351}
1352
e2b5e7aa 1353static void emit_subcs(int rs1,int rs2,int rt)
57871462 1354{
1355 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1356 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1357}
1358
e2b5e7aa 1359static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1360{
1361 assert(imm>0);
1362 assert(imm<32);
1363 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1364 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1365}
1366
e2b5e7aa 1367static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1368{
1369 assert(imm>0);
1370 assert(imm<32);
1371 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1373}
1374
e2b5e7aa 1375static void emit_negmi(int rs, int rt)
57871462 1376{
1377 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1378 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1379}
1380
e2b5e7aa 1381static void emit_negsmi(int rs, int rt)
57871462 1382{
1383 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1384 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1385}
1386
e2b5e7aa 1387static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1388{
1389 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1390 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1391}
1392
e2b5e7aa 1393static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1394{
1395 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1396 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1397}
1398
e2b5e7aa 1399static void emit_teq(int rs, int rt)
57871462 1400{
1401 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1402 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1403}
1404
0b1633d7 1405static attr_unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1406{
1407 u_int armval;
cfbd3c6e 1408 genimm_checked(imm,&armval);
57871462 1409 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1410 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1411}
1412
57871462 1413// Conditionally select one of two immediates, optimizing for small code size
1414// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1415static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1416{
1417 u_int armval;
1418 if(genimm(imm2-imm1,&armval)) {
1419 emit_movimm(imm1,rt);
1420 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1421 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1422 }else if(genimm(imm1-imm2,&armval)) {
1423 emit_movimm(imm1,rt);
1424 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1425 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1426 }
1427 else {
665f33e1 1428 #ifndef HAVE_ARMV7
57871462 1429 emit_movimm(imm1,rt);
1430 add_literal((int)out,imm2);
1431 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1432 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1433 #else
1434 emit_movw(imm1&0x0000FFFF,rt);
1435 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1436 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1437 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1438 }
1439 emit_movt(imm1&0xFFFF0000,rt);
1440 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1441 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1442 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1443 }
1444 #endif
1445 }
1446}
1447
57871462 1448// special case for checking invalid_code
9b495f6e 1449static void emit_ldrb_indexedsr12_reg(int base, int r, int rt)
57871462 1450{
9b495f6e 1451 assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]);
1452 output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620);
57871462 1453}
1454
14c9acee 1455static void emit_callne(const void *a_)
0bbd1454 1456{
14c9acee 1457 int a = (int)a_;
1458 assem_debug("blne %p\n", log_addr(a_));
0bbd1454 1459 u_int offset=genjmp(a);
1460 output_w32(0x1b000000|offset);
1461}
1462
57871462 1463// Used to preload hash table entries
0b1633d7 1464static attr_unused void emit_prefetchreg(int r)
57871462 1465{
1466 assem_debug("pld %s\n",regname[r]);
1467 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1468}
1469
1470// Special case for mini_ht
e2b5e7aa 1471static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1472{
1473 assert(offset<4096);
1474 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1475 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1476}
1477
e2b5e7aa 1478static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1479{
1480 u_int armval;
cfbd3c6e 1481 genimm_checked(imm,&armval);
b9b61529 1482 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1483 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1484}
1485
0b1633d7 1486static attr_unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1487{
1488 u_int armval;
1489 genimm_checked(imm,&armval);
1490 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1491 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1492}
1493
14c9acee 1494static void emit_jno_unlikely(void *a_)
57871462 1495{
14c9acee 1496 //emit_jno(a_);
1497 assert(a_ == NULL);
1498 assem_debug("addvc pc,pc,#? (%p)\n", /*a-(int)out-8,*/ log_addr(a_));
57871462 1499 output_w32(0x72800000|rd_rn_rm(15,15,0));
1500}
1501
054175e9 1502static void save_regs_all(u_int reglist)
57871462 1503{
054175e9 1504 int i;
57871462 1505 if(!reglist) return;
1506 assem_debug("stmia fp,{");
054175e9 1507 for(i=0;i<16;i++)
1508 if(reglist&(1<<i))
1509 assem_debug("r%d,",i);
57871462 1510 assem_debug("}\n");
1511 output_w32(0xe88b0000|reglist);
1512}
e2b5e7aa 1513
054175e9 1514static void restore_regs_all(u_int reglist)
57871462 1515{
054175e9 1516 int i;
57871462 1517 if(!reglist) return;
1518 assem_debug("ldmia fp,{");
054175e9 1519 for(i=0;i<16;i++)
1520 if(reglist&(1<<i))
1521 assem_debug("r%d,",i);
57871462 1522 assem_debug("}\n");
1523 output_w32(0xe89b0000|reglist);
1524}
e2b5e7aa 1525
054175e9 1526// Save registers before function call
1527static void save_regs(u_int reglist)
1528{
4d646738 1529 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1530 save_regs_all(reglist);
1531}
e2b5e7aa 1532
054175e9 1533// Restore registers after function call
1534static void restore_regs(u_int reglist)
1535{
4d646738 1536 reglist&=CALLER_SAVE_REGS;
054175e9 1537 restore_regs_all(reglist);
1538}
57871462 1539
57871462 1540/* Stubs/epilogue */
1541
e2b5e7aa 1542static void literal_pool(int n)
57871462 1543{
1544 if(!literalcount) return;
1545 if(n) {
1546 if((int)out-literals[0][0]<4096-n) return;
1547 }
1548 u_int *ptr;
1549 int i;
1550 for(i=0;i<literalcount;i++)
1551 {
77750690 1552 u_int l_addr=(u_int)out;
1553 int j;
1554 for(j=0;j<i;j++) {
1555 if(literals[j][1]==literals[i][1]) {
1556 //printf("dup %08x\n",literals[i][1]);
1557 l_addr=literals[j][0];
1558 break;
1559 }
1560 }
57871462 1561 ptr=(u_int *)literals[i][0];
77750690 1562 u_int offset=l_addr-(u_int)ptr-8;
57871462 1563 assert(offset<4096);
1564 assert(!(offset&3));
1565 *ptr|=offset;
77750690 1566 if(l_addr==(u_int)out) {
1567 literals[i][0]=l_addr; // remember for dupes
1568 output_w32(literals[i][1]);
1569 }
57871462 1570 }
1571 literalcount=0;
1572}
1573
e2b5e7aa 1574static void literal_pool_jumpover(int n)
57871462 1575{
1576 if(!literalcount) return;
1577 if(n) {
1578 if((int)out-literals[0][0]<4096-n) return;
1579 }
df4dc2b1 1580 void *jaddr = out;
57871462 1581 emit_jmp(0);
1582 literal_pool(0);
df4dc2b1 1583 set_jump_target(jaddr, out);
57871462 1584}
1585
555d3b51 1586// parsed by find_extjump_insn, check_extjump2
104df9d3 1587static void emit_extjump(u_char *addr, u_int target)
57871462 1588{
1589 u_char *ptr=(u_char *)addr;
1590 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1591 (void)ptr;
1592
57871462 1593 emit_loadlp(target,0);
643aeae3 1594 emit_loadlp((u_int)addr,1);
66ea165f 1595 assert(ndrc->translation_cache <= addr &&
1596 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
104df9d3 1597 emit_far_jump(dyna_linker);
57871462 1598}
1599
d1e4ebd9 1600static void check_extjump2(void *src)
1601{
1602 u_int *ptr = src;
1603 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1604 (void)ptr;
1605}
1606
13e35c04 1607// put rt_val into rt, potentially making use of rs with value rs_val
1608static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1609{
8575a877 1610 u_int armval;
1611 int diff;
1612 if(genimm(rt_val,&armval)) {
1613 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1614 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1615 return;
1616 }
1617 if(genimm(~rt_val,&armval)) {
1618 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1619 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1620 return;
1621 }
1622 diff=rt_val-rs_val;
1623 if(genimm(diff,&armval)) {
1624 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1625 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1626 return;
1627 }else if(genimm(-diff,&armval)) {
1628 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1629 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1630 return;
1631 }
1632 emit_movimm(rt_val,rt);
1633}
1634
// Return 1 if emit_movimm_from() can do its job cheaply for the pair
// (v1, v2): the values are equal, or their difference (in either
// direction) fits in 8 bits once trailing pairs of zero bits are dropped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;
  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100 ? 1 : 0;
}
cbbab9cd 1650
b14b6a8f 1651static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1652{
1653 switch(type) {
1654 case LOADB_STUB: emit_signextend8(rs,rt); break;
1655 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1656 case LOADH_STUB: emit_signextend16(rs,rt); break;
1657 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1658 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1659 default: assert(0);
1660 }
1661}
1662
b1be1eee 1663#include "pcsxmem.h"
1664#include "pcsxmem_inline.c"
b1be1eee 1665
e2b5e7aa 1666static void do_readstub(int n)
57871462 1667{
14c9acee 1668 assem_debug("do_readstub %p\n", log_addr(start + stubs[n].a*4));
57871462 1669 literal_pool(256);
b14b6a8f 1670 set_jump_target(stubs[n].addr, out);
1671 enum stub_type type=stubs[n].type;
1672 int i=stubs[n].a;
1673 int rs=stubs[n].b;
81dbbf4c 1674 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1675 u_int reglist=stubs[n].e;
81dbbf4c 1676 const signed char *i_regmap=i_regs->regmap;
581335b0 1677 int rt;
a5cd72d0 1678 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1679 rt=get_reg(i_regmap,FTEMP);
1680 }else{
cf95b4f0 1681 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1682 }
1683 assert(rs>=0);
df4dc2b1 1684 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1685 void *restore_jump = NULL;
c6c3b1b3 1686 reglist|=(1<<rs);
1687 for(r=0;r<=12;r++) {
1688 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1689 temp=r; break;
1690 }
1691 }
cf95b4f0 1692 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1693 reglist&=~(1<<rt);
1694 if(temp==-1) {
1695 save_regs(reglist);
1696 regs_saved=1;
1697 temp=(rs==0)?2:0;
1698 }
1699 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1700 temp2=1;
643aeae3 1701 emit_readword(&mem_rtab,temp);
c6c3b1b3 1702 emit_shrimm(rs,12,temp2);
1703 emit_readword_dualindexedx4(temp,temp2,temp2);
1704 emit_lsls_imm(temp2,1,temp2);
a5cd72d0 1705 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1706 switch(type) {
1707 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1708 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1709 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1710 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1711 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1712 default: assert(0);
c6c3b1b3 1713 }
1714 }
1715 if(regs_saved) {
df4dc2b1 1716 restore_jump=out;
c6c3b1b3 1717 emit_jcc(0); // jump to reg restore
1718 }
1719 else
b14b6a8f 1720 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1721
1722 if(!regs_saved)
1723 save_regs(reglist);
643aeae3 1724 void *handler=NULL;
c6c3b1b3 1725 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1726 handler=jump_handler_read8;
c6c3b1b3 1727 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1728 handler=jump_handler_read16;
c6c3b1b3 1729 if(type==LOADW_STUB)
643aeae3 1730 handler=jump_handler_read32;
1731 assert(handler);
b96d3df7 1732 pass_args(rs,temp2);
c6c3b1b3 1733 int cc=get_reg(i_regmap,CCREG);
1734 if(cc<0)
1735 emit_loadreg(CCREG,2);
2330734f 1736 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1737 emit_far_call(handler);
7da5c7ad 1738#if 0
1739 if (type == LOADW_STUB) {
1740 // new cycle_count returned in r2
1741 emit_addimm(2, -(int)stubs[n].d, cc<0?2:cc);
1742 if (cc < 0)
1743 emit_storereg(CCREG, 2);
1744 }
1745#endif
a5cd72d0 1746 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1747 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1748 }
1749 if(restore_jump)
df4dc2b1 1750 set_jump_target(restore_jump, out);
c6c3b1b3 1751 restore_regs(reglist);
b14b6a8f 1752 emit_jmp(stubs[n].retaddr); // return address
57871462 1753}
1754
81dbbf4c 1755static void inline_readstub(enum stub_type type, int i, u_int addr,
1756 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1757{
277718fa 1758 int ra = cinfo[i].addr;
1759 int rt = get_reg(regmap,target);
1760 assert(ra >= 0);
2a014d73 1761 u_int is_dynamic;
687b4580 1762 uintptr_t host_addr = 0;
643aeae3 1763 void *handler;
b1be1eee 1764 int cc=get_reg(regmap,CCREG);
277718fa 1765 if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
b1be1eee 1766 return;
643aeae3 1767 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1768 if (handler == NULL) {
cf95b4f0 1769 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1770 return;
13e35c04 1771 if(addr!=host_addr)
277718fa 1772 emit_movimm_from(addr,ra,host_addr,ra);
c6c3b1b3 1773 switch(type) {
277718fa 1774 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1775 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1776 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1777 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1778 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
c6c3b1b3 1779 default: assert(0);
1780 }
1781 return;
1782 }
b1be1eee 1783 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1784 if(is_dynamic) {
1785 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1786 handler=jump_handler_read8;
b1be1eee 1787 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1788 handler=jump_handler_read16;
b1be1eee 1789 if(type==LOADW_STUB)
643aeae3 1790 handler=jump_handler_read32;
b1be1eee 1791 }
c6c3b1b3 1792
1793 // call a memhandler
cf95b4f0 1794 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1795 reglist&=~(1<<rt);
1796 save_regs(reglist);
1797 if(target==0)
1798 emit_movimm(addr,0);
277718fa 1799 else if(ra!=0)
1800 emit_mov(ra,0);
b1be1eee 1801 if(cc<0)
1802 emit_loadreg(CCREG,2);
1803 if(is_dynamic) {
1804 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1805 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1806 }
b1be1eee 1807 else {
643aeae3 1808 emit_readword(&last_count,3);
2330734f 1809 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1810 emit_add(2,3,2);
d7546062 1811 emit_writeword(2,&psxRegs.cycle);
b1be1eee 1812 }
1813
2a014d73 1814 emit_far_call(handler);
b1be1eee 1815
7da5c7ad 1816#if 0
1817 if (type == LOADW_STUB) {
1818 // new cycle_count returned in r2
1819 emit_addimm(2, -adj, cc<0?2:cc);
1820 if (cc < 0)
1821 emit_storereg(CCREG, 2);
1822 }
1823#endif
cf95b4f0 1824 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1825 switch(type) {
1826 case LOADB_STUB: emit_signextend8(0,rt); break;
1827 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1828 case LOADH_STUB: emit_signextend16(0,rt); break;
1829 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1830 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1831 default: assert(0);
1832 }
1833 }
1834 restore_regs(reglist);
57871462 1835}
1836
e2b5e7aa 1837static void do_writestub(int n)
57871462 1838{
14c9acee 1839 assem_debug("do_writestub %p\n", log_addr(start + stubs[n].a*4));
57871462 1840 literal_pool(256);
b14b6a8f 1841 set_jump_target(stubs[n].addr, out);
1842 enum stub_type type=stubs[n].type;
1843 int i=stubs[n].a;
1844 int rs=stubs[n].b;
81dbbf4c 1845 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1846 u_int reglist=stubs[n].e;
81dbbf4c 1847 const signed char *i_regmap=i_regs->regmap;
581335b0 1848 int rt,r;
a5cd72d0 1849 if(dops[i].itype==C2LS) {
57871462 1850 rt=get_reg(i_regmap,r=FTEMP);
1851 }else{
cf95b4f0 1852 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1853 }
1854 assert(rs>=0);
1855 assert(rt>=0);
b14b6a8f 1856 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1857 void *restore_jump = NULL;
b96d3df7 1858 int reglist2=reglist|(1<<rs)|(1<<rt);
1859 for(rtmp=0;rtmp<=12;rtmp++) {
1860 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1861 temp=rtmp; break;
1862 }
1863 }
1864 if(temp==-1) {
1865 save_regs(reglist);
1866 regs_saved=1;
1867 for(rtmp=0;rtmp<=3;rtmp++)
1868 if(rtmp!=rs&&rtmp!=rt)
1869 {temp=rtmp;break;}
1870 }
1871 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1872 temp2=3;
643aeae3 1873 emit_readword(&mem_wtab,temp);
b96d3df7 1874 emit_shrimm(rs,12,temp2);
1875 emit_readword_dualindexedx4(temp,temp2,temp2);
1876 emit_lsls_imm(temp2,1,temp2);
1877 switch(type) {
1878 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1879 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1880 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1881 default: assert(0);
1882 }
1883 if(regs_saved) {
df4dc2b1 1884 restore_jump=out;
b96d3df7 1885 emit_jcc(0); // jump to reg restore
1886 }
1887 else
b14b6a8f 1888 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1889
1890 if(!regs_saved)
1891 save_regs(reglist);
643aeae3 1892 void *handler=NULL;
b96d3df7 1893 switch(type) {
643aeae3 1894 case STOREB_STUB: handler=jump_handler_write8; break;
1895 case STOREH_STUB: handler=jump_handler_write16; break;
1896 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1897 default: assert(0);
b96d3df7 1898 }
643aeae3 1899 assert(handler);
b96d3df7 1900 pass_args(rs,rt);
1901 if(temp2!=3)
1902 emit_mov(temp2,3);
1903 int cc=get_reg(i_regmap,CCREG);
1904 if(cc<0)
1905 emit_loadreg(CCREG,2);
2330734f 1906 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1907 emit_far_call(handler);
7da5c7ad 1908 // new cycle_count returned in r2
1909 emit_addimm(2,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1910 if(cc<0)
1911 emit_storereg(CCREG,2);
1912 if(restore_jump)
df4dc2b1 1913 set_jump_target(restore_jump, out);
b96d3df7 1914 restore_regs(reglist);
b14b6a8f 1915 emit_jmp(stubs[n].retaddr);
57871462 1916}
1917
81dbbf4c 1918static void inline_writestub(enum stub_type type, int i, u_int addr,
1919 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1920{
277718fa 1921 int ra = cinfo[i].addr;
1922 int rt = get_reg(regmap, target);
1923 assert(ra>=0);
57871462 1924 assert(rt>=0);
687b4580 1925 uintptr_t host_addr = 0;
643aeae3 1926 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1927 if (handler == NULL) {
13e35c04 1928 if(addr!=host_addr)
277718fa 1929 emit_movimm_from(addr,ra,host_addr,ra);
b96d3df7 1930 switch(type) {
277718fa 1931 case STOREB_STUB: emit_writebyte_indexed(rt,0,ra); break;
1932 case STOREH_STUB: emit_writehword_indexed(rt,0,ra); break;
1933 case STOREW_STUB: emit_writeword_indexed(rt,0,ra); break;
b96d3df7 1934 default: assert(0);
1935 }
1936 return;
1937 }
1938
1939 // call a memhandler
1940 save_regs(reglist);
277718fa 1941 pass_args(ra,rt);
b96d3df7 1942 int cc=get_reg(regmap,CCREG);
1943 if(cc<0)
1944 emit_loadreg(CCREG,2);
2330734f 1945 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1946 emit_movimm((u_int)handler,3);
2a014d73 1947 emit_far_call(jump_handler_write_h);
7da5c7ad 1948 // new cycle_count returned in r2
1949 emit_addimm(2,-adj,cc<0?2:cc);
b96d3df7 1950 if(cc<0)
1951 emit_storereg(CCREG,2);
1952 restore_regs(reglist);
57871462 1953}
1954
57871462 1955/* Special assem */
1956
81dbbf4c 1957static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1958{
1959 save_regs_all(reglist);
32631e6a 1960 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 1961#ifdef PCNT
81dbbf4c 1962 emit_movimm(op, 0);
2a014d73 1963 emit_far_call(pcnt_gte_start);
82ed88eb 1964#endif
81dbbf4c 1965 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1966}
1967
1968static void c2op_epilogue(u_int op,u_int reglist)
1969{
82ed88eb 1970#ifdef PCNT
1971 emit_movimm(op,0);
2a014d73 1972 emit_far_call(pcnt_gte_end);
82ed88eb 1973#endif
054175e9 1974 restore_regs_all(reglist);
1975}
1976
6c0eefaf 1977static void c2op_call_MACtoIR(int lm,int need_flags)
1978{
1979 if(need_flags)
2a014d73 1980 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 1981 else
2a014d73 1982 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 1983}
1984
1985static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1986{
2a014d73 1987 emit_far_call(func);
6c0eefaf 1988 // func is C code and trashes r0
1989 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1990 if(need_flags||need_ir)
1991 c2op_call_MACtoIR(lm,need_flags);
2a014d73 1992 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 1993}
1994
81dbbf4c 1995static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 1996{
81dbbf4c 1997 u_int c2op = source[i] & 0x3f;
1998 u_int reglist_full = get_host_reglist(i_regs->regmap);
1999 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2000 int need_flags, need_ir;
b9b61529 2001
2002 if (gte_handlers[c2op]!=NULL) {
bedfea38 2003 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2004 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2005 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2006 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2007 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2008 need_flags=0;
6c0eefaf 2009 int shift = (source[i] >> 19) & 1;
2010 int lm = (source[i] >> 10) & 1;
054175e9 2011 switch(c2op) {
19776aef 2012#ifndef DRC_DBG
054175e9 2013 case GTE_MVMVA: {
82336ba3 2014#ifdef HAVE_ARMV5
054175e9 2015 int v = (source[i] >> 15) & 3;
2016 int cv = (source[i] >> 13) & 3;
2017 int mx = (source[i] >> 17) & 3;
4d646738 2018 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2019 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2020 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2021 if(v<3)
2022 emit_ldrd(v*8,0,4);
2023 else {
2024 emit_movzwl_indexed(9*4,0,4); // gteIR
2025 emit_movzwl_indexed(10*4,0,6);
2026 emit_movzwl_indexed(11*4,0,5);
2027 emit_orrshl_imm(6,16,4);
2028 }
2029 if(mx<3)
2030 emit_addimm(0,32*4+mx*8*4,6);
2031 else
643aeae3 2032 emit_readword(&zeromem_ptr,6);
054175e9 2033 if(cv<3)
2034 emit_addimm(0,32*4+(cv*8+5)*4,7);
2035 else
643aeae3 2036 emit_readword(&zeromem_ptr,7);
054175e9 2037#ifdef __ARM_NEON__
2038 emit_movimm(source[i],1); // opcode
2a014d73 2039 emit_far_call(gteMVMVA_part_neon);
054175e9 2040 if(need_flags) {
2041 emit_movimm(lm,1);
2a014d73 2042 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2043 }
2044#else
2045 if(cv==3&&shift)
33788798 2046 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 2047 else {
2048 emit_movimm(shift,1);
33788798 2049 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 2050 }
6c0eefaf 2051 if(need_flags||need_ir)
2052 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2053#endif
2054#else /* if not HAVE_ARMV5 */
81dbbf4c 2055 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2056 emit_movimm(source[i],1); // opcode
643aeae3 2057 emit_writeword(1,&psxRegs.code);
2a014d73 2058 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2059#endif
2060 break;
2061 }
6c0eefaf 2062 case GTE_OP:
81dbbf4c 2063 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2064 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2065 if(need_flags||need_ir) {
2066 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2067 c2op_call_MACtoIR(lm,need_flags);
2068 }
2069 break;
2070 case GTE_DPCS:
81dbbf4c 2071 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2072 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2073 break;
2074 case GTE_INTPL:
81dbbf4c 2075 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2076 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2077 break;
2078 case GTE_SQR:
81dbbf4c 2079 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2080 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2081 if(need_flags||need_ir) {
2082 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2083 c2op_call_MACtoIR(lm,need_flags);
2084 }
2085 break;
2086 case GTE_DCPL:
81dbbf4c 2087 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2088 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2089 break;
2090 case GTE_GPF:
81dbbf4c 2091 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2092 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2093 break;
2094 case GTE_GPL:
81dbbf4c 2095 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2096 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2097 break;
19776aef 2098#endif
054175e9 2099 default:
81dbbf4c 2100 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2101#ifdef DRC_DBG
2102 emit_movimm(source[i],1); // opcode
643aeae3 2103 emit_writeword(1,&psxRegs.code);
19776aef 2104#endif
2a014d73 2105 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2106 break;
2107 }
2108 c2op_epilogue(c2op,reglist);
2109 }
b9b61529 2110}
2111
// Emit the value fix-up for a write to cop2 control register 31.
// Host register 'sl' holds the incoming value; the adjusted value is left
// in host register 'temp'. The emitted code implements:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
//
// The previous sequence (shift right by 12, then left by 12) only cleared
// bits 11..0, so an incoming bit 31 survived even when none of the bits in
// 0x7f87e000 were set. Bit 31 is now shifted out first so that it is set
// only by the conditional OR below.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // temp = sl & 0x7ffff000, built from shifts only
  emit_shlimm(sl,1,temp);    // push bit 31 out of the register
  emit_shrimm(temp,13,temp); // push the low 12 bits out as well
  emit_shlimm(temp,12,temp); // bits 30..12 back in their original place
  // chained tests: each later test only runs while the result is still zero
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  // some bit of 0x7f87e000 was set -> set bit 31
  emit_orrne_imm(temp,0x80000000,temp);
}
2123
2124static void do_mfc2_31_one(u_int copr,signed char temp)
2125{
2126 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2127 emit_lsls_imm(temp,16,temp);
2128 emit_cmovs_imm(0,temp);
2129 emit_cmpimm(temp,0xf80<<16);
2130 emit_andimm(temp,0xf80<<16,temp);
2131 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2132}
2133
2134static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2135{
2136 if (temp < 0) {
2137 host_tempreg_acquire();
2138 temp = HOST_TEMPREG;
2139 }
2140 do_mfc2_31_one(9,temp);
9c997d19 2141 emit_shrimm(temp,7+16,tl);
3968e69e 2142 do_mfc2_31_one(10,temp);
9c997d19 2143 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2144 do_mfc2_31_one(11,temp);
9c997d19 2145 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2146 emit_writeword(tl,&reg_cop2d[29]);
2147 if (temp == HOST_TEMPREG)
2148 host_tempreg_release();
2149}
2150
2330734f 2151static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2152{
2153 // case 0x18: MULT
2154 // case 0x19: MULTU
2155 // case 0x1A: DIV
2156 // case 0x1B: DIVU
cf95b4f0 2157 if(dops[i].rs1&&dops[i].rs2)
57871462 2158 {
a5cd72d0 2159 switch (dops[i].opcode2)
57871462 2160 {
a5cd72d0 2161 case 0x18: // MULT
57871462 2162 {
cf95b4f0 2163 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2164 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2165 signed char hi=get_reg(i_regs->regmap,HIREG);
2166 signed char lo=get_reg(i_regs->regmap,LOREG);
2167 assert(m1>=0);
2168 assert(m2>=0);
2169 assert(hi>=0);
2170 assert(lo>=0);
2171 emit_smull(m1,m2,hi,lo);
2172 }
a5cd72d0 2173 break;
2174 case 0x19: // MULTU
57871462 2175 {
cf95b4f0 2176 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2177 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2178 signed char hi=get_reg(i_regs->regmap,HIREG);
2179 signed char lo=get_reg(i_regs->regmap,LOREG);
2180 assert(m1>=0);
2181 assert(m2>=0);
2182 assert(hi>=0);
2183 assert(lo>=0);
2184 emit_umull(m1,m2,hi,lo);
2185 }
a5cd72d0 2186 break;
2187 case 0x1A: // DIV
57871462 2188 {
cf95b4f0 2189 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2190 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2191 signed char quotient=get_reg(i_regs->regmap,LOREG);
2192 signed char remainder=get_reg(i_regs->regmap,HIREG);
a5cd72d0 2193 void *jaddr_div0;
2194 assert(d1>=0);
2195 assert(d2>=0);
57871462 2196 assert(quotient>=0);
2197 assert(remainder>=0);
2198 emit_movs(d1,remainder);
44a80f6a 2199 emit_movimm(0xffffffff,quotient);
2200 emit_negmi(quotient,quotient); // .. quotient and ..
2201 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2202 emit_movs(d2,HOST_TEMPREG);
a5cd72d0 2203 jaddr_div0 = out;
2204 emit_jeq(0); // Division by zero
82336ba3 2205 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2206#ifdef HAVE_ARMV5
57871462 2207 emit_clz(HOST_TEMPREG,quotient);
a5cd72d0 2208 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); // shifted divisor
665f33e1 2209#else
2210 emit_movimm(0,quotient);
2211 emit_addpl_imm(quotient,1,quotient);
2212 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2213 emit_jns(out-2*4);
665f33e1 2214#endif
57871462 2215 emit_orimm(quotient,1<<31,quotient);
2216 emit_shr(quotient,quotient,quotient);
2217 emit_cmp(remainder,HOST_TEMPREG);
2218 emit_subcs(remainder,HOST_TEMPREG,remainder);
2219 emit_adcs(quotient,quotient,quotient);
2220 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2221 emit_jcc(out-16); // -4
57871462 2222 emit_teq(d1,d2);
2223 emit_negmi(quotient,quotient);
a5cd72d0 2224 set_jump_target(jaddr_div0, out);
57871462 2225 emit_test(d1,d1);
2226 emit_negmi(remainder,remainder);
2227 }
a5cd72d0 2228 break;
2229 case 0x1B: // DIVU
57871462 2230 {
cf95b4f0 2231 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2232 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2233 signed char quotient=get_reg(i_regs->regmap,LOREG);
2234 signed char remainder=get_reg(i_regs->regmap,HIREG);
a5cd72d0 2235 void *jaddr_div0;
2236 assert(d1>=0);
2237 assert(d2>=0);
57871462 2238 assert(quotient>=0);
2239 assert(remainder>=0);
44a80f6a 2240 emit_mov(d1,remainder);
2241 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2242 emit_test(d2,d2);
a5cd72d0 2243 jaddr_div0 = out;
2244 emit_jeq(0); // Division by zero
665f33e1 2245#ifdef HAVE_ARMV5
57871462 2246 emit_clz(d2,HOST_TEMPREG);
2247 emit_movimm(1<<31,quotient);
2248 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2249#else
2250 emit_movimm(0,HOST_TEMPREG);
82336ba3 2251 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2252 emit_lslpls_imm(d2,1,d2);
7c3a5182 2253 emit_jns(out-2*4);
665f33e1 2254 emit_movimm(1<<31,quotient);
2255#endif
57871462 2256 emit_shr(quotient,HOST_TEMPREG,quotient);
2257 emit_cmp(remainder,d2);
2258 emit_subcs(remainder,d2,remainder);
2259 emit_adcs(quotient,quotient,quotient);
2260 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2261 emit_jcc(out-16); // -4
a5cd72d0 2262 set_jump_target(jaddr_div0, out);
57871462 2263 }
a5cd72d0 2264 break;
57871462 2265 }
57871462 2266 }
2267 else
2268 {
57871462 2269 signed char hr=get_reg(i_regs->regmap,HIREG);
2270 signed char lr=get_reg(i_regs->regmap,LOREG);
a5cd72d0 2271 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
2272 {
2273 if (dops[i].rs1) {
2274 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
2275 assert(numerator >= 0);
2276 if (hr < 0)
2277 hr = HOST_TEMPREG;
2278 emit_movs(numerator, hr);
2279 if (lr >= 0) {
2280 if (dops[i].opcode2 == 0x1A) { // DIV
2281 emit_movimm(0xffffffff, lr);
2282 emit_negmi(lr, lr);
2283 }
2284 else
2285 emit_movimm(~0, lr);
2286 }
2287 }
2288 else {
2289 if (hr >= 0) emit_zeroreg(hr);
2290 if (lr >= 0) emit_movimm(~0,lr);
2291 }
2292 }
2293 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
2294 {
2295 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
2296 assert(denominator >= 0);
2297 if (hr >= 0) emit_zeroreg(hr);
2298 if (lr >= 0) {
2299 emit_zeroreg(lr);
2300 emit_test(denominator, denominator);
2301 emit_mvneq(lr, lr);
2302 }
2303 }
2304 else
2305 {
2306 // Multiply by zero is zero.
2307 if (hr >= 0) emit_zeroreg(hr);
2308 if (lr >= 0) emit_zeroreg(lr);
2309 }
57871462 2310 }
2311}
2312#define multdiv_assemble multdiv_assemble_arm
2313
d1e4ebd9 2314static void do_jump_vaddr(int rs)
2315{
2a014d73 2316 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2317}
2318
// Intentionally emits nothing on ARM.
static void do_preload_rhash(int r)
{
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction
  // (see do_rhash)
}
2323
e2b5e7aa 2324static void do_preload_rhtbl(int ht) {
57871462 2325 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2326}
2327
// Emit the mini hash computation: rh = rs & 0xf8.
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2331
e2b5e7aa 2332static void do_miniht_load(int ht,int rh) {
57871462 2333 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2334 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2335}
2336
// Emit the mini hash table dispatch: compare 'rh' with 'rs' and, when
// equal, load host register 15 from [ht+4]; otherwise continue to the
// jump_vaddr fallback for the register holding the address.
static void do_miniht_jump(int rs, int rh, int ht)
{
  int addr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the fallback always goes through r7
  if (addr_reg != 7)
    emit_mov(addr_reg, 7);
  addr_reg = 7;
#endif
  do_jump_vaddr(addr_reg);
}
2347
e2b5e7aa 2348static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2349 #ifndef HAVE_ARMV7
57871462 2350 emit_movimm(return_address,rt); // PC into link register
643aeae3 2351 add_to_linker(out,return_address,1);
57871462 2352 emit_pcreladdr(temp);
643aeae3 2353 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2354 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2355 #else
2356 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2357 add_to_linker(out,return_address,1);
57871462 2358 emit_pcreladdr(temp);
643aeae3 2359 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2360 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2361 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2362 #endif
2363}
2364
57871462 2365// CPU-architecture-specific initialization
2a014d73 2366static void arch_init(void)
2367{
2368 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2369 struct tramp_insns *ops = ndrc->tramp.ops;
2370 size_t i;
2371 assert(!(diff & 3));
2372 assert(diff < 0x1000);
2373 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2374 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2375 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2376 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2377}
b9b61529 2378
2379// vim:shiftwidth=2:expandtab