drc: a bit more sophisticated f1 hack
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points defined outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN entry point, indexed by host register number N.
// Slots 11 and 13-15 (fp, sp, lr, pc) have no entry and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
d148d265 104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 105
57871462 106/* Linker */
107
// Patch the instruction at addr so that it refers to target_.
// The top byte of the instruction word selects how the displacement
// (target - addr - 8, i.e. relative to the ARM pc) is encoded:
//   0xe2  immediate data-processing insn (e.g. emit_pcreladdr's add rt,pc,#?)
//   0x72  insn generated by emit_jno_unlikely
//   else  must be a branch; its 24-bit word offset is replaced
static void set_jump_target(void *addr, void *target_)
{
  u_int target = (u_int)target_;
  u_char *insn_bytes = addr;
  u_int *insn = (u_int *)insn_bytes;
  const u_int disp = target - (u_int)insn - 8;
  const u_char top = insn_bytes[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    // 8-bit immediate holding disp/4, rotate field 0xF
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      // 8-bit immediate holding disp/16, rotate field 0xE
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      // out of immediate range: rewrite the whole word as 0x7A + offset
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  // keep condition/opcode byte, replace the 24-bit word offset
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
139
// Disabled variant of set_jump_target. It can optionally copy the
// instruction found at the branch target into the slot just before the
// branch. Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *insn_bytes = (u_char *)addr;
  u_int *insn = (u_int *)insn_bytes;
  // when copying, the slot before the branch must hold 0xe28dd000
  assert(!copy || insn[-1] == 0xe28dd000);
  if (insn_bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }
  assert((insn_bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0) // ALU, no immediate, no flags
    copy = 0;
  if ((target_insn & 0x0c100000) == 0x04100000) // Load
    copy = 0;
  if (target_insn & 0x08000000)
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4; // branch past the instruction that was copied
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 174
175/* Literal pool */
e2b5e7aa 176static void add_literal(int addr,int val)
57871462 177{
15776b68 178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
9f51b4b9 181 literalcount++;
182}
57871462 183
d148d265 184// from a pointer to external jump stub (which was produced by emit_extjump2)
185// find where the jumping insn is
186static void *find_extjump_insn(void *stub)
57871462 187{
188 int *ptr=(int *)(stub+4);
d148d265 189 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 190 u_int offset=*ptr&0xfff;
d148d265 191 void **l_ptr=(void *)ptr+offset+8;
192 return *l_ptr;
57871462 193}
194
f968d35d 195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
643aeae3 199static void *get_pointer(void *stub)
57871462 200{
201 //printf("get_pointer(%x)\n",(int)stub);
d148d265 202 int *i_ptr=find_extjump_insn(stub);
3d680478 203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 205}
206
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
  // skip the argument setup that precedes the call
  #ifdef HAVE_ARMV7
  insn += 6;
  #else
  insn += 4;
  #endif
  // the bl may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  // an unconditional branch follows the call: follow it
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
225
3968e69e 226static int verify_dirty(const u_int *ptr)
57871462 227{
665f33e1 228 #ifndef HAVE_ARMV7
16c8be17 229 u_int offset;
57871462 230 // get from literal pool
15776b68 231 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 232 offset=*ptr&0xfff;
233 u_int source=*(u_int*)((void *)ptr+offset+8);
234 ptr++;
235 assert((*ptr&0xFFFF0000)==0xe59f0000);
236 offset=*ptr&0xfff;
237 u_int copy=*(u_int*)((void *)ptr+offset+8);
238 ptr++;
239 assert((*ptr&0xFFFF0000)==0xe59f0000);
240 offset=*ptr&0xfff;
241 u_int len=*(u_int*)((void *)ptr+offset+8);
242 ptr++;
243 ptr++;
57871462 244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 254 //printf("verify_dirty: %x %x %x\n",source,copy,len);
255 return !memcmp((void *)source,(void *)copy,len);
256}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
df4dc2b1 260static int isclean(void *addr)
57871462 261{
665f33e1 262 #ifndef HAVE_ARMV7
581335b0 263 u_int *ptr=((u_int *)addr)+4;
57871462 264 #else
581335b0 265 u_int *ptr=((u_int *)addr)+6;
57871462 266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
4a35de07 274// get source that block at addr was compiled from (host pointers)
01d26796 275static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 276{
643aeae3 277 u_int *ptr = addr;
665f33e1 278 #ifndef HAVE_ARMV7
16c8be17 279 u_int offset;
57871462 280 // get from literal pool
15776b68 281 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 282 offset=*ptr&0xfff;
283 u_int source=*(u_int*)((void *)ptr+offset+8);
284 ptr++;
285 //assert((*ptr&0xFFFF0000)==0xe59f0000);
286 //offset=*ptr&0xfff;
287 //u_int copy=*(u_int*)((void *)ptr+offset+8);
288 ptr++;
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 offset=*ptr&0xfff;
291 u_int len=*(u_int*)((void *)ptr+offset+8);
292 ptr++;
293 ptr++;
57871462 294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 304 *start=(u_char *)source;
305 *end=(u_char *)source+len;
57871462 306}
307
57871462 308// Allocate a specific ARM register.
e2b5e7aa 309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 310{
311 int n;
f776eb14 312 int dirty=0;
9f51b4b9 313
57871462 314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
f776eb14 317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
57871462 321 }
9f51b4b9 322
57871462 323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
f776eb14 325 cur->dirty|=dirty<<hr;
57871462 326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
e2b5e7aa 330static void alloc_cc(struct regstat *cur,int i)
57871462 331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
57871462 335/* Assembler */
336
e2b5e7aa 337static unused char regname[16][4] = {
57871462 338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
e2b5e7aa 355static void output_w32(u_int word)
57871462 356{
357 *((u_int *)out)=word;
358 out+=4;
359}
e2b5e7aa 360
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 368
// Build rd/rn fields plus an 8-bit immediate that is to be shifted left by
// an even amount. The value placed at bit 7 upwards is (32-shift)&30.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rot << 7) | imm;
}
e2b5e7aa 377
378static u_int genimm(u_int imm,u_int *encoded)
57871462 379{
c2e3bd42 380 *encoded=0;
381 if(imm==0) return 1;
57871462 382 int i=32;
383 while(i>0)
384 {
385 if(imm<256) {
386 *encoded=((i&30)<<7)|imm;
387 return 1;
388 }
389 imm=(imm>>2)|(imm<<30);i-=2;
390 }
391 return 0;
392}
e2b5e7aa 393
394static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
581335b0 398 (void)ret;
cfbd3c6e 399}
e2b5e7aa 400
401static u_int genjmp(u_int addr)
57871462 402{
7c3a5182 403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
e80343e2 408 return 0;
409 }
57871462 410 return ((u_int)offset>>2)&0xffffff;
411}
412
d1e4ebd9 413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
e2b5e7aa 420static void emit_mov(int rs,int rt)
57871462 421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
e2b5e7aa 426static void emit_movs(int rs,int rt)
57871462 427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_add(int rs1,int rs2,int rt)
57871462 433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
39b71d9a 438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
e2b5e7aa 445static void emit_adcs(int rs1,int rs2,int rt)
57871462 446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
e2b5e7aa 451static void emit_neg(int rs, int rt)
57871462 452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
e2b5e7aa 457static void emit_sub(int rs1,int rs2,int rt)
57871462 458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
e2b5e7aa 463static void emit_zeroreg(int rt)
57871462 464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
e2b5e7aa 469static void emit_loadlp(u_int imm,u_int rt)
790ee18e 470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
e2b5e7aa 475
33788798 476#ifdef HAVE_ARMV7
e2b5e7aa 477static void emit_movw(u_int imm,u_int rt)
790ee18e 478{
479 assert(imm<65536);
480 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
481 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
482}
e2b5e7aa 483
484static void emit_movt(u_int imm,u_int rt)
790ee18e 485{
486 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
487 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
488}
33788798 489#endif
e2b5e7aa 490
491static void emit_movimm(u_int imm,u_int rt)
790ee18e 492{
493 u_int armval;
494 if(genimm(imm,&armval)) {
495 assem_debug("mov %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(genimm(~imm,&armval)) {
498 assem_debug("mvn %s,#%d\n",regname[rt],imm);
499 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
500 }else if(imm<65536) {
665f33e1 501 #ifndef HAVE_ARMV7
790ee18e 502 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
503 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
504 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
505 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
506 #else
507 emit_movw(imm,rt);
508 #endif
509 }else{
665f33e1 510 #ifndef HAVE_ARMV7
790ee18e 511 emit_loadlp(imm,rt);
512 #else
513 emit_movw(imm&0x0000FFFF,rt);
514 emit_movt(imm&0xFFFF0000,rt);
515 #endif
516 }
517}
e2b5e7aa 518
519static void emit_pcreladdr(u_int rt)
790ee18e 520{
521 assem_debug("add %s,pc,#?\n",regname[rt]);
522 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
523}
524
e2b5e7aa 525static void emit_loadreg(int r, int hr)
57871462 526{
3d624f89 527 if(r&64) {
c43b5311 528 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 529 assert(0);
530 return;
3d624f89 531 }
57871462 532 if((r&63)==0)
533 emit_zeroreg(hr);
534 else {
33788798 535 void *addr;
7c3a5182 536 switch (r) {
537 //case HIREG: addr = &hi; break;
538 //case LOREG: addr = &lo; break;
33788798 539 case CCREG: addr = &cycle_count; break;
540 case CSREG: addr = &Status; break;
541 case INVCP: addr = &invc_ptr; break;
542 case ROREG: addr = &ram_offset; break;
543 default:
544 assert(r < 34);
545 addr = &psxRegs.GPR.r[r];
546 break;
7c3a5182 547 }
33788798 548 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 549 assert(offset<4096);
550 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
551 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
552 }
553}
e2b5e7aa 554
555static void emit_storereg(int r, int hr)
57871462 556{
3d624f89 557 if(r&64) {
c43b5311 558 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 559 assert(0);
560 return;
3d624f89 561 }
7c3a5182 562 int addr = (int)&psxRegs.GPR.r[r];
563 switch (r) {
564 //case HIREG: addr = &hi; break;
565 //case LOREG: addr = &lo; break;
566 case CCREG: addr = (int)&cycle_count; break;
567 default: assert(r < 34); break;
568 }
57871462 569 u_int offset = addr-(u_int)&dynarec_local;
570 assert(offset<4096);
571 assem_debug("str %s,fp+%d\n",regname[hr],offset);
572 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
573}
574
e2b5e7aa 575static void emit_test(int rs, int rt)
57871462 576{
577 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
578 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
579}
580
e2b5e7aa 581static void emit_testimm(int rs,int imm)
57871462 582{
583 u_int armval;
5a05d80c 584 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 585 genimm_checked(imm,&armval);
57871462 586 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
587}
588
e2b5e7aa 589static void emit_testeqimm(int rs,int imm)
b9b61529 590{
591 u_int armval;
592 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 593 genimm_checked(imm,&armval);
b9b61529 594 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
595}
596
e2b5e7aa 597static void emit_not(int rs,int rt)
57871462 598{
599 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
600 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
601}
602
e2b5e7aa 603static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 604{
605 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
607}
608
e2b5e7aa 609static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 610{
611 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
612 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
613}
e2b5e7aa 614
e2b5e7aa 615static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 616{
617 assert(rs<16);
618 assert(rt<16);
619 assert(imm<32);
620 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
621 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
622}
623
e2b5e7aa 624static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 625{
626 assert(rs<16);
627 assert(rt<16);
628 assert(imm<32);
629 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
630 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
631}
632
e2b5e7aa 633static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 634{
635 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
636 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
637}
638
3968e69e 639static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
640{
641 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
642 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
643}
644
e2b5e7aa 645static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 646{
647 assert(rs<16);
648 assert(rt<16);
649 if(imm!=0) {
57871462 650 u_int armval;
651 if(genimm(imm,&armval)) {
652 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
653 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
654 }else if(genimm(-imm,&armval)) {
8a0a8423 655 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 656 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 657 #ifdef HAVE_ARMV7
658 }else if(rt!=rs&&(u_int)imm<65536) {
659 emit_movw(imm&0x0000ffff,rt);
660 emit_add(rs,rt,rt);
661 }else if(rt!=rs&&(u_int)-imm<65536) {
662 emit_movw(-imm&0x0000ffff,rt);
663 emit_sub(rs,rt,rt);
664 #endif
665 }else if((u_int)-imm<65536) {
57871462 666 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
668 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 670 }else {
671 do {
672 int shift = (ffs(imm) - 1) & ~1;
673 int imm8 = imm & (0xff << shift);
674 genimm_checked(imm8,&armval);
675 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
677 rs = rt;
678 imm &= ~imm8;
679 }
680 while (imm != 0);
57871462 681 }
682 }
683 else if(rs!=rt) emit_mov(rs,rt);
684}
685
e2b5e7aa 686static void emit_addimm_and_set_flags(int imm,int rt)
57871462 687{
688 assert(imm>-65536&&imm<65536);
689 u_int armval;
690 if(genimm(imm,&armval)) {
691 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
692 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
693 }else if(genimm(-imm,&armval)) {
694 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
695 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
696 }else if(imm<0) {
697 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
698 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
699 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
700 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
701 }else{
702 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
703 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
704 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
705 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
706 }
707}
e2b5e7aa 708
e2b5e7aa 709static void emit_addnop(u_int r)
57871462 710{
711 assert(r<16);
712 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
713 output_w32(0xe2800000|rd_rn_rm(r,r,0));
714}
715
e2b5e7aa 716static void emit_andimm(int rs,int imm,int rt)
57871462 717{
718 u_int armval;
790ee18e 719 if(imm==0) {
720 emit_zeroreg(rt);
721 }else if(genimm(imm,&armval)) {
57871462 722 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
724 }else if(genimm(~imm,&armval)) {
725 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
726 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
727 }else if(imm==65535) {
332a4533 728 #ifndef HAVE_ARMV6
57871462 729 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
731 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
732 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
733 #else
734 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
735 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
736 #endif
737 }else{
738 assert(imm>0&&imm<65535);
665f33e1 739 #ifndef HAVE_ARMV7
57871462 740 assem_debug("mov r14,#%d\n",imm&0xFF00);
741 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
742 assem_debug("add r14,r14,#%d\n",imm&0xFF);
743 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
744 #else
745 emit_movw(imm,HOST_TEMPREG);
746 #endif
747 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
748 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
749 }
750}
751
e2b5e7aa 752static void emit_orimm(int rs,int imm,int rt)
57871462 753{
754 u_int armval;
790ee18e 755 if(imm==0) {
756 if(rs!=rt) emit_mov(rs,rt);
757 }else if(genimm(imm,&armval)) {
57871462 758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
759 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
760 }else{
761 assert(imm>0&&imm<65536);
762 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
764 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
765 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
766 }
767}
768
e2b5e7aa 769static void emit_xorimm(int rs,int imm,int rt)
57871462 770{
57871462 771 u_int armval;
790ee18e 772 if(imm==0) {
773 if(rs!=rt) emit_mov(rs,rt);
774 }else if(genimm(imm,&armval)) {
57871462 775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
776 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
777 }else{
514ed0d9 778 assert(imm>0&&imm<65536);
57871462 779 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
781 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
782 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
783 }
784}
785
e2b5e7aa 786static void emit_shlimm(int rs,u_int imm,int rt)
57871462 787{
788 assert(imm>0);
789 assert(imm<32);
790 //if(imm==1) ...
791 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
792 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
793}
794
e2b5e7aa 795static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 796{
797 assert(imm>0);
798 assert(imm<32);
799 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
800 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
801}
802
e2b5e7aa 803static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 804{
805 assert(imm>0);
806 assert(imm<32);
807 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
808 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
809}
810
e2b5e7aa 811static void emit_shrimm(int rs,u_int imm,int rt)
57871462 812{
813 assert(imm>0);
814 assert(imm<32);
815 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
816 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
817}
818
e2b5e7aa 819static void emit_sarimm(int rs,u_int imm,int rt)
57871462 820{
821 assert(imm>0);
822 assert(imm<32);
823 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
824 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
825}
826
e2b5e7aa 827static void emit_rorimm(int rs,u_int imm,int rt)
57871462 828{
829 assert(imm>0);
830 assert(imm<32);
831 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
832 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
833}
834
// rt = sign-extended low 16 bits of rs.
// Uses sxth when ARMv6 is available, otherwise a shift-left / asr pair.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
845
// rt = sign-extended low 8 bits of rs.
// Uses sxtb when ARMv6 is available, otherwise a shift-left / asr pair.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
856
e2b5e7aa 857static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 858{
859 assert(rs<16);
860 assert(rt<16);
861 assert(shift<16);
862 //if(imm==1) ...
863 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
864 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
865}
e2b5e7aa 866
867static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 868{
869 assert(rs<16);
870 assert(rt<16);
871 assert(shift<16);
872 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
873 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
874}
e2b5e7aa 875
876static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 877{
878 assert(rs<16);
879 assert(rt<16);
880 assert(shift<16);
881 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
882 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
883}
57871462 884
3968e69e 885static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 886{
887 assert(rs<16);
888 assert(rt<16);
889 assert(shift<16);
890 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
891 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
892}
e2b5e7aa 893
3968e69e 894static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 895{
896 assert(rs<16);
897 assert(rt<16);
898 assert(shift<16);
899 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
900 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
901}
902
e2b5e7aa 903static void emit_cmpimm(int rs,int imm)
57871462 904{
905 u_int armval;
906 if(genimm(imm,&armval)) {
5a05d80c 907 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 908 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
909 }else if(genimm(-imm,&armval)) {
5a05d80c 910 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 911 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
912 }else if(imm>0) {
913 assert(imm<65536);
57871462 914 emit_movimm(imm,HOST_TEMPREG);
57871462 915 assem_debug("cmp %s,r14\n",regname[rs]);
916 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }else{
918 assert(imm>-65536);
57871462 919 emit_movimm(-imm,HOST_TEMPREG);
57871462 920 assem_debug("cmn %s,r14\n",regname[rs]);
921 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
922 }
923}
924
e2b5e7aa 925static void emit_cmovne_imm(int imm,int rt)
57871462 926{
927 assem_debug("movne %s,#%d\n",regname[rt],imm);
928 u_int armval;
cfbd3c6e 929 genimm_checked(imm,&armval);
57871462 930 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
931}
e2b5e7aa 932
933static void emit_cmovl_imm(int imm,int rt)
57871462 934{
935 assem_debug("movlt %s,#%d\n",regname[rt],imm);
936 u_int armval;
cfbd3c6e 937 genimm_checked(imm,&armval);
57871462 938 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
939}
e2b5e7aa 940
941static void emit_cmovb_imm(int imm,int rt)
57871462 942{
943 assem_debug("movcc %s,#%d\n",regname[rt],imm);
944 u_int armval;
cfbd3c6e 945 genimm_checked(imm,&armval);
57871462 946 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
947}
e2b5e7aa 948
3968e69e 949static void emit_cmovae_imm(int imm,int rt)
950{
951 assem_debug("movcs %s,#%d\n",regname[rt],imm);
952 u_int armval;
953 genimm_checked(imm,&armval);
954 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
955}
956
9c997d19 957static void emit_cmovs_imm(int imm,int rt)
958{
959 assem_debug("movmi %s,#%d\n",regname[rt],imm);
960 u_int armval;
961 genimm_checked(imm,&armval);
962 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
963}
964
e2b5e7aa 965static void emit_cmovne_reg(int rs,int rt)
57871462 966{
967 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
968 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
969}
e2b5e7aa 970
971static void emit_cmovl_reg(int rs,int rt)
57871462 972{
973 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
974 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
975}
e2b5e7aa 976
e3c6bdb5 977static void emit_cmovb_reg(int rs,int rt)
978{
979 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
980 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
981}
982
e2b5e7aa 983static void emit_cmovs_reg(int rs,int rt)
57871462 984{
985 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
986 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
987}
988
// rt = (rs < imm) ? 1 : 0, using the "lt" condition.
// When rt aliases rs it can only be cleared after the compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  if (rs != rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs, imm);
  }
  else {
    emit_cmpimm(rs, imm);
    emit_movimm(0, rt);
  }
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 996
// rt = (rs < imm) ? 1 : 0, using the "cc" condition.
// When rt aliases rs it can only be cleared after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  if (rs != rt) {
    emit_zeroreg(rt);
    emit_cmpimm(rs, imm);
  }
  else {
    emit_cmpimm(rs, imm);
    emit_movimm(0, rt);
  }
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1004
e2b5e7aa 1005static void emit_cmp(int rs,int rt)
57871462 1006{
1007 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1008 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1009}
e2b5e7aa 1010
// Emit: compare rs with 1, load 1 into rt, then replace it with 0 under the
// "lt" condition.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 1018
// Emit code that sets rt to 1 under the "ne" condition. When rt differs from
// rs the value is first copied with emit_movs(); otherwise rs is tested
// against itself.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1026
// Emit code leaving 1 in rt when the "lt" condition holds after comparing
// rs1 with rs2, and 0 otherwise.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);

  // clear rt before the compare unless that would destroy an operand
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1035
// Emit code leaving 1 in rt when the "cc" condition holds after comparing
// rs1 with rs2, and 0 otherwise.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);

  // clear rt before the compare unless that would destroy an operand
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1044
2a014d73 1045static int can_jump_or_call(const void *a)
1046{
1047 intptr_t offset = (u_char *)a - out - 8;
1048 return (-33554432 <= offset && offset < 33554432);
1049}
1050
643aeae3 1051static void emit_call(const void *a_)
57871462 1052{
643aeae3 1053 int a = (int)a_;
d1e4ebd9 1054 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1055 u_int offset=genjmp(a);
1056 output_w32(0xeb000000|offset);
1057}
e2b5e7aa 1058
b14b6a8f 1059static void emit_jmp(const void *a_)
57871462 1060{
b14b6a8f 1061 int a = (int)a_;
d1e4ebd9 1062 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1063 u_int offset=genjmp(a);
1064 output_w32(0xea000000|offset);
1065}
e2b5e7aa 1066
643aeae3 1067static void emit_jne(const void *a_)
57871462 1068{
643aeae3 1069 int a = (int)a_;
57871462 1070 assem_debug("bne %x\n",a);
1071 u_int offset=genjmp(a);
1072 output_w32(0x1a000000|offset);
1073}
e2b5e7aa 1074
7c3a5182 1075static void emit_jeq(const void *a_)
57871462 1076{
7c3a5182 1077 int a = (int)a_;
57871462 1078 assem_debug("beq %x\n",a);
1079 u_int offset=genjmp(a);
1080 output_w32(0x0a000000|offset);
1081}
e2b5e7aa 1082
7c3a5182 1083static void emit_js(const void *a_)
57871462 1084{
7c3a5182 1085 int a = (int)a_;
57871462 1086 assem_debug("bmi %x\n",a);
1087 u_int offset=genjmp(a);
1088 output_w32(0x4a000000|offset);
1089}
e2b5e7aa 1090
7c3a5182 1091static void emit_jns(const void *a_)
57871462 1092{
7c3a5182 1093 int a = (int)a_;
57871462 1094 assem_debug("bpl %x\n",a);
1095 u_int offset=genjmp(a);
1096 output_w32(0x5a000000|offset);
1097}
e2b5e7aa 1098
7c3a5182 1099static void emit_jl(const void *a_)
57871462 1100{
7c3a5182 1101 int a = (int)a_;
57871462 1102 assem_debug("blt %x\n",a);
1103 u_int offset=genjmp(a);
1104 output_w32(0xba000000|offset);
1105}
e2b5e7aa 1106
7c3a5182 1107static void emit_jge(const void *a_)
57871462 1108{
7c3a5182 1109 int a = (int)a_;
57871462 1110 assem_debug("bge %x\n",a);
1111 u_int offset=genjmp(a);
1112 output_w32(0xaa000000|offset);
1113}
e2b5e7aa 1114
7c3a5182 1115static void emit_jno(const void *a_)
57871462 1116{
7c3a5182 1117 int a = (int)a_;
57871462 1118 assem_debug("bvc %x\n",a);
1119 u_int offset=genjmp(a);
1120 output_w32(0x7a000000|offset);
1121}
e2b5e7aa 1122
7c3a5182 1123static void emit_jc(const void *a_)
57871462 1124{
7c3a5182 1125 int a = (int)a_;
57871462 1126 assem_debug("bcs %x\n",a);
1127 u_int offset=genjmp(a);
1128 output_w32(0x2a000000|offset);
1129}
e2b5e7aa 1130
7c3a5182 1131static void emit_jcc(const void *a_)
57871462 1132{
b14b6a8f 1133 int a = (int)a_;
57871462 1134 assem_debug("bcc %x\n",a);
1135 u_int offset=genjmp(a);
1136 output_w32(0x3a000000|offset);
1137}
1138
3968e69e 1139static unused void emit_callreg(u_int r)
57871462 1140{
c6c3b1b3 1141 assert(r<15);
1142 assem_debug("blx %s\n",regname[r]);
1143 output_w32(0xe12fff30|r);
57871462 1144}
e2b5e7aa 1145
1146static void emit_jmpreg(u_int r)
57871462 1147{
1148 assem_debug("mov pc,%s\n",regname[r]);
1149 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1150}
1151
be516ebe 1152static void emit_ret(void)
1153{
1154 emit_jmpreg(14);
1155}
1156
e2b5e7aa 1157static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1158{
1159 assert(offset>-4096&&offset<4096);
1160 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1161 if(offset>=0) {
1162 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1163 }else{
1164 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1165 }
1166}
e2b5e7aa 1167
1168static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1169{
1170 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1172}
39b71d9a 1173#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1174
1175static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1176{
1177 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1179}
e2b5e7aa 1180
1181static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1182{
1183 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1184 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1185}
e2b5e7aa 1186
37387d8b 1187static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1188{
1189 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1190 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1191}
1192
e2b5e7aa 1193static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1194{
1195 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1196 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1197}
e2b5e7aa 1198
37387d8b 1199static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1200{
1201 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1202 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1203}
1204
e2b5e7aa 1205static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1206{
1207 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1208 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1209}
e2b5e7aa 1210
37387d8b 1211static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1212{
1213 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1214 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1215}
1216
e2b5e7aa 1217static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1218{
1219 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1220 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1221}
e2b5e7aa 1222
37387d8b 1223static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1224{
1225 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1226 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1227}
1228
e2b5e7aa 1229static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1230{
1231 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1232 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1233}
1234
1235static void emit_str_dualindexed(int rs1, int rs2, int rt)
1236{
1237 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1238 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1239}
1240
1241static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1242{
1243 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1244 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1245}
1246
1247static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1248{
1249 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1251}
e2b5e7aa 1252
e2b5e7aa 1253static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1254{
1255 assert(offset>-256&&offset<256);
1256 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1257 if(offset>=0) {
1258 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1259 }else{
1260 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1261 }
1262}
e2b5e7aa 1263
e2b5e7aa 1264static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1265{
1266 assert(offset>-256&&offset<256);
1267 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1268 if(offset>=0) {
1269 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1270 }else{
1271 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1272 }
1273}
e2b5e7aa 1274
1275static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1276{
1277 assert(offset>-4096&&offset<4096);
1278 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1279 if(offset>=0) {
1280 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1281 }else{
1282 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1283 }
1284}
e2b5e7aa 1285
e2b5e7aa 1286static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1287{
1288 assert(offset>-256&&offset<256);
1289 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1290 if(offset>=0) {
1291 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1292 }else{
1293 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1294 }
1295}
e2b5e7aa 1296
054175e9 1297static void emit_ldrd(int offset, int rs, int rt)
1298{
1299 assert(offset>-256&&offset<256);
1300 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1301 if(offset>=0) {
1302 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1303 }else{
1304 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1305 }
1306}
e2b5e7aa 1307
643aeae3 1308static void emit_readword(void *addr, int rt)
57871462 1309{
643aeae3 1310 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1311 assert(offset<4096);
1312 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1313 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1314}
39b71d9a 1315#define emit_readptr emit_readword
e2b5e7aa 1316
e2b5e7aa 1317static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1318{
1319 assert(offset>-4096&&offset<4096);
1320 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1321 if(offset>=0) {
1322 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1323 }else{
1324 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1325 }
1326}
e2b5e7aa 1327
e2b5e7aa 1328static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1329{
1330 assert(offset>-256&&offset<256);
1331 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1332 if(offset>=0) {
1333 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1334 }else{
1335 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1336 }
1337}
e2b5e7aa 1338
1339static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1340{
1341 assert(offset>-4096&&offset<4096);
1342 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1343 if(offset>=0) {
1344 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1345 }else{
1346 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1347 }
1348}
e2b5e7aa 1349
e2b5e7aa 1350static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1351{
1352 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1353 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1354}
e2b5e7aa 1355
1356static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1357{
1358 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1359 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1360}
e2b5e7aa 1361
1362static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1363{
1364 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1365 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1366}
e2b5e7aa 1367
643aeae3 1368static void emit_writeword(int rt, void *addr)
57871462 1369{
643aeae3 1370 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1371 assert(offset<4096);
1372 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1373 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1374}
e2b5e7aa 1375
e2b5e7aa 1376static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1377{
1378 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1379 assert(rs1<16);
1380 assert(rs2<16);
1381 assert(hi<16);
1382 assert(lo<16);
1383 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1384}
e2b5e7aa 1385
1386static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1387{
1388 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1389 assert(rs1<16);
1390 assert(rs2<16);
1391 assert(hi<16);
1392 assert(lo<16);
1393 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1394}
1395
e2b5e7aa 1396static void emit_clz(int rs,int rt)
57871462 1397{
1398 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1399 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1400}
1401
e2b5e7aa 1402static void emit_subcs(int rs1,int rs2,int rt)
57871462 1403{
1404 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1405 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1406}
1407
e2b5e7aa 1408static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1409{
1410 assert(imm>0);
1411 assert(imm<32);
1412 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1413 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1414}
1415
e2b5e7aa 1416static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1417{
1418 assert(imm>0);
1419 assert(imm<32);
1420 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1421 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1422}
1423
e2b5e7aa 1424static void emit_negmi(int rs, int rt)
57871462 1425{
1426 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1427 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1428}
1429
e2b5e7aa 1430static void emit_negsmi(int rs, int rt)
57871462 1431{
1432 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1433 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1434}
1435
e2b5e7aa 1436static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1437{
1438 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1439 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1440}
1441
e2b5e7aa 1442static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1443{
1444 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1445 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1446}
1447
e2b5e7aa 1448static void emit_teq(int rs, int rt)
57871462 1449{
1450 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1451 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1452}
1453
3968e69e 1454static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1455{
1456 u_int armval;
cfbd3c6e 1457 genimm_checked(imm,&armval);
57871462 1458 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1459 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1460}
1461
57871462 1462// Conditionally select one of two immediates, optimizing for small code size
1463// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1464static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1465{
1466 u_int armval;
1467 if(genimm(imm2-imm1,&armval)) {
1468 emit_movimm(imm1,rt);
1469 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1470 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1471 }else if(genimm(imm1-imm2,&armval)) {
1472 emit_movimm(imm1,rt);
1473 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1474 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1475 }
1476 else {
665f33e1 1477 #ifndef HAVE_ARMV7
57871462 1478 emit_movimm(imm1,rt);
1479 add_literal((int)out,imm2);
1480 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1481 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1482 #else
1483 emit_movw(imm1&0x0000FFFF,rt);
1484 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1485 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1486 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1487 }
1488 emit_movt(imm1&0xFFFF0000,rt);
1489 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1490 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1491 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1492 }
1493 #endif
1494 }
1495}
1496
57871462 1497// special case for checking invalid_code
e2b5e7aa 1498static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1499{
1500 assert(imm<128&&imm>=0);
1501 assert(r>=0&&r<16);
1502 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1503 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1504 emit_cmpimm(HOST_TEMPREG,imm);
1505}
1506
e2b5e7aa 1507static void emit_callne(int a)
0bbd1454 1508{
1509 assem_debug("blne %x\n",a);
1510 u_int offset=genjmp(a);
1511 output_w32(0x1b000000|offset);
1512}
1513
57871462 1514// Used to preload hash table entries
e2b5e7aa 1515static unused void emit_prefetchreg(int r)
57871462 1516{
1517 assem_debug("pld %s\n",regname[r]);
1518 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1519}
1520
1521// Special case for mini_ht
e2b5e7aa 1522static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1523{
1524 assert(offset<4096);
1525 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1526 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1527}
1528
e2b5e7aa 1529static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1530{
1531 u_int armval;
cfbd3c6e 1532 genimm_checked(imm,&armval);
b9b61529 1533 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1534 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1535}
1536
e2b5e7aa 1537static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1538{
1539 u_int armval;
1540 genimm_checked(imm,&armval);
1541 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1542 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1543}
1544
e2b5e7aa 1545static void emit_jno_unlikely(int a)
57871462 1546{
1547 //emit_jno(a);
1548 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1549 output_w32(0x72800000|rd_rn_rm(15,15,0));
1550}
1551
054175e9 1552static void save_regs_all(u_int reglist)
57871462 1553{
054175e9 1554 int i;
57871462 1555 if(!reglist) return;
1556 assem_debug("stmia fp,{");
054175e9 1557 for(i=0;i<16;i++)
1558 if(reglist&(1<<i))
1559 assem_debug("r%d,",i);
57871462 1560 assem_debug("}\n");
1561 output_w32(0xe88b0000|reglist);
1562}
e2b5e7aa 1563
054175e9 1564static void restore_regs_all(u_int reglist)
57871462 1565{
054175e9 1566 int i;
57871462 1567 if(!reglist) return;
1568 assem_debug("ldmia fp,{");
054175e9 1569 for(i=0;i<16;i++)
1570 if(reglist&(1<<i))
1571 assem_debug("r%d,",i);
57871462 1572 assem_debug("}\n");
1573 output_w32(0xe89b0000|reglist);
1574}
e2b5e7aa 1575
054175e9 1576// Save registers before function call
1577static void save_regs(u_int reglist)
1578{
4d646738 1579 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1580 save_regs_all(reglist);
1581}
e2b5e7aa 1582
054175e9 1583// Restore registers after function call
1584static void restore_regs(u_int reglist)
1585{
4d646738 1586 reglist&=CALLER_SAVE_REGS;
054175e9 1587 restore_regs_all(reglist);
1588}
57871462 1589
57871462 1590/* Stubs/epilogue */
1591
e2b5e7aa 1592static void literal_pool(int n)
57871462 1593{
1594 if(!literalcount) return;
1595 if(n) {
1596 if((int)out-literals[0][0]<4096-n) return;
1597 }
1598 u_int *ptr;
1599 int i;
1600 for(i=0;i<literalcount;i++)
1601 {
77750690 1602 u_int l_addr=(u_int)out;
1603 int j;
1604 for(j=0;j<i;j++) {
1605 if(literals[j][1]==literals[i][1]) {
1606 //printf("dup %08x\n",literals[i][1]);
1607 l_addr=literals[j][0];
1608 break;
1609 }
1610 }
57871462 1611 ptr=(u_int *)literals[i][0];
77750690 1612 u_int offset=l_addr-(u_int)ptr-8;
57871462 1613 assert(offset<4096);
1614 assert(!(offset&3));
1615 *ptr|=offset;
77750690 1616 if(l_addr==(u_int)out) {
1617 literals[i][0]=l_addr; // remember for dupes
1618 output_w32(literals[i][1]);
1619 }
57871462 1620 }
1621 literalcount=0;
1622}
1623
e2b5e7aa 1624static void literal_pool_jumpover(int n)
57871462 1625{
1626 if(!literalcount) return;
1627 if(n) {
1628 if((int)out-literals[0][0]<4096-n) return;
1629 }
df4dc2b1 1630 void *jaddr = out;
57871462 1631 emit_jmp(0);
1632 literal_pool(0);
df4dc2b1 1633 set_jump_target(jaddr, out);
57871462 1634}
1635
7c3a5182 1636// parsed by get_pointer, find_extjump_insn
1637static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1638{
1639 u_char *ptr=(u_char *)addr;
1640 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1641 (void)ptr;
1642
57871462 1643 emit_loadlp(target,0);
643aeae3 1644 emit_loadlp((u_int)addr,1);
d62c125a 1645 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1646 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1647//DEBUG >
1648#ifdef DEBUG_CYCLE_COUNT
643aeae3 1649 emit_readword(&last_count,ECX);
57871462 1650 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1651 emit_readword(&next_interupt,ECX);
1652 emit_writeword(HOST_CCREG,&Count);
57871462 1653 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1654 emit_writeword(ECX,&last_count);
57871462 1655#endif
1656//DEBUG <
2a014d73 1657 emit_far_jump(linker);
57871462 1658}
1659
d1e4ebd9 1660static void check_extjump2(void *src)
1661{
1662 u_int *ptr = src;
1663 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1664 (void)ptr;
1665}
1666
13e35c04 1667// put rt_val into rt, potentially making use of rs with value rs_val
1668static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1669{
8575a877 1670 u_int armval;
1671 int diff;
1672 if(genimm(rt_val,&armval)) {
1673 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 if(genimm(~rt_val,&armval)) {
1678 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1679 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1680 return;
1681 }
1682 diff=rt_val-rs_val;
1683 if(genimm(diff,&armval)) {
1684 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1685 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1686 return;
1687 }else if(genimm(-diff,&armval)) {
1688 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1689 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1690 return;
1691 }
1692 emit_movimm(rt_val,rt);
1693}
1694
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. when v1==v2 or
// when v2-v1 or v1-v2, after dropping trailing pairs of zero bits, is below
// 0x100. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  int diff;
  u_int bits;

  if(v1==v2)
    return 1;
  diff=v2-v1;

  bits=diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100 ? 1 : 0;
}
cbbab9cd 1710
b14b6a8f 1711static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1712{
1713 switch(type) {
1714 case LOADB_STUB: emit_signextend8(rs,rt); break;
1715 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1716 case LOADH_STUB: emit_signextend16(rs,rt); break;
1717 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1718 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1719 default: assert(0);
1720 }
1721}
1722
b1be1eee 1723#include "pcsxmem.h"
1724#include "pcsxmem_inline.c"
b1be1eee 1725
e2b5e7aa 1726static void do_readstub(int n)
57871462 1727{
b14b6a8f 1728 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1729 literal_pool(256);
b14b6a8f 1730 set_jump_target(stubs[n].addr, out);
1731 enum stub_type type=stubs[n].type;
1732 int i=stubs[n].a;
1733 int rs=stubs[n].b;
81dbbf4c 1734 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1735 u_int reglist=stubs[n].e;
81dbbf4c 1736 const signed char *i_regmap=i_regs->regmap;
581335b0 1737 int rt;
cf95b4f0 1738 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1739 rt=get_reg(i_regmap,FTEMP);
1740 }else{
cf95b4f0 1741 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1742 }
1743 assert(rs>=0);
df4dc2b1 1744 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1745 void *restore_jump = NULL;
c6c3b1b3 1746 reglist|=(1<<rs);
1747 for(r=0;r<=12;r++) {
1748 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1749 temp=r; break;
1750 }
1751 }
cf95b4f0 1752 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1753 reglist&=~(1<<rt);
1754 if(temp==-1) {
1755 save_regs(reglist);
1756 regs_saved=1;
1757 temp=(rs==0)?2:0;
1758 }
1759 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1760 temp2=1;
643aeae3 1761 emit_readword(&mem_rtab,temp);
c6c3b1b3 1762 emit_shrimm(rs,12,temp2);
1763 emit_readword_dualindexedx4(temp,temp2,temp2);
1764 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1765 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1766 switch(type) {
1767 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1768 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1769 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1770 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1771 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1772 default: assert(0);
c6c3b1b3 1773 }
1774 }
1775 if(regs_saved) {
df4dc2b1 1776 restore_jump=out;
c6c3b1b3 1777 emit_jcc(0); // jump to reg restore
1778 }
1779 else
b14b6a8f 1780 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1781
1782 if(!regs_saved)
1783 save_regs(reglist);
643aeae3 1784 void *handler=NULL;
c6c3b1b3 1785 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1786 handler=jump_handler_read8;
c6c3b1b3 1787 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1788 handler=jump_handler_read16;
c6c3b1b3 1789 if(type==LOADW_STUB)
643aeae3 1790 handler=jump_handler_read32;
1791 assert(handler);
b96d3df7 1792 pass_args(rs,temp2);
c6c3b1b3 1793 int cc=get_reg(i_regmap,CCREG);
1794 if(cc<0)
1795 emit_loadreg(CCREG,2);
2330734f 1796 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1797 emit_far_call(handler);
cf95b4f0 1798 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1799 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1800 }
1801 if(restore_jump)
df4dc2b1 1802 set_jump_target(restore_jump, out);
c6c3b1b3 1803 restore_regs(reglist);
b14b6a8f 1804 emit_jmp(stubs[n].retaddr); // return address
57871462 1805}
1806
81dbbf4c 1807static void inline_readstub(enum stub_type type, int i, u_int addr,
1808 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1809{
1810 int rs=get_reg(regmap,target);
57871462 1811 int rt=get_reg(regmap,target);
535d208a 1812 if(rs<0) rs=get_reg(regmap,-1);
57871462 1813 assert(rs>=0);
2a014d73 1814 u_int is_dynamic;
687b4580 1815 uintptr_t host_addr = 0;
643aeae3 1816 void *handler;
b1be1eee 1817 int cc=get_reg(regmap,CCREG);
2330734f 1818 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1819 return;
643aeae3 1820 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1821 if (handler == NULL) {
cf95b4f0 1822 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1823 return;
13e35c04 1824 if(addr!=host_addr)
1825 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1826 switch(type) {
1827 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1828 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1829 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1830 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1831 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1832 default: assert(0);
1833 }
1834 return;
1835 }
b1be1eee 1836 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1837 if(is_dynamic) {
1838 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1839 handler=jump_handler_read8;
b1be1eee 1840 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1841 handler=jump_handler_read16;
b1be1eee 1842 if(type==LOADW_STUB)
643aeae3 1843 handler=jump_handler_read32;
b1be1eee 1844 }
c6c3b1b3 1845
1846 // call a memhandler
cf95b4f0 1847 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1848 reglist&=~(1<<rt);
1849 save_regs(reglist);
1850 if(target==0)
1851 emit_movimm(addr,0);
1852 else if(rs!=0)
1853 emit_mov(rs,0);
b1be1eee 1854 if(cc<0)
1855 emit_loadreg(CCREG,2);
1856 if(is_dynamic) {
1857 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1858 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1859 }
b1be1eee 1860 else {
643aeae3 1861 emit_readword(&last_count,3);
2330734f 1862 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1863 emit_add(2,3,2);
643aeae3 1864 emit_writeword(2,&Count);
b1be1eee 1865 }
1866
2a014d73 1867 emit_far_call(handler);
b1be1eee 1868
cf95b4f0 1869 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1870 switch(type) {
1871 case LOADB_STUB: emit_signextend8(0,rt); break;
1872 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1873 case LOADH_STUB: emit_signextend16(0,rt); break;
1874 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1875 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1876 default: assert(0);
1877 }
1878 }
1879 restore_regs(reglist);
57871462 1880}
1881
e2b5e7aa 1882static void do_writestub(int n)
57871462 1883{
b14b6a8f 1884 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1885 literal_pool(256);
b14b6a8f 1886 set_jump_target(stubs[n].addr, out);
1887 enum stub_type type=stubs[n].type;
1888 int i=stubs[n].a;
1889 int rs=stubs[n].b;
81dbbf4c 1890 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1891 u_int reglist=stubs[n].e;
81dbbf4c 1892 const signed char *i_regmap=i_regs->regmap;
581335b0 1893 int rt,r;
cf95b4f0 1894 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1895 rt=get_reg(i_regmap,r=FTEMP);
1896 }else{
cf95b4f0 1897 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1898 }
1899 assert(rs>=0);
1900 assert(rt>=0);
b14b6a8f 1901 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1902 void *restore_jump = NULL;
b96d3df7 1903 int reglist2=reglist|(1<<rs)|(1<<rt);
1904 for(rtmp=0;rtmp<=12;rtmp++) {
1905 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1906 temp=rtmp; break;
1907 }
1908 }
1909 if(temp==-1) {
1910 save_regs(reglist);
1911 regs_saved=1;
1912 for(rtmp=0;rtmp<=3;rtmp++)
1913 if(rtmp!=rs&&rtmp!=rt)
1914 {temp=rtmp;break;}
1915 }
1916 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1917 temp2=3;
643aeae3 1918 emit_readword(&mem_wtab,temp);
b96d3df7 1919 emit_shrimm(rs,12,temp2);
1920 emit_readword_dualindexedx4(temp,temp2,temp2);
1921 emit_lsls_imm(temp2,1,temp2);
1922 switch(type) {
1923 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1924 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1925 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1926 default: assert(0);
1927 }
1928 if(regs_saved) {
df4dc2b1 1929 restore_jump=out;
b96d3df7 1930 emit_jcc(0); // jump to reg restore
1931 }
1932 else
b14b6a8f 1933 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1934
1935 if(!regs_saved)
1936 save_regs(reglist);
643aeae3 1937 void *handler=NULL;
b96d3df7 1938 switch(type) {
643aeae3 1939 case STOREB_STUB: handler=jump_handler_write8; break;
1940 case STOREH_STUB: handler=jump_handler_write16; break;
1941 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1942 default: assert(0);
b96d3df7 1943 }
643aeae3 1944 assert(handler);
b96d3df7 1945 pass_args(rs,rt);
1946 if(temp2!=3)
1947 emit_mov(temp2,3);
1948 int cc=get_reg(i_regmap,CCREG);
1949 if(cc<0)
1950 emit_loadreg(CCREG,2);
2330734f 1951 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1952 // returns new cycle_count
2a014d73 1953 emit_far_call(handler);
2330734f 1954 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1955 if(cc<0)
1956 emit_storereg(CCREG,2);
1957 if(restore_jump)
df4dc2b1 1958 set_jump_target(restore_jump, out);
b96d3df7 1959 restore_regs(reglist);
b14b6a8f 1960 emit_jmp(stubs[n].retaddr);
57871462 1961}
1962
81dbbf4c 1963static void inline_writestub(enum stub_type type, int i, u_int addr,
1964 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1965{
1966 int rs=get_reg(regmap,-1);
57871462 1967 int rt=get_reg(regmap,target);
1968 assert(rs>=0);
1969 assert(rt>=0);
687b4580 1970 uintptr_t host_addr = 0;
643aeae3 1971 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1972 if (handler == NULL) {
13e35c04 1973 if(addr!=host_addr)
1974 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1975 switch(type) {
1976 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1977 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1978 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1979 default: assert(0);
1980 }
1981 return;
1982 }
1983
1984 // call a memhandler
1985 save_regs(reglist);
13e35c04 1986 pass_args(rs,rt);
b96d3df7 1987 int cc=get_reg(regmap,CCREG);
1988 if(cc<0)
1989 emit_loadreg(CCREG,2);
2330734f 1990 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1991 emit_movimm((u_int)handler,3);
b96d3df7 1992 // returns new cycle_count
2a014d73 1993 emit_far_call(jump_handler_write_h);
2330734f 1994 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1995 if(cc<0)
1996 emit_storereg(CCREG,2);
1997 restore_regs(reglist);
57871462 1998}
1999
d1e4ebd9 2000// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 2001static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 2002{
665f33e1 2003 #ifndef HAVE_ARMV7
7c3a5182 2004 emit_loadlp((int)source, 1);
2005 emit_loadlp((int)copy, 2);
3d680478 2006 emit_loadlp(source_len, 3);
57871462 2007 #else
7c3a5182 2008 emit_movw(((u_int)source)&0x0000FFFF, 1);
2009 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2010 emit_movt(((u_int)source)&0xFFFF0000, 1);
2011 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2012 emit_movw(source_len, 3);
57871462 2013 #endif
7c3a5182 2014 emit_movimm(arg0, 0);
2015}
2016
3d680478 2017static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2018{
2019 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2020 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2021 emit_far_call(verify_code);
df4dc2b1 2022 void *entry = out;
57871462 2023 load_regs_entry(i);
df4dc2b1 2024 if (entry == out)
2025 entry = instr_addr[i];
57871462 2026 emit_jmp(instr_addr[i]);
2027 return entry;
2028}
2029
3d680478 2030static void do_dirty_stub_ds(u_int source_len)
57871462 2031{
3d680478 2032 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2033 emit_far_call(verify_code_ds);
57871462 2034}
2035
57871462 2036/* Special assem */
2037
81dbbf4c 2038static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2039{
2040 save_regs_all(reglist);
32631e6a 2041 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2042#ifdef PCNT
81dbbf4c 2043 emit_movimm(op, 0);
2a014d73 2044 emit_far_call(pcnt_gte_start);
82ed88eb 2045#endif
81dbbf4c 2046 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2047}
2048
2049static void c2op_epilogue(u_int op,u_int reglist)
2050{
82ed88eb 2051#ifdef PCNT
2052 emit_movimm(op,0);
2a014d73 2053 emit_far_call(pcnt_gte_end);
82ed88eb 2054#endif
054175e9 2055 restore_regs_all(reglist);
2056}
2057
6c0eefaf 2058static void c2op_call_MACtoIR(int lm,int need_flags)
2059{
2060 if(need_flags)
2a014d73 2061 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2062 else
2a014d73 2063 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2064}
2065
2066static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2067{
2a014d73 2068 emit_far_call(func);
6c0eefaf 2069 // func is C code and trashes r0
2070 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2071 if(need_flags||need_ir)
2072 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2073 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2074}
2075
81dbbf4c 2076static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2077{
81dbbf4c 2078 u_int c2op = source[i] & 0x3f;
2079 u_int reglist_full = get_host_reglist(i_regs->regmap);
2080 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2081 int need_flags, need_ir;
b9b61529 2082
2083 if (gte_handlers[c2op]!=NULL) {
bedfea38 2084 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2085 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2086 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2087 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2088 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2089 need_flags=0;
6c0eefaf 2090 int shift = (source[i] >> 19) & 1;
2091 int lm = (source[i] >> 10) & 1;
054175e9 2092 switch(c2op) {
19776aef 2093#ifndef DRC_DBG
054175e9 2094 case GTE_MVMVA: {
82336ba3 2095#ifdef HAVE_ARMV5
054175e9 2096 int v = (source[i] >> 15) & 3;
2097 int cv = (source[i] >> 13) & 3;
2098 int mx = (source[i] >> 17) & 3;
4d646738 2099 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2100 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2101 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2102 if(v<3)
2103 emit_ldrd(v*8,0,4);
2104 else {
2105 emit_movzwl_indexed(9*4,0,4); // gteIR
2106 emit_movzwl_indexed(10*4,0,6);
2107 emit_movzwl_indexed(11*4,0,5);
2108 emit_orrshl_imm(6,16,4);
2109 }
2110 if(mx<3)
2111 emit_addimm(0,32*4+mx*8*4,6);
2112 else
643aeae3 2113 emit_readword(&zeromem_ptr,6);
054175e9 2114 if(cv<3)
2115 emit_addimm(0,32*4+(cv*8+5)*4,7);
2116 else
643aeae3 2117 emit_readword(&zeromem_ptr,7);
054175e9 2118#ifdef __ARM_NEON__
2119 emit_movimm(source[i],1); // opcode
2a014d73 2120 emit_far_call(gteMVMVA_part_neon);
054175e9 2121 if(need_flags) {
2122 emit_movimm(lm,1);
2a014d73 2123 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2124 }
2125#else
2126 if(cv==3&&shift)
33788798 2127 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 2128 else {
2129 emit_movimm(shift,1);
33788798 2130 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 2131 }
6c0eefaf 2132 if(need_flags||need_ir)
2133 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2134#endif
2135#else /* if not HAVE_ARMV5 */
81dbbf4c 2136 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2137 emit_movimm(source[i],1); // opcode
643aeae3 2138 emit_writeword(1,&psxRegs.code);
2a014d73 2139 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2140#endif
2141 break;
2142 }
6c0eefaf 2143 case GTE_OP:
81dbbf4c 2144 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2145 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2146 if(need_flags||need_ir) {
2147 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2148 c2op_call_MACtoIR(lm,need_flags);
2149 }
2150 break;
2151 case GTE_DPCS:
81dbbf4c 2152 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2153 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2154 break;
2155 case GTE_INTPL:
81dbbf4c 2156 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2157 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2158 break;
2159 case GTE_SQR:
81dbbf4c 2160 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2161 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2162 if(need_flags||need_ir) {
2163 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2164 c2op_call_MACtoIR(lm,need_flags);
2165 }
2166 break;
2167 case GTE_DCPL:
81dbbf4c 2168 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2169 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2170 break;
2171 case GTE_GPF:
81dbbf4c 2172 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2173 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2174 break;
2175 case GTE_GPL:
81dbbf4c 2176 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2177 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2178 break;
19776aef 2179#endif
054175e9 2180 default:
81dbbf4c 2181 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2182#ifdef DRC_DBG
2183 emit_movimm(source[i],1); // opcode
643aeae3 2184 emit_writeword(1,&psxRegs.code);
19776aef 2185#endif
2a014d73 2186 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2187 break;
2188 }
2189 c2op_epilogue(c2op,reglist);
2190 }
b9b61529 2191}
2192
/*
 * Emit the value fix-up for a write to COP2 control register 31.
 * Reads host register sl and leaves the result in temp.
 *
 * Reference behaviour being implemented:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * NOTE(review): the shift pair below clears only the low 12 bits, so an
 * input with bit 31 set keeps it even when no bit of 0x7f87e000 is set,
 * unlike the reference above. Confirm whether that matters.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // test 0x7f87e000 in three steps; the "eq" tests run only while the
  // previous result was still zero
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // any bit found -> set bit 31
  emit_orrne_imm(temp, 0x80000000, temp);
}
2204
2205static void do_mfc2_31_one(u_int copr,signed char temp)
2206{
2207 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2208 emit_lsls_imm(temp,16,temp);
2209 emit_cmovs_imm(0,temp);
2210 emit_cmpimm(temp,0xf80<<16);
2211 emit_andimm(temp,0xf80<<16,temp);
2212 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2213}
2214
2215static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2216{
2217 if (temp < 0) {
2218 host_tempreg_acquire();
2219 temp = HOST_TEMPREG;
2220 }
2221 do_mfc2_31_one(9,temp);
9c997d19 2222 emit_shrimm(temp,7+16,tl);
3968e69e 2223 do_mfc2_31_one(10,temp);
9c997d19 2224 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2225 do_mfc2_31_one(11,temp);
9c997d19 2226 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2227 emit_writeword(tl,&reg_cop2d[29]);
2228 if (temp == HOST_TEMPREG)
2229 host_tempreg_release();
2230}
2231
2330734f 2232static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2233{
2234 // case 0x18: MULT
2235 // case 0x19: MULTU
2236 // case 0x1A: DIV
2237 // case 0x1B: DIVU
2238 // case 0x1C: DMULT
2239 // case 0x1D: DMULTU
2240 // case 0x1E: DDIV
2241 // case 0x1F: DDIVU
cf95b4f0 2242 if(dops[i].rs1&&dops[i].rs2)
57871462 2243 {
cf95b4f0 2244 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2245 {
cf95b4f0 2246 if(dops[i].opcode2==0x18) // MULT
57871462 2247 {
cf95b4f0 2248 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2249 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2250 signed char hi=get_reg(i_regs->regmap,HIREG);
2251 signed char lo=get_reg(i_regs->regmap,LOREG);
2252 assert(m1>=0);
2253 assert(m2>=0);
2254 assert(hi>=0);
2255 assert(lo>=0);
2256 emit_smull(m1,m2,hi,lo);
2257 }
cf95b4f0 2258 if(dops[i].opcode2==0x19) // MULTU
57871462 2259 {
cf95b4f0 2260 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2261 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2262 signed char hi=get_reg(i_regs->regmap,HIREG);
2263 signed char lo=get_reg(i_regs->regmap,LOREG);
2264 assert(m1>=0);
2265 assert(m2>=0);
2266 assert(hi>=0);
2267 assert(lo>=0);
2268 emit_umull(m1,m2,hi,lo);
2269 }
cf95b4f0 2270 if(dops[i].opcode2==0x1A) // DIV
57871462 2271 {
cf95b4f0 2272 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2273 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2274 assert(d1>=0);
2275 assert(d2>=0);
2276 signed char quotient=get_reg(i_regs->regmap,LOREG);
2277 signed char remainder=get_reg(i_regs->regmap,HIREG);
2278 assert(quotient>=0);
2279 assert(remainder>=0);
2280 emit_movs(d1,remainder);
44a80f6a 2281 emit_movimm(0xffffffff,quotient);
2282 emit_negmi(quotient,quotient); // .. quotient and ..
2283 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2284 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2285 emit_jeq(out+52); // Division by zero
82336ba3 2286 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2287#ifdef HAVE_ARMV5
57871462 2288 emit_clz(HOST_TEMPREG,quotient);
2289 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2290#else
2291 emit_movimm(0,quotient);
2292 emit_addpl_imm(quotient,1,quotient);
2293 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2294 emit_jns(out-2*4);
665f33e1 2295#endif
57871462 2296 emit_orimm(quotient,1<<31,quotient);
2297 emit_shr(quotient,quotient,quotient);
2298 emit_cmp(remainder,HOST_TEMPREG);
2299 emit_subcs(remainder,HOST_TEMPREG,remainder);
2300 emit_adcs(quotient,quotient,quotient);
2301 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2302 emit_jcc(out-16); // -4
57871462 2303 emit_teq(d1,d2);
2304 emit_negmi(quotient,quotient);
2305 emit_test(d1,d1);
2306 emit_negmi(remainder,remainder);
2307 }
cf95b4f0 2308 if(dops[i].opcode2==0x1B) // DIVU
57871462 2309 {
cf95b4f0 2310 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2311 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2312 assert(d1>=0);
2313 assert(d2>=0);
2314 signed char quotient=get_reg(i_regs->regmap,LOREG);
2315 signed char remainder=get_reg(i_regs->regmap,HIREG);
2316 assert(quotient>=0);
2317 assert(remainder>=0);
44a80f6a 2318 emit_mov(d1,remainder);
2319 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2320 emit_test(d2,d2);
7c3a5182 2321 emit_jeq(out+40); // Division by zero
665f33e1 2322#ifdef HAVE_ARMV5
57871462 2323 emit_clz(d2,HOST_TEMPREG);
2324 emit_movimm(1<<31,quotient);
2325 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2326#else
2327 emit_movimm(0,HOST_TEMPREG);
82336ba3 2328 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2329 emit_lslpls_imm(d2,1,d2);
7c3a5182 2330 emit_jns(out-2*4);
665f33e1 2331 emit_movimm(1<<31,quotient);
2332#endif
57871462 2333 emit_shr(quotient,HOST_TEMPREG,quotient);
2334 emit_cmp(remainder,d2);
2335 emit_subcs(remainder,d2,remainder);
2336 emit_adcs(quotient,quotient,quotient);
2337 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2338 emit_jcc(out-16); // -4
57871462 2339 }
2340 }
2341 else // 64-bit
71e490c5 2342 assert(0);
57871462 2343 }
2344 else
2345 {
2346 // Multiply by zero is zero.
2347 // MIPS does not have a divide by zero exception.
2348 // The result is undefined, we return zero.
2349 signed char hr=get_reg(i_regs->regmap,HIREG);
2350 signed char lr=get_reg(i_regs->regmap,LOREG);
2351 if(hr>=0) emit_zeroreg(hr);
2352 if(lr>=0) emit_zeroreg(lr);
2353 }
2354}
2355#define multdiv_assemble multdiv_assemble_arm
2356
d1e4ebd9 2357static void do_jump_vaddr(int rs)
2358{
2a014d73 2359 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2360}
2361
/*
 * Intentionally emits nothing on ARM.
 * On x86 this step loads the hash mask 0xf8 into register r; on ARM the
 * hash is computed by a single instruction in do_rhash() instead.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2366
e2b5e7aa 2367static void do_preload_rhtbl(int ht) {
57871462 2368 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2369}
2370
/* Emit the mini hash: rh = rs & 0xf8. */
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2374
e2b5e7aa 2375static void do_miniht_load(int ht,int rh) {
57871462 2376 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2377 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2378}
2379
/*
 * Emit the mini hash table dispatch: if the loaded entry (rh) equals the
 * target address (rs), load pc from [ht, #4]; otherwise fall through to
 * the generic jump_vaddr path.
 */
static void do_miniht_jump(int rs, int rh, int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
  #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // always dispatch through r7 with this hack enabled
  if (vaddr_reg != 7)
    emit_mov(vaddr_reg, 7);
  vaddr_reg = 7;
  #endif
  do_jump_vaddr(vaddr_reg);
}
2390
e2b5e7aa 2391static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2392 #ifndef HAVE_ARMV7
57871462 2393 emit_movimm(return_address,rt); // PC into link register
643aeae3 2394 add_to_linker(out,return_address,1);
57871462 2395 emit_pcreladdr(temp);
643aeae3 2396 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2398 #else
2399 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2400 add_to_linker(out,return_address,1);
57871462 2401 emit_pcreladdr(temp);
643aeae3 2402 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2403 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2404 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2405 #endif
2406}
2407
57871462 2408// CPU-architecture-specific initialization
2a014d73 2409static void arch_init(void)
2410{
2411 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2412 struct tramp_insns *ops = ndrc->tramp.ops;
2413 size_t i;
2414 assert(!(diff & 3));
2415 assert(diff < 0x1000);
2416 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2417 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2418 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2419 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2420}
b9b61529 2421
2422// vim:shiftwidth=2:expandtab