drc: prefer callee-saved regs on alloc
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points that are only declared here (their definitions are not in
// this part of the file).
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN stub for each host register number N.  Slots 11, 13, 14
// and 15 have no stub and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
d148d265 104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 105
57871462 106/* Linker */
107
// Patch the instruction at addr so that it refers to target_.
//
// The kind of instruction is recognised from its most significant byte
// (addr[3], i.e. a little-endian instruction word is assumed):
//   0xe2  - data-processing immediate: the low 12 bits are replaced by an
//           encoded pc-relative displacement (must be < 1024, word aligned)
//   0x72  - instruction generated by emit_jno_unlikely: patched like the
//           0xe2 case when the displacement fits, otherwise replaced by a
//           branch with condition/opcode byte 0x7A
//   other - must be a branch ((top byte & 0x0e) == 0x0a); its 24-bit offset
//           field is rewritten
// Displacements are taken relative to addr + 8.
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top_byte = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  const u_int disp = target - (u_int)insn - 8;

  if (top_byte == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    // imm8 = disp/4 with rotate field 15 (value is shifted left by 2)
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top_byte == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      // imm8 = disp/16 with rotate field 14 (value is shifted left by 4)
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      // out of immediate range: turn the instruction into a branch
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top_byte & 0x0e) == 0xa);
  // keep condition/opcode byte, store (disp / 4) truncated to 24 bits
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
139
// Disabled variant of set_jump_target.  When `copy` is set it moves the
// instruction found at the branch target into the slot just before the
// branch and retargets the branch one instruction further.
// Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top_byte = ((u_char *)addr)[3];

  // the slot to be filled must hold the placeholder word 0xe28dd000
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top_byte == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top_byte & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0            // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 174
175/* Literal pool */
e2b5e7aa 176static void add_literal(int addr,int val)
57871462 177{
15776b68 178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
9f51b4b9 181 literalcount++;
182}
57871462 183
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction.  The second word of the
// stub must be "ldr rx, [pc, #ofs]"; the pointer stored in the literal that
// this ldr refers to (ldr address + ofs + 8) is the result.
static void *find_extjump_insn(void *stub)
{
  int *ldr = (int *)((char *)stub + 4);
  assert((*ldr & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]

  u_int pool_ofs = *ldr & 0xfff;
  void **literal = (void **)((char *)ldr + pool_ofs + 8);
  return *literal;
}
194
f968d35d 195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
643aeae3 199static void *get_pointer(void *stub)
57871462 200{
201 //printf("get_pointer(%x)\n",(int)stub);
d148d265 202 int *i_ptr=find_extjump_insn(stub);
3d680478 203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 205}
206
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code.
//
// The bl is expected 4 words (6 with HAVE_ARMV7) into the dirty entry, or
// one word later.  The clean entry is the instruction after the bl; if that
// instruction is an unconditional branch, its destination is returned
// instead.
static void *get_clean_addr(void *addr)
{
#ifndef HAVE_ARMV7
  enum { PROLOGUE_WORDS = 4 };
#else
  enum { PROLOGUE_WORDS = 6 };
#endif
  signed int *insn = (signed int *)addr + PROLOGUE_WORDS;

  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
225
// Check whether the source a dirty block was compiled from is unchanged.
//
// ptr points at the dirty entry, whose leading instructions load three
// values: source address, address of the saved copy, and length.  Without
// HAVE_ARMV7 they are pc-relative literal loads, with it they are movw/movt
// pairs.  Returns non-zero when source and copy are byte-identical.
static int verify_dirty(const u_int *ptr)
{
  u_int source, copy, len;
#ifndef HAVE_ARMV7
  // get from literal pool: three consecutive "ldr rX, [pc, #ofs]"
  u_int literal[3];
  for (int k = 0; k < 3; k++, ptr++) {
    assert((*ptr & 0xFFFF0000) == 0xe59f0000);
    literal[k] = *(u_int *)((void *)ptr + (*ptr & 0xfff) + 8);
  }
  ptr++;
  source = literal[0];
  copy = literal[1];
  len = literal[2];
#else
  // ARMv7 movw/movt: words 0,1 carry the low halves, words 2,3 the high
  // halves, word 4 the length
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
         + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  copy = (ptr[1] & 0xFFF) + ((ptr[1] >> 4) & 0xF000)
       + ((ptr[3] << 16) & 0xFFF0000) + ((ptr[3] << 12) & 0xF0000000);
  len = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#endif
  // sanity check: the call that follows must be a bl (here or one word on)
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return memcmp((void *)source, (void *)copy, len) == 0;
}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
df4dc2b1 260static int isclean(void *addr)
57871462 261{
665f33e1 262 #ifndef HAVE_ARMV7
581335b0 263 u_int *ptr=((u_int *)addr)+4;
57871462 264 #else
581335b0 265 u_int *ptr=((u_int *)addr)+6;
57871462 266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
// Get the source range that the block at addr was compiled from
// (host pointers).
//
// addr points at a dirty entry; the source address and length are decoded
// from its leading instructions (literal loads without HAVE_ARMV7,
// movw/movt with it).  The saved-copy operand is skipped.
// On return *start is the first source byte and *end is start + length.
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  u_int *ptr = addr;
  u_int source, len;
#ifndef HAVE_ARMV7
  // get from literal pool: words 0 and 2 are "ldr rX, [pc, #ofs]"
  assert((ptr[0] & 0xFFFF0000) == 0xe59f0000);
  source = *(u_int *)((void *)&ptr[0] + (ptr[0] & 0xfff) + 8);
  // word 1 loads the copy address, which is not needed here
  assert((ptr[2] & 0xFFFF0000) == 0xe59f0000);
  len = *(u_int *)((void *)&ptr[2] + (ptr[2] & 0xfff) + 8);
  ptr += 4;
#else
  // ARMv7 movw/movt
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
         + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  len = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#endif
  // sanity check: the call that follows must be a bl (here or one word on)
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction

  *start = (u_char *)source;
  *end = (u_char *)source + len;
}
307
57871462 308// Allocate a specific ARM register.
e2b5e7aa 309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 310{
311 int n;
f776eb14 312 int dirty=0;
9f51b4b9 313
57871462 314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
f776eb14 317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
57871462 321 }
9f51b4b9 322
57871462 323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
f776eb14 325 cur->dirty|=dirty<<hr;
57871462 326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
e2b5e7aa 330static void alloc_cc(struct regstat *cur,int i)
57871462 331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
57871462 335/* Assembler */
336
e2b5e7aa 337static unused char regname[16][4] = {
57871462 338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
e2b5e7aa 355static void output_w32(u_int word)
57871462 356{
357 *((u_int *)out)=word;
358 out+=4;
359}
e2b5e7aa 360
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 368
// Build the rn/rd fields plus a rotated 8-bit immediate operand that
// represents (imm << shift).  shift must be even; it is stored in the
// rotate field (bits 8-11) as a rotate-right amount of (32 - shift).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
e2b5e7aa 377
// Try to express imm as an ARM data-processing immediate (an 8-bit value
// rotated right by an even amount).
// Returns 1 and stores the 12-bit operand field in *encoded on success;
// returns 0 and leaves *encoded at 0 when imm cannot be encoded.
static u_int genimm(u_int imm, u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  // rot counts down from 32 in steps of 2 while imm is rotated right by 2;
  // (rot & 30) << 7 places rot/2 into the rotate field (bits 8-11)
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 393
394static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
581335b0 398 (void)ret;
cfbd3c6e 399}
e2b5e7aa 400
401static u_int genjmp(u_int addr)
57871462 402{
7c3a5182 403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
e80343e2 408 return 0;
409 }
57871462 410 return ((u_int)offset>>2)&0xffffff;
411}
412
d1e4ebd9 413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
e2b5e7aa 420static void emit_mov(int rs,int rt)
57871462 421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
e2b5e7aa 426static void emit_movs(int rs,int rt)
57871462 427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_add(int rs1,int rs2,int rt)
57871462 433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
39b71d9a 438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
e2b5e7aa 445static void emit_adcs(int rs1,int rs2,int rt)
57871462 446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
e2b5e7aa 451static void emit_neg(int rs, int rt)
57871462 452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
e2b5e7aa 457static void emit_sub(int rs1,int rs2,int rt)
57871462 458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
e2b5e7aa 463static void emit_zeroreg(int rt)
57871462 464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
e2b5e7aa 469static void emit_loadlp(u_int imm,u_int rt)
790ee18e 470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
e2b5e7aa 475
476static void emit_movw(u_int imm,u_int rt)
790ee18e 477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
e2b5e7aa 482
483static void emit_movt(u_int imm,u_int rt)
790ee18e 484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
e2b5e7aa 488
489static void emit_movimm(u_int imm,u_int rt)
790ee18e 490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
665f33e1 499 #ifndef HAVE_ARMV7
790ee18e 500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
665f33e1 508 #ifndef HAVE_ARMV7
790ee18e 509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
e2b5e7aa 516
517static void emit_pcreladdr(u_int rt)
790ee18e 518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
e2b5e7aa 523static void emit_loadreg(int r, int hr)
57871462 524{
3d624f89 525 if(r&64) {
c43b5311 526 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 527 assert(0);
528 return;
3d624f89 529 }
57871462 530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
7c3a5182 533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
37387d8b 540 case ROREG: addr = (int)&ram_offset; break;
7c3a5182 541 default: assert(r < 34); break;
542 }
57871462 543 u_int offset = addr-(u_int)&dynarec_local;
544 assert(offset<4096);
545 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
546 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
547 }
548}
e2b5e7aa 549
550static void emit_storereg(int r, int hr)
57871462 551{
3d624f89 552 if(r&64) {
c43b5311 553 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 554 assert(0);
555 return;
3d624f89 556 }
7c3a5182 557 int addr = (int)&psxRegs.GPR.r[r];
558 switch (r) {
559 //case HIREG: addr = &hi; break;
560 //case LOREG: addr = &lo; break;
561 case CCREG: addr = (int)&cycle_count; break;
562 default: assert(r < 34); break;
563 }
57871462 564 u_int offset = addr-(u_int)&dynarec_local;
565 assert(offset<4096);
566 assem_debug("str %s,fp+%d\n",regname[hr],offset);
567 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
568}
569
e2b5e7aa 570static void emit_test(int rs, int rt)
57871462 571{
572 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
573 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
574}
575
e2b5e7aa 576static void emit_testimm(int rs,int imm)
57871462 577{
578 u_int armval;
5a05d80c 579 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 580 genimm_checked(imm,&armval);
57871462 581 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
582}
583
e2b5e7aa 584static void emit_testeqimm(int rs,int imm)
b9b61529 585{
586 u_int armval;
587 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 588 genimm_checked(imm,&armval);
b9b61529 589 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
590}
591
e2b5e7aa 592static void emit_not(int rs,int rt)
57871462 593{
594 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
595 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
596}
597
e2b5e7aa 598static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 599{
600 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
601 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
602}
603
e2b5e7aa 604static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 605{
606 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
608}
e2b5e7aa 609
e2b5e7aa 610static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 611{
612 assert(rs<16);
613 assert(rt<16);
614 assert(imm<32);
615 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
616 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
617}
618
e2b5e7aa 619static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 620{
621 assert(rs<16);
622 assert(rt<16);
623 assert(imm<32);
624 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
625 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
626}
627
e2b5e7aa 628static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 629{
630 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
631 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
632}
633
3968e69e 634static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
635{
636 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
637 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
638}
639
e2b5e7aa 640static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 641{
642 assert(rs<16);
643 assert(rt<16);
644 if(imm!=0) {
57871462 645 u_int armval;
646 if(genimm(imm,&armval)) {
647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
649 }else if(genimm(-imm,&armval)) {
8a0a8423 650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 652 #ifdef HAVE_ARMV7
653 }else if(rt!=rs&&(u_int)imm<65536) {
654 emit_movw(imm&0x0000ffff,rt);
655 emit_add(rs,rt,rt);
656 }else if(rt!=rs&&(u_int)-imm<65536) {
657 emit_movw(-imm&0x0000ffff,rt);
658 emit_sub(rs,rt,rt);
659 #endif
660 }else if((u_int)-imm<65536) {
57871462 661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
662 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
664 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 665 }else {
666 do {
667 int shift = (ffs(imm) - 1) & ~1;
668 int imm8 = imm & (0xff << shift);
669 genimm_checked(imm8,&armval);
670 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
671 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
672 rs = rt;
673 imm &= ~imm8;
674 }
675 while (imm != 0);
57871462 676 }
677 }
678 else if(rs!=rt) emit_mov(rs,rt);
679}
680
e2b5e7aa 681static void emit_addimm_and_set_flags(int imm,int rt)
57871462 682{
683 assert(imm>-65536&&imm<65536);
684 u_int armval;
685 if(genimm(imm,&armval)) {
686 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
687 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
688 }else if(genimm(-imm,&armval)) {
689 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(imm<0) {
692 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
693 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
694 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
695 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
696 }else{
697 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
698 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
699 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
700 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
701 }
702}
e2b5e7aa 703
e2b5e7aa 704static void emit_addnop(u_int r)
57871462 705{
706 assert(r<16);
707 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
708 output_w32(0xe2800000|rd_rn_rm(r,r,0));
709}
710
e2b5e7aa 711static void emit_andimm(int rs,int imm,int rt)
57871462 712{
713 u_int armval;
790ee18e 714 if(imm==0) {
715 emit_zeroreg(rt);
716 }else if(genimm(imm,&armval)) {
57871462 717 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
719 }else if(genimm(~imm,&armval)) {
720 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(imm==65535) {
332a4533 723 #ifndef HAVE_ARMV6
57871462 724 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
726 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
728 #else
729 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
730 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
731 #endif
732 }else{
733 assert(imm>0&&imm<65535);
665f33e1 734 #ifndef HAVE_ARMV7
57871462 735 assem_debug("mov r14,#%d\n",imm&0xFF00);
736 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
737 assem_debug("add r14,r14,#%d\n",imm&0xFF);
738 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
739 #else
740 emit_movw(imm,HOST_TEMPREG);
741 #endif
742 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
743 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
744 }
745}
746
e2b5e7aa 747static void emit_orimm(int rs,int imm,int rt)
57871462 748{
749 u_int armval;
790ee18e 750 if(imm==0) {
751 if(rs!=rt) emit_mov(rs,rt);
752 }else if(genimm(imm,&armval)) {
57871462 753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
754 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
755 }else{
756 assert(imm>0&&imm<65536);
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
759 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
760 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
761 }
762}
763
e2b5e7aa 764static void emit_xorimm(int rs,int imm,int rt)
57871462 765{
57871462 766 u_int armval;
790ee18e 767 if(imm==0) {
768 if(rs!=rt) emit_mov(rs,rt);
769 }else if(genimm(imm,&armval)) {
57871462 770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
771 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
772 }else{
514ed0d9 773 assert(imm>0&&imm<65536);
57871462 774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
776 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
777 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
778 }
779}
780
e2b5e7aa 781static void emit_shlimm(int rs,u_int imm,int rt)
57871462 782{
783 assert(imm>0);
784 assert(imm<32);
785 //if(imm==1) ...
786 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
787 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
788}
789
e2b5e7aa 790static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 791{
792 assert(imm>0);
793 assert(imm<32);
794 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
795 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
796}
797
e2b5e7aa 798static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 799{
800 assert(imm>0);
801 assert(imm<32);
802 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
804}
805
e2b5e7aa 806static void emit_shrimm(int rs,u_int imm,int rt)
57871462 807{
808 assert(imm>0);
809 assert(imm<32);
810 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
811 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
812}
813
e2b5e7aa 814static void emit_sarimm(int rs,u_int imm,int rt)
57871462 815{
816 assert(imm>0);
817 assert(imm<32);
818 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
820}
821
e2b5e7aa 822static void emit_rorimm(int rs,u_int imm,int rt)
57871462 823{
824 assert(imm>0);
825 assert(imm<32);
826 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
828}
829
// Sign-extend the low 16 bits of rs into rt: a single sxth with HAVE_ARMV6,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
840
// Sign-extend the low 8 bits of rs into rt: a single sxtb with HAVE_ARMV6,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
851
e2b5e7aa 852static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 853{
854 assert(rs<16);
855 assert(rt<16);
856 assert(shift<16);
857 //if(imm==1) ...
858 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
860}
e2b5e7aa 861
862static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
868 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
869}
e2b5e7aa 870
871static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 872{
873 assert(rs<16);
874 assert(rt<16);
875 assert(shift<16);
876 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
878}
57871462 879
3968e69e 880static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 881{
882 assert(rs<16);
883 assert(rt<16);
884 assert(shift<16);
885 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
886 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
887}
e2b5e7aa 888
3968e69e 889static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 890{
891 assert(rs<16);
892 assert(rt<16);
893 assert(shift<16);
894 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
895 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
896}
897
e2b5e7aa 898static void emit_cmpimm(int rs,int imm)
57871462 899{
900 u_int armval;
901 if(genimm(imm,&armval)) {
5a05d80c 902 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 903 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
904 }else if(genimm(-imm,&armval)) {
5a05d80c 905 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 906 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
907 }else if(imm>0) {
908 assert(imm<65536);
57871462 909 emit_movimm(imm,HOST_TEMPREG);
57871462 910 assem_debug("cmp %s,r14\n",regname[rs]);
911 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }else{
913 assert(imm>-65536);
57871462 914 emit_movimm(-imm,HOST_TEMPREG);
57871462 915 assem_debug("cmn %s,r14\n",regname[rs]);
916 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }
918}
919
e2b5e7aa 920static void emit_cmovne_imm(int imm,int rt)
57871462 921{
922 assem_debug("movne %s,#%d\n",regname[rt],imm);
923 u_int armval;
cfbd3c6e 924 genimm_checked(imm,&armval);
57871462 925 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
926}
e2b5e7aa 927
928static void emit_cmovl_imm(int imm,int rt)
57871462 929{
930 assem_debug("movlt %s,#%d\n",regname[rt],imm);
931 u_int armval;
cfbd3c6e 932 genimm_checked(imm,&armval);
57871462 933 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
934}
e2b5e7aa 935
936static void emit_cmovb_imm(int imm,int rt)
57871462 937{
938 assem_debug("movcc %s,#%d\n",regname[rt],imm);
939 u_int armval;
cfbd3c6e 940 genimm_checked(imm,&armval);
57871462 941 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
942}
e2b5e7aa 943
3968e69e 944static void emit_cmovae_imm(int imm,int rt)
945{
946 assem_debug("movcs %s,#%d\n",regname[rt],imm);
947 u_int armval;
948 genimm_checked(imm,&armval);
949 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
950}
951
e2b5e7aa 952static void emit_cmovne_reg(int rs,int rt)
57871462 953{
954 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
955 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
956}
e2b5e7aa 957
958static void emit_cmovl_reg(int rs,int rt)
57871462 959{
960 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
961 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
962}
e2b5e7aa 963
e3c6bdb5 964static void emit_cmovb_reg(int rs,int rt)
965{
966 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
967 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
968}
969
e2b5e7aa 970static void emit_cmovs_reg(int rs,int rt)
57871462 971{
972 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
973 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
974}
975
// Emit code for rt = (rs < imm) ? 1 : 0, signed compare.
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_slti32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 983
// Emit code for rt = (rs < imm) ? 1 : 0, unsigned compare.
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 991
e2b5e7aa 992static void emit_cmp(int rs,int rt)
57871462 993{
994 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
995 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
996}
e2b5e7aa 997
// Emit code for rt = (rs > 0) ? 1 : 0, signed: compare rs with 1, preset rt
// to 1 and overwrite it with 0 when the compare said "less than".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1005
// Emit code that sets rt to 1 when rs is non-zero.  The flags come from
// either a flag-setting move of rs into rt, or from a test of rs against
// itself when both are the same register; rt is then set to 1 on "ne".
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1013
// Emit code that sets rt to 1 when rs1 < rs2 (signed compare), else 0.
// If rt aliases one of the operands it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1022
// Emit code that sets rt to 1 when comparing rs1 with rs2 leaves the carry
// flag clear (the condition used by emit_cmovb_imm), else 0.
// If rt aliases one of the operands it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_operand = (rs1 == rt || rs2 == rt);

  if (!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (rt_is_operand)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1031
2a014d73 1032static int can_jump_or_call(const void *a)
1033{
1034 intptr_t offset = (u_char *)a - out - 8;
1035 return (-33554432 <= offset && offset < 33554432);
1036}
1037
643aeae3 1038static void emit_call(const void *a_)
57871462 1039{
643aeae3 1040 int a = (int)a_;
d1e4ebd9 1041 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1042 u_int offset=genjmp(a);
1043 output_w32(0xeb000000|offset);
1044}
e2b5e7aa 1045
b14b6a8f 1046static void emit_jmp(const void *a_)
57871462 1047{
b14b6a8f 1048 int a = (int)a_;
d1e4ebd9 1049 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1050 u_int offset=genjmp(a);
1051 output_w32(0xea000000|offset);
1052}
e2b5e7aa 1053
// Emit "bne a_": branch taken when the Z flag is clear.
static void emit_jne(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bne %x\n",target);
  output_w32(0x1a000000 | genjmp(target));
}
e2b5e7aa 1061
// Emit "beq a_": branch taken when the Z flag is set.
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;

  assem_debug("beq %x\n",target);
  output_w32(0x0a000000 | genjmp(target));
}
e2b5e7aa 1069
// Emit "bmi a_": branch taken when the N (negative) flag is set.
static void emit_js(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
e2b5e7aa 1077
// Emit "bpl a_": branch taken when the N (negative) flag is clear.
static void emit_jns(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
e2b5e7aa 1085
// Emit "blt a_": branch taken on signed less-than.
static void emit_jl(const void *a_)
{
  const int target = (int)a_;

  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
e2b5e7aa 1093
// Emit "bge a_": branch taken on signed greater-or-equal.
static void emit_jge(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
e2b5e7aa 1101
// Emit "bvc a_": branch taken when the V (overflow) flag is clear.
static void emit_jno(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
e2b5e7aa 1109
// Emit "bcs a_": branch taken when the carry flag is set.
static void emit_jc(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000 | genjmp(target));
}
e2b5e7aa 1117
// Emit "bcc a_": branch taken when the carry flag is clear.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;

  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000 | genjmp(target));
}
1125
3968e69e 1126static unused void emit_callreg(u_int r)
57871462 1127{
c6c3b1b3 1128 assert(r<15);
1129 assem_debug("blx %s\n",regname[r]);
1130 output_w32(0xe12fff30|r);
57871462 1131}
e2b5e7aa 1132
1133static void emit_jmpreg(u_int r)
57871462 1134{
1135 assem_debug("mov pc,%s\n",regname[r]);
1136 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1137}
1138
be516ebe 1139static void emit_ret(void)
1140{
1141 emit_jmpreg(14);
1142}
1143
e2b5e7aa 1144static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1145{
1146 assert(offset>-4096&&offset<4096);
1147 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1148 if(offset>=0) {
1149 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1150 }else{
1151 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1152 }
1153}
e2b5e7aa 1154
1155static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1156{
1157 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1158 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1159}
39b71d9a 1160#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1161
1162static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1163{
1164 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1165 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1166}
e2b5e7aa 1167
1168static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1169{
1170 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1172}
e2b5e7aa 1173
37387d8b 1174static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1175{
1176 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1178}
1179
e2b5e7aa 1180static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1181{
1182 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1183 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1184}
e2b5e7aa 1185
37387d8b 1186static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1187{
1188 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1190}
1191
e2b5e7aa 1192static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1193{
1194 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1196}
e2b5e7aa 1197
37387d8b 1198static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1199{
1200 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1201 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1202}
1203
e2b5e7aa 1204static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1205{
1206 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1207 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1208}
e2b5e7aa 1209
37387d8b 1210static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1211{
1212 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1213 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1214}
1215
e2b5e7aa 1216static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1217{
1218 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1219 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1220}
1221
1222static void emit_str_dualindexed(int rs1, int rs2, int rt)
1223{
1224 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1225 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1226}
1227
1228static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1229{
1230 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1231 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1232}
1233
1234static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1235{
1236 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1237 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1238}
e2b5e7aa 1239
e2b5e7aa 1240static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1241{
1242 assert(offset>-256&&offset<256);
1243 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1244 if(offset>=0) {
1245 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1246 }else{
1247 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1248 }
1249}
e2b5e7aa 1250
e2b5e7aa 1251static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1252{
1253 assert(offset>-256&&offset<256);
1254 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1255 if(offset>=0) {
1256 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1257 }else{
1258 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1259 }
1260}
e2b5e7aa 1261
1262static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1263{
1264 assert(offset>-4096&&offset<4096);
1265 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1266 if(offset>=0) {
1267 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1268 }else{
1269 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1270 }
1271}
e2b5e7aa 1272
e2b5e7aa 1273static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1274{
1275 assert(offset>-256&&offset<256);
1276 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1277 if(offset>=0) {
1278 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1279 }else{
1280 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1281 }
1282}
e2b5e7aa 1283
054175e9 1284static void emit_ldrd(int offset, int rs, int rt)
1285{
1286 assert(offset>-256&&offset<256);
1287 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1288 if(offset>=0) {
1289 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1290 }else{
1291 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1292 }
1293}
e2b5e7aa 1294
643aeae3 1295static void emit_readword(void *addr, int rt)
57871462 1296{
643aeae3 1297 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1298 assert(offset<4096);
1299 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1300 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1301}
39b71d9a 1302#define emit_readptr emit_readword
e2b5e7aa 1303
e2b5e7aa 1304static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1305{
1306 assert(offset>-4096&&offset<4096);
1307 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1308 if(offset>=0) {
1309 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1310 }else{
1311 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1312 }
1313}
e2b5e7aa 1314
e2b5e7aa 1315static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1316{
1317 assert(offset>-256&&offset<256);
1318 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1319 if(offset>=0) {
1320 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1321 }else{
1322 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1323 }
1324}
e2b5e7aa 1325
1326static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1327{
1328 assert(offset>-4096&&offset<4096);
1329 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1330 if(offset>=0) {
1331 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1332 }else{
1333 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1334 }
1335}
e2b5e7aa 1336
e2b5e7aa 1337static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1338{
1339 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1340 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1341}
e2b5e7aa 1342
1343static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1344{
1345 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1346 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1347}
e2b5e7aa 1348
1349static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1350{
1351 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1352 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1353}
e2b5e7aa 1354
643aeae3 1355static void emit_writeword(int rt, void *addr)
57871462 1356{
643aeae3 1357 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1358 assert(offset<4096);
1359 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1360 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1361}
e2b5e7aa 1362
e2b5e7aa 1363static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1364{
1365 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1366 assert(rs1<16);
1367 assert(rs2<16);
1368 assert(hi<16);
1369 assert(lo<16);
1370 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1371}
e2b5e7aa 1372
1373static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1374{
1375 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1376 assert(rs1<16);
1377 assert(rs2<16);
1378 assert(hi<16);
1379 assert(lo<16);
1380 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1381}
1382
e2b5e7aa 1383static void emit_clz(int rs,int rt)
57871462 1384{
1385 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1386 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1387}
1388
e2b5e7aa 1389static void emit_subcs(int rs1,int rs2,int rt)
57871462 1390{
1391 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1392 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1393}
1394
e2b5e7aa 1395static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1396{
1397 assert(imm>0);
1398 assert(imm<32);
1399 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1400 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1401}
1402
e2b5e7aa 1403static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1404{
1405 assert(imm>0);
1406 assert(imm<32);
1407 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1408 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1409}
1410
e2b5e7aa 1411static void emit_negmi(int rs, int rt)
57871462 1412{
1413 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1414 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1415}
1416
e2b5e7aa 1417static void emit_negsmi(int rs, int rt)
57871462 1418{
1419 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1420 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1421}
1422
e2b5e7aa 1423static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1424{
1425 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1426 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1427}
1428
e2b5e7aa 1429static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1430{
1431 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1432 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1433}
1434
e2b5e7aa 1435static void emit_teq(int rs, int rt)
57871462 1436{
1437 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1438 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1439}
1440
3968e69e 1441static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1442{
1443 u_int armval;
cfbd3c6e 1444 genimm_checked(imm,&armval);
57871462 1445 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1446 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1447}
1448
57871462 1449// Conditionally select one of two immediates, optimizing for small code size
1450// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1451static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1452{
1453 u_int armval;
1454 if(genimm(imm2-imm1,&armval)) {
1455 emit_movimm(imm1,rt);
1456 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1457 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1458 }else if(genimm(imm1-imm2,&armval)) {
1459 emit_movimm(imm1,rt);
1460 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1461 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1462 }
1463 else {
665f33e1 1464 #ifndef HAVE_ARMV7
57871462 1465 emit_movimm(imm1,rt);
1466 add_literal((int)out,imm2);
1467 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1468 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1469 #else
1470 emit_movw(imm1&0x0000FFFF,rt);
1471 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1472 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1473 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1474 }
1475 emit_movt(imm1&0xFFFF0000,rt);
1476 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1477 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1478 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1479 }
1480 #endif
1481 }
1482}
1483
57871462 1484// special case for checking invalid_code
e2b5e7aa 1485static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1486{
1487 assert(imm<128&&imm>=0);
1488 assert(r>=0&&r<16);
1489 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1490 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1491 emit_cmpimm(HOST_TEMPREG,imm);
1492}
1493
// Emit "blne a": branch-with-link taken only when the Z flag is clear.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000 | genjmp(a));
}
1500
57871462 1501// Used to preload hash table entries
e2b5e7aa 1502static unused void emit_prefetchreg(int r)
57871462 1503{
1504 assem_debug("pld %s\n",regname[r]);
1505 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1506}
1507
1508// Special case for mini_ht
e2b5e7aa 1509static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1510{
1511 assert(offset<4096);
1512 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1513 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1514}
1515
e2b5e7aa 1516static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1517{
1518 u_int armval;
cfbd3c6e 1519 genimm_checked(imm,&armval);
b9b61529 1520 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1521 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1522}
1523
e2b5e7aa 1524static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1525{
1526 u_int armval;
cfbd3c6e 1527 genimm_checked(imm,&armval);
b9b61529 1528 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1529 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1530}
1531
e2b5e7aa 1532static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1533{
1534 u_int armval;
1535 genimm_checked(imm,&armval);
1536 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1537 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1538}
1539
e2b5e7aa 1540static void emit_jno_unlikely(int a)
57871462 1541{
1542 //emit_jno(a);
1543 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1544 output_w32(0x72800000|rd_rn_rm(15,15,0));
1545}
1546
054175e9 1547static void save_regs_all(u_int reglist)
57871462 1548{
054175e9 1549 int i;
57871462 1550 if(!reglist) return;
1551 assem_debug("stmia fp,{");
054175e9 1552 for(i=0;i<16;i++)
1553 if(reglist&(1<<i))
1554 assem_debug("r%d,",i);
57871462 1555 assem_debug("}\n");
1556 output_w32(0xe88b0000|reglist);
1557}
e2b5e7aa 1558
054175e9 1559static void restore_regs_all(u_int reglist)
57871462 1560{
054175e9 1561 int i;
57871462 1562 if(!reglist) return;
1563 assem_debug("ldmia fp,{");
054175e9 1564 for(i=0;i<16;i++)
1565 if(reglist&(1<<i))
1566 assem_debug("r%d,",i);
57871462 1567 assem_debug("}\n");
1568 output_w32(0xe89b0000|reglist);
1569}
e2b5e7aa 1570
054175e9 1571// Save registers before function call
1572static void save_regs(u_int reglist)
1573{
4d646738 1574 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1575 save_regs_all(reglist);
1576}
e2b5e7aa 1577
054175e9 1578// Restore registers after function call
1579static void restore_regs(u_int reglist)
1580{
4d646738 1581 reglist&=CALLER_SAVE_REGS;
054175e9 1582 restore_regs_all(reglist);
1583}
57871462 1584
57871462 1585/* Stubs/epilogue */
1586
e2b5e7aa 1587static void literal_pool(int n)
57871462 1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
1593 u_int *ptr;
1594 int i;
1595 for(i=0;i<literalcount;i++)
1596 {
77750690 1597 u_int l_addr=(u_int)out;
1598 int j;
1599 for(j=0;j<i;j++) {
1600 if(literals[j][1]==literals[i][1]) {
1601 //printf("dup %08x\n",literals[i][1]);
1602 l_addr=literals[j][0];
1603 break;
1604 }
1605 }
57871462 1606 ptr=(u_int *)literals[i][0];
77750690 1607 u_int offset=l_addr-(u_int)ptr-8;
57871462 1608 assert(offset<4096);
1609 assert(!(offset&3));
1610 *ptr|=offset;
77750690 1611 if(l_addr==(u_int)out) {
1612 literals[i][0]=l_addr; // remember for dupes
1613 output_w32(literals[i][1]);
1614 }
57871462 1615 }
1616 literalcount=0;
1617}
1618
e2b5e7aa 1619static void literal_pool_jumpover(int n)
57871462 1620{
1621 if(!literalcount) return;
1622 if(n) {
1623 if((int)out-literals[0][0]<4096-n) return;
1624 }
df4dc2b1 1625 void *jaddr = out;
57871462 1626 emit_jmp(0);
1627 literal_pool(0);
df4dc2b1 1628 set_jump_target(jaddr, out);
57871462 1629}
1630
7c3a5182 1631// parsed by get_pointer, find_extjump_insn
1632static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1633{
1634 u_char *ptr=(u_char *)addr;
1635 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1636 (void)ptr;
1637
57871462 1638 emit_loadlp(target,0);
643aeae3 1639 emit_loadlp((u_int)addr,1);
d62c125a 1640 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1641 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1642//DEBUG >
1643#ifdef DEBUG_CYCLE_COUNT
643aeae3 1644 emit_readword(&last_count,ECX);
57871462 1645 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1646 emit_readword(&next_interupt,ECX);
1647 emit_writeword(HOST_CCREG,&Count);
57871462 1648 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1649 emit_writeword(ECX,&last_count);
57871462 1650#endif
1651//DEBUG <
2a014d73 1652 emit_far_jump(linker);
57871462 1653}
1654
d1e4ebd9 1655static void check_extjump2(void *src)
1656{
1657 u_int *ptr = src;
1658 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1659 (void)ptr;
1660}
1661
13e35c04 1662// put rt_val into rt, potentially making use of rs with value rs_val
1663static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1664{
8575a877 1665 u_int armval;
1666 int diff;
1667 if(genimm(rt_val,&armval)) {
1668 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1669 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1670 return;
1671 }
1672 if(genimm(~rt_val,&armval)) {
1673 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 diff=rt_val-rs_val;
1678 if(genimm(diff,&armval)) {
1679 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1680 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }else if(genimm(-diff,&armval)) {
1683 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1684 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }
1687 emit_movimm(rt_val,rt);
1688}
1689
// return 1 if emit_movimm_from() can do its job cheaply, i.e. when v1 == v2
// or when the difference (in either direction) reduces to a value below
// 0x100 after stripping trailing pairs of zero bits
static int is_similar_value(u_int v1,u_int v2)
{
  int diff;
  u_int bits;

  if (v1 == v2)
    return 1;

  diff = v2 - v1;

  // forward difference
  bits = diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // backward difference
  bits = -diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
cbbab9cd 1705
b14b6a8f 1706static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1707{
1708 switch(type) {
1709 case LOADB_STUB: emit_signextend8(rs,rt); break;
1710 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1711 case LOADH_STUB: emit_signextend16(rs,rt); break;
1712 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1713 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1714 default: assert(0);
1715 }
1716}
1717
b1be1eee 1718#include "pcsxmem.h"
1719#include "pcsxmem_inline.c"
b1be1eee 1720
e2b5e7aa 1721static void do_readstub(int n)
57871462 1722{
b14b6a8f 1723 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1724 literal_pool(256);
b14b6a8f 1725 set_jump_target(stubs[n].addr, out);
1726 enum stub_type type=stubs[n].type;
1727 int i=stubs[n].a;
1728 int rs=stubs[n].b;
81dbbf4c 1729 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1730 u_int reglist=stubs[n].e;
81dbbf4c 1731 const signed char *i_regmap=i_regs->regmap;
581335b0 1732 int rt;
cf95b4f0 1733 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1734 rt=get_reg(i_regmap,FTEMP);
1735 }else{
cf95b4f0 1736 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1737 }
1738 assert(rs>=0);
df4dc2b1 1739 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1740 void *restore_jump = NULL;
c6c3b1b3 1741 reglist|=(1<<rs);
1742 for(r=0;r<=12;r++) {
1743 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1744 temp=r; break;
1745 }
1746 }
cf95b4f0 1747 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1748 reglist&=~(1<<rt);
1749 if(temp==-1) {
1750 save_regs(reglist);
1751 regs_saved=1;
1752 temp=(rs==0)?2:0;
1753 }
1754 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1755 temp2=1;
643aeae3 1756 emit_readword(&mem_rtab,temp);
c6c3b1b3 1757 emit_shrimm(rs,12,temp2);
1758 emit_readword_dualindexedx4(temp,temp2,temp2);
1759 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1760 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1761 switch(type) {
1762 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1763 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1764 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1765 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1766 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1767 default: assert(0);
c6c3b1b3 1768 }
1769 }
1770 if(regs_saved) {
df4dc2b1 1771 restore_jump=out;
c6c3b1b3 1772 emit_jcc(0); // jump to reg restore
1773 }
1774 else
b14b6a8f 1775 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1776
1777 if(!regs_saved)
1778 save_regs(reglist);
643aeae3 1779 void *handler=NULL;
c6c3b1b3 1780 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1781 handler=jump_handler_read8;
c6c3b1b3 1782 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1783 handler=jump_handler_read16;
c6c3b1b3 1784 if(type==LOADW_STUB)
643aeae3 1785 handler=jump_handler_read32;
1786 assert(handler);
b96d3df7 1787 pass_args(rs,temp2);
c6c3b1b3 1788 int cc=get_reg(i_regmap,CCREG);
1789 if(cc<0)
1790 emit_loadreg(CCREG,2);
bb4f300c 1791 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1792 emit_far_call(handler);
cf95b4f0 1793 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1794 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1795 }
1796 if(restore_jump)
df4dc2b1 1797 set_jump_target(restore_jump, out);
c6c3b1b3 1798 restore_regs(reglist);
b14b6a8f 1799 emit_jmp(stubs[n].retaddr); // return address
57871462 1800}
1801
81dbbf4c 1802static void inline_readstub(enum stub_type type, int i, u_int addr,
1803 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1804{
1805 int rs=get_reg(regmap,target);
57871462 1806 int rt=get_reg(regmap,target);
535d208a 1807 if(rs<0) rs=get_reg(regmap,-1);
57871462 1808 assert(rs>=0);
2a014d73 1809 u_int is_dynamic;
687b4580 1810 uintptr_t host_addr = 0;
643aeae3 1811 void *handler;
b1be1eee 1812 int cc=get_reg(regmap,CCREG);
bb4f300c 1813 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
b1be1eee 1814 return;
643aeae3 1815 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1816 if (handler == NULL) {
cf95b4f0 1817 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1818 return;
13e35c04 1819 if(addr!=host_addr)
1820 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1821 switch(type) {
1822 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1823 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1824 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1825 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1826 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1827 default: assert(0);
1828 }
1829 return;
1830 }
b1be1eee 1831 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1832 if(is_dynamic) {
1833 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1834 handler=jump_handler_read8;
b1be1eee 1835 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1836 handler=jump_handler_read16;
b1be1eee 1837 if(type==LOADW_STUB)
643aeae3 1838 handler=jump_handler_read32;
b1be1eee 1839 }
c6c3b1b3 1840
1841 // call a memhandler
cf95b4f0 1842 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1843 reglist&=~(1<<rt);
1844 save_regs(reglist);
1845 if(target==0)
1846 emit_movimm(addr,0);
1847 else if(rs!=0)
1848 emit_mov(rs,0);
b1be1eee 1849 if(cc<0)
1850 emit_loadreg(CCREG,2);
1851 if(is_dynamic) {
1852 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
bb4f300c 1853 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
c6c3b1b3 1854 }
b1be1eee 1855 else {
643aeae3 1856 emit_readword(&last_count,3);
bb4f300c 1857 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
b1be1eee 1858 emit_add(2,3,2);
643aeae3 1859 emit_writeword(2,&Count);
b1be1eee 1860 }
1861
2a014d73 1862 emit_far_call(handler);
b1be1eee 1863
cf95b4f0 1864 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1865 switch(type) {
1866 case LOADB_STUB: emit_signextend8(0,rt); break;
1867 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1868 case LOADH_STUB: emit_signextend16(0,rt); break;
1869 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1870 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1871 default: assert(0);
1872 }
1873 }
1874 restore_regs(reglist);
57871462 1875}
1876
e2b5e7aa 1877static void do_writestub(int n)
57871462 1878{
b14b6a8f 1879 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1880 literal_pool(256);
b14b6a8f 1881 set_jump_target(stubs[n].addr, out);
1882 enum stub_type type=stubs[n].type;
1883 int i=stubs[n].a;
1884 int rs=stubs[n].b;
81dbbf4c 1885 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1886 u_int reglist=stubs[n].e;
81dbbf4c 1887 const signed char *i_regmap=i_regs->regmap;
581335b0 1888 int rt,r;
cf95b4f0 1889 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1890 rt=get_reg(i_regmap,r=FTEMP);
1891 }else{
cf95b4f0 1892 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1893 }
1894 assert(rs>=0);
1895 assert(rt>=0);
b14b6a8f 1896 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1897 void *restore_jump = NULL;
b96d3df7 1898 int reglist2=reglist|(1<<rs)|(1<<rt);
1899 for(rtmp=0;rtmp<=12;rtmp++) {
1900 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1901 temp=rtmp; break;
1902 }
1903 }
1904 if(temp==-1) {
1905 save_regs(reglist);
1906 regs_saved=1;
1907 for(rtmp=0;rtmp<=3;rtmp++)
1908 if(rtmp!=rs&&rtmp!=rt)
1909 {temp=rtmp;break;}
1910 }
1911 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1912 temp2=3;
643aeae3 1913 emit_readword(&mem_wtab,temp);
b96d3df7 1914 emit_shrimm(rs,12,temp2);
1915 emit_readword_dualindexedx4(temp,temp2,temp2);
1916 emit_lsls_imm(temp2,1,temp2);
1917 switch(type) {
1918 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1919 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1920 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1921 default: assert(0);
1922 }
1923 if(regs_saved) {
df4dc2b1 1924 restore_jump=out;
b96d3df7 1925 emit_jcc(0); // jump to reg restore
1926 }
1927 else
b14b6a8f 1928 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1929
1930 if(!regs_saved)
1931 save_regs(reglist);
643aeae3 1932 void *handler=NULL;
b96d3df7 1933 switch(type) {
643aeae3 1934 case STOREB_STUB: handler=jump_handler_write8; break;
1935 case STOREH_STUB: handler=jump_handler_write16; break;
1936 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1937 default: assert(0);
b96d3df7 1938 }
643aeae3 1939 assert(handler);
b96d3df7 1940 pass_args(rs,rt);
1941 if(temp2!=3)
1942 emit_mov(temp2,3);
1943 int cc=get_reg(i_regmap,CCREG);
1944 if(cc<0)
1945 emit_loadreg(CCREG,2);
bb4f300c 1946 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
b96d3df7 1947 // returns new cycle_count
2a014d73 1948 emit_far_call(handler);
bb4f300c 1949 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
b96d3df7 1950 if(cc<0)
1951 emit_storereg(CCREG,2);
1952 if(restore_jump)
df4dc2b1 1953 set_jump_target(restore_jump, out);
b96d3df7 1954 restore_regs(reglist);
b14b6a8f 1955 emit_jmp(stubs[n].retaddr);
57871462 1956}
1957
81dbbf4c 1958static void inline_writestub(enum stub_type type, int i, u_int addr,
1959 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1960{
1961 int rs=get_reg(regmap,-1);
57871462 1962 int rt=get_reg(regmap,target);
1963 assert(rs>=0);
1964 assert(rt>=0);
687b4580 1965 uintptr_t host_addr = 0;
643aeae3 1966 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1967 if (handler == NULL) {
13e35c04 1968 if(addr!=host_addr)
1969 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1970 switch(type) {
1971 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1972 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1973 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1974 default: assert(0);
1975 }
1976 return;
1977 }
1978
1979 // call a memhandler
1980 save_regs(reglist);
13e35c04 1981 pass_args(rs,rt);
b96d3df7 1982 int cc=get_reg(regmap,CCREG);
1983 if(cc<0)
1984 emit_loadreg(CCREG,2);
bb4f300c 1985 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
643aeae3 1986 emit_movimm((u_int)handler,3);
b96d3df7 1987 // returns new cycle_count
2a014d73 1988 emit_far_call(jump_handler_write_h);
bb4f300c 1989 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
b96d3df7 1990 if(cc<0)
1991 emit_storereg(CCREG,2);
1992 restore_regs(reglist);
57871462 1993}
1994
d1e4ebd9 1995// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1996static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 1997{
665f33e1 1998 #ifndef HAVE_ARMV7
7c3a5182 1999 emit_loadlp((int)source, 1);
2000 emit_loadlp((int)copy, 2);
3d680478 2001 emit_loadlp(source_len, 3);
57871462 2002 #else
7c3a5182 2003 emit_movw(((u_int)source)&0x0000FFFF, 1);
2004 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2005 emit_movt(((u_int)source)&0xFFFF0000, 1);
2006 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2007 emit_movw(source_len, 3);
57871462 2008 #endif
7c3a5182 2009 emit_movimm(arg0, 0);
2010}
2011
3d680478 2012static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2013{
2014 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2015 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2016 emit_far_call(verify_code);
df4dc2b1 2017 void *entry = out;
57871462 2018 load_regs_entry(i);
df4dc2b1 2019 if (entry == out)
2020 entry = instr_addr[i];
57871462 2021 emit_jmp(instr_addr[i]);
2022 return entry;
2023}
2024
3d680478 2025static void do_dirty_stub_ds(u_int source_len)
57871462 2026{
3d680478 2027 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2028 emit_far_call(verify_code_ds);
57871462 2029}
2030
57871462 2031/* Special assem */
2032
81dbbf4c 2033static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2034{
2035 save_regs_all(reglist);
32631e6a 2036 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2037#ifdef PCNT
81dbbf4c 2038 emit_movimm(op, 0);
2a014d73 2039 emit_far_call(pcnt_gte_start);
82ed88eb 2040#endif
81dbbf4c 2041 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2042}
2043
2044static void c2op_epilogue(u_int op,u_int reglist)
2045{
82ed88eb 2046#ifdef PCNT
2047 emit_movimm(op,0);
2a014d73 2048 emit_far_call(pcnt_gte_end);
82ed88eb 2049#endif
054175e9 2050 restore_regs_all(reglist);
2051}
2052
6c0eefaf 2053static void c2op_call_MACtoIR(int lm,int need_flags)
2054{
2055 if(need_flags)
2a014d73 2056 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2057 else
2a014d73 2058 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2059}
2060
2061static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2062{
2a014d73 2063 emit_far_call(func);
6c0eefaf 2064 // func is C code and trashes r0
2065 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2066 if(need_flags||need_ir)
2067 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2068 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2069}
2070
81dbbf4c 2071static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2072{
81dbbf4c 2073 u_int c2op = source[i] & 0x3f;
2074 u_int reglist_full = get_host_reglist(i_regs->regmap);
2075 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2076 int need_flags, need_ir;
b9b61529 2077
2078 if (gte_handlers[c2op]!=NULL) {
bedfea38 2079 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2080 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2081 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2082 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2083 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2084 need_flags=0;
6c0eefaf 2085 int shift = (source[i] >> 19) & 1;
2086 int lm = (source[i] >> 10) & 1;
054175e9 2087 switch(c2op) {
19776aef 2088#ifndef DRC_DBG
054175e9 2089 case GTE_MVMVA: {
82336ba3 2090#ifdef HAVE_ARMV5
054175e9 2091 int v = (source[i] >> 15) & 3;
2092 int cv = (source[i] >> 13) & 3;
2093 int mx = (source[i] >> 17) & 3;
4d646738 2094 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2095 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2096 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2097 if(v<3)
2098 emit_ldrd(v*8,0,4);
2099 else {
2100 emit_movzwl_indexed(9*4,0,4); // gteIR
2101 emit_movzwl_indexed(10*4,0,6);
2102 emit_movzwl_indexed(11*4,0,5);
2103 emit_orrshl_imm(6,16,4);
2104 }
2105 if(mx<3)
2106 emit_addimm(0,32*4+mx*8*4,6);
2107 else
643aeae3 2108 emit_readword(&zeromem_ptr,6);
054175e9 2109 if(cv<3)
2110 emit_addimm(0,32*4+(cv*8+5)*4,7);
2111 else
643aeae3 2112 emit_readword(&zeromem_ptr,7);
054175e9 2113#ifdef __ARM_NEON__
2114 emit_movimm(source[i],1); // opcode
2a014d73 2115 emit_far_call(gteMVMVA_part_neon);
054175e9 2116 if(need_flags) {
2117 emit_movimm(lm,1);
2a014d73 2118 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2119 }
2120#else
2121 if(cv==3&&shift)
2a014d73 2122 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2123 else {
2124 emit_movimm(shift,1);
2a014d73 2125 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2126 }
6c0eefaf 2127 if(need_flags||need_ir)
2128 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2129#endif
2130#else /* if not HAVE_ARMV5 */
81dbbf4c 2131 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2132 emit_movimm(source[i],1); // opcode
643aeae3 2133 emit_writeword(1,&psxRegs.code);
2a014d73 2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2135#endif
2136 break;
2137 }
6c0eefaf 2138 case GTE_OP:
81dbbf4c 2139 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2140 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2141 if(need_flags||need_ir) {
2142 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2143 c2op_call_MACtoIR(lm,need_flags);
2144 }
2145 break;
2146 case GTE_DPCS:
81dbbf4c 2147 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2148 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_INTPL:
81dbbf4c 2151 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2152 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_SQR:
81dbbf4c 2155 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2156 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2157 if(need_flags||need_ir) {
2158 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2159 c2op_call_MACtoIR(lm,need_flags);
2160 }
2161 break;
2162 case GTE_DCPL:
81dbbf4c 2163 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2164 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPF:
81dbbf4c 2167 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2168 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPL:
81dbbf4c 2171 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2172 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2173 break;
19776aef 2174#endif
054175e9 2175 default:
81dbbf4c 2176 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2177#ifdef DRC_DBG
2178 emit_movimm(source[i],1); // opcode
643aeae3 2179 emit_writeword(1,&psxRegs.code);
19776aef 2180#endif
2a014d73 2181 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2182 break;
2183 }
2184 c2op_epilogue(c2op,reglist);
2185 }
b9b61529 2186}
2187
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 * Equivalent C for the emitted sequence (sl = source, temp = result):
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shift right then left by 12 to drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the 0x7f87e000 mask is tested in three pieces; each following test
  // is only performed while the previous ones found no set bit
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // any bit of the mask set -> raise bit 31
  emit_orrne_imm(temp, 0x80000000, temp);
}
2199
2200static void do_mfc2_31_one(u_int copr,signed char temp)
2201{
2202 emit_readword(&reg_cop2d[copr],temp);
2203 emit_testimm(temp,0x8000); // do we need this?
2204 emit_andne_imm(temp,0,temp);
2205 emit_cmpimm(temp,0xf80);
2206 emit_andimm(temp,0xf80,temp);
2207 emit_cmovae_imm(0xf80,temp);
2208}
2209
2210static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2211{
2212 if (temp < 0) {
2213 host_tempreg_acquire();
2214 temp = HOST_TEMPREG;
2215 }
2216 do_mfc2_31_one(9,temp);
2217 emit_shrimm(temp,7,tl);
2218 do_mfc2_31_one(10,temp);
2219 emit_orrshr_imm(temp,2,tl);
2220 do_mfc2_31_one(11,temp);
2221 emit_orrshl_imm(temp,3,tl);
2222 emit_writeword(tl,&reg_cop2d[29]);
2223 if (temp == HOST_TEMPREG)
2224 host_tempreg_release();
2225}
2226
e2b5e7aa 2227static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2228{
2229 // case 0x18: MULT
2230 // case 0x19: MULTU
2231 // case 0x1A: DIV
2232 // case 0x1B: DIVU
2233 // case 0x1C: DMULT
2234 // case 0x1D: DMULTU
2235 // case 0x1E: DDIV
2236 // case 0x1F: DDIVU
cf95b4f0 2237 if(dops[i].rs1&&dops[i].rs2)
57871462 2238 {
cf95b4f0 2239 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2240 {
cf95b4f0 2241 if(dops[i].opcode2==0x18) // MULT
57871462 2242 {
cf95b4f0 2243 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2244 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2245 signed char hi=get_reg(i_regs->regmap,HIREG);
2246 signed char lo=get_reg(i_regs->regmap,LOREG);
2247 assert(m1>=0);
2248 assert(m2>=0);
2249 assert(hi>=0);
2250 assert(lo>=0);
2251 emit_smull(m1,m2,hi,lo);
2252 }
cf95b4f0 2253 if(dops[i].opcode2==0x19) // MULTU
57871462 2254 {
cf95b4f0 2255 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2256 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2257 signed char hi=get_reg(i_regs->regmap,HIREG);
2258 signed char lo=get_reg(i_regs->regmap,LOREG);
2259 assert(m1>=0);
2260 assert(m2>=0);
2261 assert(hi>=0);
2262 assert(lo>=0);
2263 emit_umull(m1,m2,hi,lo);
2264 }
cf95b4f0 2265 if(dops[i].opcode2==0x1A) // DIV
57871462 2266 {
cf95b4f0 2267 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2268 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2269 assert(d1>=0);
2270 assert(d2>=0);
2271 signed char quotient=get_reg(i_regs->regmap,LOREG);
2272 signed char remainder=get_reg(i_regs->regmap,HIREG);
2273 assert(quotient>=0);
2274 assert(remainder>=0);
2275 emit_movs(d1,remainder);
44a80f6a 2276 emit_movimm(0xffffffff,quotient);
2277 emit_negmi(quotient,quotient); // .. quotient and ..
2278 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2279 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2280 emit_jeq(out+52); // Division by zero
82336ba3 2281 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2282#ifdef HAVE_ARMV5
57871462 2283 emit_clz(HOST_TEMPREG,quotient);
2284 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2285#else
2286 emit_movimm(0,quotient);
2287 emit_addpl_imm(quotient,1,quotient);
2288 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2289 emit_jns(out-2*4);
665f33e1 2290#endif
57871462 2291 emit_orimm(quotient,1<<31,quotient);
2292 emit_shr(quotient,quotient,quotient);
2293 emit_cmp(remainder,HOST_TEMPREG);
2294 emit_subcs(remainder,HOST_TEMPREG,remainder);
2295 emit_adcs(quotient,quotient,quotient);
2296 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2297 emit_jcc(out-16); // -4
57871462 2298 emit_teq(d1,d2);
2299 emit_negmi(quotient,quotient);
2300 emit_test(d1,d1);
2301 emit_negmi(remainder,remainder);
2302 }
cf95b4f0 2303 if(dops[i].opcode2==0x1B) // DIVU
57871462 2304 {
cf95b4f0 2305 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2306 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2307 assert(d1>=0);
2308 assert(d2>=0);
2309 signed char quotient=get_reg(i_regs->regmap,LOREG);
2310 signed char remainder=get_reg(i_regs->regmap,HIREG);
2311 assert(quotient>=0);
2312 assert(remainder>=0);
44a80f6a 2313 emit_mov(d1,remainder);
2314 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2315 emit_test(d2,d2);
7c3a5182 2316 emit_jeq(out+40); // Division by zero
665f33e1 2317#ifdef HAVE_ARMV5
57871462 2318 emit_clz(d2,HOST_TEMPREG);
2319 emit_movimm(1<<31,quotient);
2320 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2321#else
2322 emit_movimm(0,HOST_TEMPREG);
82336ba3 2323 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2324 emit_lslpls_imm(d2,1,d2);
7c3a5182 2325 emit_jns(out-2*4);
665f33e1 2326 emit_movimm(1<<31,quotient);
2327#endif
57871462 2328 emit_shr(quotient,HOST_TEMPREG,quotient);
2329 emit_cmp(remainder,d2);
2330 emit_subcs(remainder,d2,remainder);
2331 emit_adcs(quotient,quotient,quotient);
2332 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2333 emit_jcc(out-16); // -4
57871462 2334 }
2335 }
2336 else // 64-bit
71e490c5 2337 assert(0);
57871462 2338 }
2339 else
2340 {
2341 // Multiply by zero is zero.
2342 // MIPS does not have a divide by zero exception.
2343 // The result is undefined, we return zero.
2344 signed char hr=get_reg(i_regs->regmap,HIREG);
2345 signed char lr=get_reg(i_regs->regmap,LOREG);
2346 if(hr>=0) emit_zeroreg(hr);
2347 if(lr>=0) emit_zeroreg(lr);
2348 }
2349}
2350#define multdiv_assemble multdiv_assemble_arm
2351
d1e4ebd9 2352static void do_jump_vaddr(int rs)
2353{
2a014d73 2354 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2355}
2356
/*
 * Intentionally emits nothing on ARM. The x86 backend uses this hook to put
 * the value 0xf8 into the register; here the hash is computed with a single
 * instruction in do_rhash(), so no preload is required.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2361
e2b5e7aa 2362static void do_preload_rhtbl(int ht) {
57871462 2363 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2364}
2365
/*
 * Emit the hash computation: rh = rs & 0xf8.
 */
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2369
e2b5e7aa 2370static void do_miniht_load(int ht,int rh) {
57871462 2371 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2372 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2373}
2374
/*
 * Emit the mini hash table dispatch: compare the loaded entry `rh` with the
 * target address `rs`; on a match load the new PC (register 15) from ht+4,
 * otherwise fall through to the generic jump_vaddr path.
 */
static void do_miniht_jump(int rs, int rh, int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);

#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the fallback always goes through r7
  if (vaddr_reg != 7)
    emit_mov(vaddr_reg, 7);
  vaddr_reg = 7;
#endif

  do_jump_vaddr(vaddr_reg);
}
2385
e2b5e7aa 2386static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2387 #ifndef HAVE_ARMV7
57871462 2388 emit_movimm(return_address,rt); // PC into link register
643aeae3 2389 add_to_linker(out,return_address,1);
57871462 2390 emit_pcreladdr(temp);
643aeae3 2391 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2392 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2393 #else
2394 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2395 add_to_linker(out,return_address,1);
57871462 2396 emit_pcreladdr(temp);
643aeae3 2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2398 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2399 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2400 #endif
2401}
2402
57871462 2403// CPU-architecture-specific initialization
2a014d73 2404static void arch_init(void)
2405{
2406 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2407 struct tramp_insns *ops = ndrc->tramp.ops;
2408 size_t i;
2409 assert(!(diff & 3));
2410 assert(diff < 0x1000);
2411 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2412 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2413 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2414 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2415}
b9b61529 2416
2417// vim:shiftwidth=2:expandtab