drc: fix some mistake done during arm64 porting
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
57871462 38void indirect_jump_indexed();
39void indirect_jump();
40void do_interrupt();
41void jump_vaddr_r0();
42void jump_vaddr_r1();
43void jump_vaddr_r2();
44void jump_vaddr_r3();
45void jump_vaddr_r4();
46void jump_vaddr_r5();
47void jump_vaddr_r6();
48void jump_vaddr_r7();
49void jump_vaddr_r8();
50void jump_vaddr_r9();
51void jump_vaddr_r10();
52void jump_vaddr_r12();
53
b14b6a8f 54void * const jump_vaddr_reg[16] = {
55 jump_vaddr_r0,
56 jump_vaddr_r1,
57 jump_vaddr_r2,
58 jump_vaddr_r3,
59 jump_vaddr_r4,
60 jump_vaddr_r5,
61 jump_vaddr_r6,
62 jump_vaddr_r7,
63 jump_vaddr_r8,
64 jump_vaddr_r9,
65 jump_vaddr_r10,
57871462 66 0,
b14b6a8f 67 jump_vaddr_r12,
57871462 68 0,
69 0,
b14b6a8f 70 0
71};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
d148d265 104static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 105
57871462 106/* Linker */
107
df4dc2b1 108static void set_jump_target(void *addr, void *target_)
57871462 109{
df4dc2b1 110 u_int target = (u_int)target_;
111 u_char *ptr = addr;
57871462 112 u_int *ptr2=(u_int *)ptr;
113 if(ptr[3]==0xe2) {
114 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 115 assert(((uintptr_t)addr&3)==0);
57871462 116 assert((target&3)==0);
117 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 118 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 119 }
120 else if(ptr[3]==0x72) {
121 // generated by emit_jno_unlikely
122 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 123 assert(((uintptr_t)addr&3)==0);
57871462 124 assert((target&3)==0);
125 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
126 }
127 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 128 assert(((uintptr_t)addr&3)==0);
57871462 129 assert((target&3)==0);
130 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
131 }
132 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
133 }
134 else {
135 assert((ptr[3]&0x0e)==0xa);
136 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
137 }
138}
139
140// This optionally copies the instruction from the target of the branch into
141// the space before the branch. Works, but the difference in speed is
142// usually insignificant.
e2b5e7aa 143#if 0
144static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 145{
146 u_char *ptr=(u_char *)addr;
147 u_int *ptr2=(u_int *)ptr;
148 assert(!copy||ptr2[-1]==0xe28dd000);
149 if(ptr[3]==0xe2) {
150 assert(!copy);
151 assert((target-(u_int)ptr2-8)<4096);
152 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
153 }
154 else {
155 assert((ptr[3]&0x0e)==0xa);
156 u_int target_insn=*(u_int *)target;
157 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
158 copy=0;
159 }
160 if((target_insn&0x0c100000)==0x04100000) { // Load
161 copy=0;
162 }
163 if(target_insn&0x08000000) {
164 copy=0;
165 }
166 if(copy) {
167 ptr2[-1]=target_insn;
168 target+=4;
169 }
170 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
171 }
172}
e2b5e7aa 173#endif
57871462 174
175/* Literal pool */
e2b5e7aa 176static void add_literal(int addr,int val)
57871462 177{
15776b68 178 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 179 literals[literalcount][0]=addr;
180 literals[literalcount][1]=val;
9f51b4b9 181 literalcount++;
182}
57871462 183
d148d265 184// from a pointer to external jump stub (which was produced by emit_extjump2)
185// find where the jumping insn is
186static void *find_extjump_insn(void *stub)
57871462 187{
188 int *ptr=(int *)(stub+4);
d148d265 189 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 190 u_int offset=*ptr&0xfff;
d148d265 191 void **l_ptr=(void *)ptr+offset+8;
192 return *l_ptr;
57871462 193}
194
f968d35d 195// find where external branch is liked to using addr of it's stub:
196// get address that insn one after stub loads (dyna_linker arg1),
197// treat it as a pointer to branch insn,
198// return addr where that branch jumps to
643aeae3 199static void *get_pointer(void *stub)
57871462 200{
201 //printf("get_pointer(%x)\n",(int)stub);
d148d265 202 int *i_ptr=find_extjump_insn(stub);
3d680478 203 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 204 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 205}
206
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code. If the instruction after the
// call is an unconditional branch, its destination is returned instead.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
#ifdef HAVE_ARMV7
  insn += 6;
#else
  insn += 4;
#endif
  // the bl may sit one instruction further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
225
3968e69e 226static int verify_dirty(const u_int *ptr)
57871462 227{
665f33e1 228 #ifndef HAVE_ARMV7
16c8be17 229 u_int offset;
57871462 230 // get from literal pool
15776b68 231 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 232 offset=*ptr&0xfff;
233 u_int source=*(u_int*)((void *)ptr+offset+8);
234 ptr++;
235 assert((*ptr&0xFFFF0000)==0xe59f0000);
236 offset=*ptr&0xfff;
237 u_int copy=*(u_int*)((void *)ptr+offset+8);
238 ptr++;
239 assert((*ptr&0xFFFF0000)==0xe59f0000);
240 offset=*ptr&0xfff;
241 u_int len=*(u_int*)((void *)ptr+offset+8);
242 ptr++;
243 ptr++;
57871462 244 #else
245 // ARMv7 movw/movt
246 assert((*ptr&0xFFF00000)==0xe3000000);
247 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
248 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
249 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
250 ptr+=6;
251 #endif
252 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
253 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 254 //printf("verify_dirty: %x %x %x\n",source,copy,len);
255 return !memcmp((void *)source,(void *)copy,len);
256}
257
258// This doesn't necessarily find all clean entry points, just
259// guarantees that it's not dirty
df4dc2b1 260static int isclean(void *addr)
57871462 261{
665f33e1 262 #ifndef HAVE_ARMV7
581335b0 263 u_int *ptr=((u_int *)addr)+4;
57871462 264 #else
581335b0 265 u_int *ptr=((u_int *)addr)+6;
57871462 266 #endif
267 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
268 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
269 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 270 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
271 return 1;
272}
273
4a35de07 274// get source that block at addr was compiled from (host pointers)
01d26796 275static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 276{
643aeae3 277 u_int *ptr = addr;
665f33e1 278 #ifndef HAVE_ARMV7
16c8be17 279 u_int offset;
57871462 280 // get from literal pool
15776b68 281 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 282 offset=*ptr&0xfff;
283 u_int source=*(u_int*)((void *)ptr+offset+8);
284 ptr++;
285 //assert((*ptr&0xFFFF0000)==0xe59f0000);
286 //offset=*ptr&0xfff;
287 //u_int copy=*(u_int*)((void *)ptr+offset+8);
288 ptr++;
289 assert((*ptr&0xFFFF0000)==0xe59f0000);
290 offset=*ptr&0xfff;
291 u_int len=*(u_int*)((void *)ptr+offset+8);
292 ptr++;
293 ptr++;
57871462 294 #else
295 // ARMv7 movw/movt
296 assert((*ptr&0xFFF00000)==0xe3000000);
297 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
298 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
299 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
300 ptr+=6;
301 #endif
302 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
303 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 304 *start=(u_char *)source;
305 *end=(u_char *)source+len;
57871462 306}
307
57871462 308// Allocate a specific ARM register.
e2b5e7aa 309static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 310{
311 int n;
f776eb14 312 int dirty=0;
9f51b4b9 313
57871462 314 // see if it's already allocated (and dealloc it)
315 for(n=0;n<HOST_REGS;n++)
316 {
f776eb14 317 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
318 dirty=(cur->dirty>>n)&1;
319 cur->regmap[n]=-1;
320 }
57871462 321 }
9f51b4b9 322
57871462 323 cur->regmap[hr]=reg;
324 cur->dirty&=~(1<<hr);
f776eb14 325 cur->dirty|=dirty<<hr;
57871462 326 cur->isconst&=~(1<<hr);
327}
328
329// Alloc cycle count into dedicated register
e2b5e7aa 330static void alloc_cc(struct regstat *cur,int i)
57871462 331{
332 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
333}
334
57871462 335/* Assembler */
336
e2b5e7aa 337static unused char regname[16][4] = {
57871462 338 "r0",
339 "r1",
340 "r2",
341 "r3",
342 "r4",
343 "r5",
344 "r6",
345 "r7",
346 "r8",
347 "r9",
348 "r10",
349 "fp",
350 "r12",
351 "sp",
352 "lr",
353 "pc"};
354
e2b5e7aa 355static void output_w32(u_int word)
57871462 356{
357 *((u_int *)out)=word;
358 out+=4;
359}
e2b5e7aa 360
361static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 362{
363 assert(rd<16);
364 assert(rn<16);
365 assert(rm<16);
366 return((rn<<16)|(rd<<12)|rm);
367}
e2b5e7aa 368
369static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 370{
371 assert(rd<16);
372 assert(rn<16);
373 assert(imm<256);
374 assert((shift&1)==0);
375 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
376}
e2b5e7aa 377
378static u_int genimm(u_int imm,u_int *encoded)
57871462 379{
c2e3bd42 380 *encoded=0;
381 if(imm==0) return 1;
57871462 382 int i=32;
383 while(i>0)
384 {
385 if(imm<256) {
386 *encoded=((i&30)<<7)|imm;
387 return 1;
388 }
389 imm=(imm>>2)|(imm<<30);i-=2;
390 }
391 return 0;
392}
e2b5e7aa 393
394static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 395{
396 u_int ret=genimm(imm,encoded);
397 assert(ret);
581335b0 398 (void)ret;
cfbd3c6e 399}
e2b5e7aa 400
401static u_int genjmp(u_int addr)
57871462 402{
7c3a5182 403 if (addr < 3) return 0; // a branch that will be patched later
404 int offset = addr-(int)out-8;
405 if (offset < -33554432 || offset >= 33554432) {
406 SysPrintf("genjmp: out of range: %08x\n", offset);
407 abort();
e80343e2 408 return 0;
409 }
57871462 410 return ((u_int)offset>>2)&0xffffff;
411}
412
d1e4ebd9 413static unused void emit_breakpoint(void)
414{
415 assem_debug("bkpt #0\n");
416 //output_w32(0xe1200070);
417 output_w32(0xe7f001f0);
418}
419
e2b5e7aa 420static void emit_mov(int rs,int rt)
57871462 421{
422 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
423 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
424}
425
e2b5e7aa 426static void emit_movs(int rs,int rt)
57871462 427{
428 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_add(int rs1,int rs2,int rt)
57871462 433{
434 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
435 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
436}
437
39b71d9a 438static void emit_adds(int rs1,int rs2,int rt)
439{
440 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
442}
443#define emit_adds_ptr emit_adds
444
e2b5e7aa 445static void emit_adcs(int rs1,int rs2,int rt)
57871462 446{
447 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
448 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
449}
450
e2b5e7aa 451static void emit_neg(int rs, int rt)
57871462 452{
453 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
455}
456
e2b5e7aa 457static void emit_sub(int rs1,int rs2,int rt)
57871462 458{
459 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
460 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
461}
462
e2b5e7aa 463static void emit_zeroreg(int rt)
57871462 464{
465 assem_debug("mov %s,#0\n",regname[rt]);
466 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
467}
468
e2b5e7aa 469static void emit_loadlp(u_int imm,u_int rt)
790ee18e 470{
471 add_literal((int)out,imm);
472 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
473 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
474}
e2b5e7aa 475
476static void emit_movw(u_int imm,u_int rt)
790ee18e 477{
478 assert(imm<65536);
479 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
480 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
481}
e2b5e7aa 482
483static void emit_movt(u_int imm,u_int rt)
790ee18e 484{
485 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
486 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
487}
e2b5e7aa 488
489static void emit_movimm(u_int imm,u_int rt)
790ee18e 490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
665f33e1 499 #ifndef HAVE_ARMV7
790ee18e 500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
665f33e1 508 #ifndef HAVE_ARMV7
790ee18e 509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
e2b5e7aa 516
517static void emit_pcreladdr(u_int rt)
790ee18e 518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
e2b5e7aa 523static void emit_loadreg(int r, int hr)
57871462 524{
3d624f89 525 if(r&64) {
c43b5311 526 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 527 assert(0);
528 return;
3d624f89 529 }
57871462 530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
7c3a5182 533 int addr = (int)&psxRegs.GPR.r[r];
534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
537 case CCREG: addr = (int)&cycle_count; break;
538 case CSREG: addr = (int)&Status; break;
539 case INVCP: addr = (int)&invc_ptr; break;
37387d8b 540 case ROREG: addr = (int)&ram_offset; break;
7c3a5182 541 default: assert(r < 34); break;
542 }
57871462 543 u_int offset = addr-(u_int)&dynarec_local;
544 assert(offset<4096);
545 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
546 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
547 }
548}
e2b5e7aa 549
550static void emit_storereg(int r, int hr)
57871462 551{
3d624f89 552 if(r&64) {
c43b5311 553 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 554 assert(0);
555 return;
3d624f89 556 }
7c3a5182 557 int addr = (int)&psxRegs.GPR.r[r];
558 switch (r) {
559 //case HIREG: addr = &hi; break;
560 //case LOREG: addr = &lo; break;
561 case CCREG: addr = (int)&cycle_count; break;
562 default: assert(r < 34); break;
563 }
57871462 564 u_int offset = addr-(u_int)&dynarec_local;
565 assert(offset<4096);
566 assem_debug("str %s,fp+%d\n",regname[hr],offset);
567 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
568}
569
e2b5e7aa 570static void emit_test(int rs, int rt)
57871462 571{
572 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
573 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
574}
575
e2b5e7aa 576static void emit_testimm(int rs,int imm)
57871462 577{
578 u_int armval;
5a05d80c 579 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 580 genimm_checked(imm,&armval);
57871462 581 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
582}
583
e2b5e7aa 584static void emit_testeqimm(int rs,int imm)
b9b61529 585{
586 u_int armval;
587 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 588 genimm_checked(imm,&armval);
b9b61529 589 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
590}
591
e2b5e7aa 592static void emit_not(int rs,int rt)
57871462 593{
594 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
595 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
596}
597
e2b5e7aa 598static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 599{
600 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
601 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
602}
603
e2b5e7aa 604static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 605{
606 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
608}
e2b5e7aa 609
e2b5e7aa 610static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 611{
612 assert(rs<16);
613 assert(rt<16);
614 assert(imm<32);
615 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
616 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
617}
618
e2b5e7aa 619static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 620{
621 assert(rs<16);
622 assert(rt<16);
623 assert(imm<32);
624 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
625 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
626}
627
e2b5e7aa 628static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 629{
630 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
631 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
632}
633
3968e69e 634static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
635{
636 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
637 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
638}
639
e2b5e7aa 640static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 641{
642 assert(rs<16);
643 assert(rt<16);
644 if(imm!=0) {
57871462 645 u_int armval;
646 if(genimm(imm,&armval)) {
647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
649 }else if(genimm(-imm,&armval)) {
8a0a8423 650 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 651 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 652 #ifdef HAVE_ARMV7
653 }else if(rt!=rs&&(u_int)imm<65536) {
654 emit_movw(imm&0x0000ffff,rt);
655 emit_add(rs,rt,rt);
656 }else if(rt!=rs&&(u_int)-imm<65536) {
657 emit_movw(-imm&0x0000ffff,rt);
658 emit_sub(rs,rt,rt);
659 #endif
660 }else if((u_int)-imm<65536) {
57871462 661 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
662 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
663 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
664 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 665 }else {
666 do {
667 int shift = (ffs(imm) - 1) & ~1;
668 int imm8 = imm & (0xff << shift);
669 genimm_checked(imm8,&armval);
670 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
671 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
672 rs = rt;
673 imm &= ~imm8;
674 }
675 while (imm != 0);
57871462 676 }
677 }
678 else if(rs!=rt) emit_mov(rs,rt);
679}
680
e2b5e7aa 681static void emit_addimm_and_set_flags(int imm,int rt)
57871462 682{
683 assert(imm>-65536&&imm<65536);
684 u_int armval;
685 if(genimm(imm,&armval)) {
686 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
687 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
688 }else if(genimm(-imm,&armval)) {
689 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(imm<0) {
692 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
693 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
694 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
695 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
696 }else{
697 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
698 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
699 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
700 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
701 }
702}
e2b5e7aa 703
e2b5e7aa 704static void emit_addnop(u_int r)
57871462 705{
706 assert(r<16);
707 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
708 output_w32(0xe2800000|rd_rn_rm(r,r,0));
709}
710
e2b5e7aa 711static void emit_andimm(int rs,int imm,int rt)
57871462 712{
713 u_int armval;
790ee18e 714 if(imm==0) {
715 emit_zeroreg(rt);
716 }else if(genimm(imm,&armval)) {
57871462 717 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
718 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
719 }else if(genimm(~imm,&armval)) {
720 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(imm==65535) {
332a4533 723 #ifndef HAVE_ARMV6
57871462 724 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
725 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
726 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
728 #else
729 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
730 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
731 #endif
732 }else{
733 assert(imm>0&&imm<65535);
665f33e1 734 #ifndef HAVE_ARMV7
57871462 735 assem_debug("mov r14,#%d\n",imm&0xFF00);
736 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
737 assem_debug("add r14,r14,#%d\n",imm&0xFF);
738 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
739 #else
740 emit_movw(imm,HOST_TEMPREG);
741 #endif
742 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
743 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
744 }
745}
746
e2b5e7aa 747static void emit_orimm(int rs,int imm,int rt)
57871462 748{
749 u_int armval;
790ee18e 750 if(imm==0) {
751 if(rs!=rt) emit_mov(rs,rt);
752 }else if(genimm(imm,&armval)) {
57871462 753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
754 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
755 }else{
756 assert(imm>0&&imm<65536);
757 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
758 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
759 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
760 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
761 }
762}
763
e2b5e7aa 764static void emit_xorimm(int rs,int imm,int rt)
57871462 765{
57871462 766 u_int armval;
790ee18e 767 if(imm==0) {
768 if(rs!=rt) emit_mov(rs,rt);
769 }else if(genimm(imm,&armval)) {
57871462 770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
771 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
772 }else{
514ed0d9 773 assert(imm>0&&imm<65536);
57871462 774 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
775 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
776 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
777 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
778 }
779}
780
e2b5e7aa 781static void emit_shlimm(int rs,u_int imm,int rt)
57871462 782{
783 assert(imm>0);
784 assert(imm<32);
785 //if(imm==1) ...
786 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
787 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
788}
789
e2b5e7aa 790static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 791{
792 assert(imm>0);
793 assert(imm<32);
794 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
795 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
796}
797
e2b5e7aa 798static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 799{
800 assert(imm>0);
801 assert(imm<32);
802 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
803 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
804}
805
e2b5e7aa 806static void emit_shrimm(int rs,u_int imm,int rt)
57871462 807{
808 assert(imm>0);
809 assert(imm<32);
810 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
811 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
812}
813
e2b5e7aa 814static void emit_sarimm(int rs,u_int imm,int rt)
57871462 815{
816 assert(imm>0);
817 assert(imm<32);
818 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
819 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
820}
821
e2b5e7aa 822static void emit_rorimm(int rs,u_int imm,int rt)
57871462 823{
824 assert(imm>0);
825 assert(imm<32);
826 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
827 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
828}
829
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6+,
// otherwise a shift left / arithmetic shift right pair.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
840
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6+,
// otherwise a shift left / arithmetic shift right pair.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
851
e2b5e7aa 852static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 853{
854 assert(rs<16);
855 assert(rt<16);
856 assert(shift<16);
857 //if(imm==1) ...
858 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
859 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
860}
e2b5e7aa 861
862static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
868 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
869}
e2b5e7aa 870
871static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 872{
873 assert(rs<16);
874 assert(rt<16);
875 assert(shift<16);
876 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
877 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
878}
57871462 879
3968e69e 880static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 881{
882 assert(rs<16);
883 assert(rt<16);
884 assert(shift<16);
885 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
886 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
887}
e2b5e7aa 888
3968e69e 889static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 890{
891 assert(rs<16);
892 assert(rt<16);
893 assert(shift<16);
894 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
895 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
896}
897
e2b5e7aa 898static void emit_cmpimm(int rs,int imm)
57871462 899{
900 u_int armval;
901 if(genimm(imm,&armval)) {
5a05d80c 902 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 903 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
904 }else if(genimm(-imm,&armval)) {
5a05d80c 905 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 906 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
907 }else if(imm>0) {
908 assert(imm<65536);
57871462 909 emit_movimm(imm,HOST_TEMPREG);
57871462 910 assem_debug("cmp %s,r14\n",regname[rs]);
911 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }else{
913 assert(imm>-65536);
57871462 914 emit_movimm(-imm,HOST_TEMPREG);
57871462 915 assem_debug("cmn %s,r14\n",regname[rs]);
916 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
917 }
918}
919
e2b5e7aa 920static void emit_cmovne_imm(int imm,int rt)
57871462 921{
922 assem_debug("movne %s,#%d\n",regname[rt],imm);
923 u_int armval;
cfbd3c6e 924 genimm_checked(imm,&armval);
57871462 925 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
926}
e2b5e7aa 927
928static void emit_cmovl_imm(int imm,int rt)
57871462 929{
930 assem_debug("movlt %s,#%d\n",regname[rt],imm);
931 u_int armval;
cfbd3c6e 932 genimm_checked(imm,&armval);
57871462 933 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
934}
e2b5e7aa 935
936static void emit_cmovb_imm(int imm,int rt)
57871462 937{
938 assem_debug("movcc %s,#%d\n",regname[rt],imm);
939 u_int armval;
cfbd3c6e 940 genimm_checked(imm,&armval);
57871462 941 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
942}
e2b5e7aa 943
3968e69e 944static void emit_cmovae_imm(int imm,int rt)
945{
946 assem_debug("movcs %s,#%d\n",regname[rt],imm);
947 u_int armval;
948 genimm_checked(imm,&armval);
949 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
950}
951
9c997d19 952static void emit_cmovs_imm(int imm,int rt)
953{
954 assem_debug("movmi %s,#%d\n",regname[rt],imm);
955 u_int armval;
956 genimm_checked(imm,&armval);
957 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
958}
959
e2b5e7aa 960static void emit_cmovne_reg(int rs,int rt)
57871462 961{
962 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
963 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
964}
e2b5e7aa 965
966static void emit_cmovl_reg(int rs,int rt)
57871462 967{
968 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
969 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
970}
e2b5e7aa 971
e3c6bdb5 972static void emit_cmovb_reg(int rs,int rt)
973{
974 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
975 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
976}
977
e2b5e7aa 978static void emit_cmovs_reg(int rs,int rt)
57871462 979{
980 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
981 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
982}
983
// rt = (rs < imm) as a signed comparison: rt is zeroed, then set to 1 by a
// movlt. When rt aliases rs the zeroing is done after the compare, because
// the compare still needs the value in rs.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 991
// rt = (rs < imm) as an unsigned comparison: rt is zeroed, then set to 1 by
// a movcc. When rt aliases rs the zeroing is done after the compare, because
// the compare still needs the value in rs.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 999
e2b5e7aa 1000static void emit_cmp(int rs,int rt)
57871462 1001{
1002 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1003 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1004}
e2b5e7aa 1005
// Emit: compare rs with 1, load 1 into rt, then let emit_cmovl_imm()
// conditionally replace it with 0.
static void emit_set_gz32(int rs, int rt)
{
  const int one=1, zero=0;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(zero,rt);
}
e2b5e7aa 1013
// Emit: set flags from rs (copying it into rt via emit_movs when the
// registers differ, otherwise emit_test on rs), then conditionally write 1
// into rt through emit_cmovne_imm().
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1021
// Emit a register/register set-on-less-than: rt is cleared, rs1 is compared
// with rs2, and emit_cmovl_imm() conditionally writes 1 into rt.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  int rt_is_operand=(rs1==rt||rs2==rt);

  if(rt_is_operand) {
    // rt holds an input: compare before overwriting it
    emit_cmp(rs1,rs2);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmp(rs1,rs2);
  }
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1030
// Emit a register/register set-on-lower: rt is cleared, rs1 is compared
// with rs2, and emit_cmovb_imm() ("movcc") conditionally writes 1 into rt.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  int rt_is_operand=(rs1==rt||rs2==rt);

  if(rt_is_operand) {
    // rt holds an input: compare before overwriting it
    emit_cmp(rs1,rs2);
    emit_movimm(0,rt);
  }else{
    emit_zeroreg(rt);
    emit_cmp(rs1,rs2);
  }
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1039
2a014d73 1040static int can_jump_or_call(const void *a)
1041{
1042 intptr_t offset = (u_char *)a - out - 8;
1043 return (-33554432 <= offset && offset < 33554432);
1044}
1045
643aeae3 1046static void emit_call(const void *a_)
57871462 1047{
643aeae3 1048 int a = (int)a_;
d1e4ebd9 1049 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1050 u_int offset=genjmp(a);
1051 output_w32(0xeb000000|offset);
1052}
e2b5e7aa 1053
b14b6a8f 1054static void emit_jmp(const void *a_)
57871462 1055{
b14b6a8f 1056 int a = (int)a_;
d1e4ebd9 1057 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1058 u_int offset=genjmp(a);
1059 output_w32(0xea000000|offset);
1060}
e2b5e7aa 1061
// Emit the conditional branch traced as "bne" to a_.
static void emit_jne(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bne %x\n",dest);
  output_w32(0x1a000000|genjmp(dest));
}
e2b5e7aa 1069
// Emit the conditional branch traced as "beq" to a_.
static void emit_jeq(const void *a_)
{
  int dest=(int)a_;
  assem_debug("beq %x\n",dest);
  output_w32(0x0a000000|genjmp(dest));
}
e2b5e7aa 1077
// Emit the conditional branch traced as "bmi" to a_.
static void emit_js(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bmi %x\n",dest);
  output_w32(0x4a000000|genjmp(dest));
}
e2b5e7aa 1085
// Emit the conditional branch traced as "bpl" to a_.
static void emit_jns(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bpl %x\n",dest);
  output_w32(0x5a000000|genjmp(dest));
}
e2b5e7aa 1093
// Emit the conditional branch traced as "blt" to a_.
static void emit_jl(const void *a_)
{
  int dest=(int)a_;
  assem_debug("blt %x\n",dest);
  output_w32(0xba000000|genjmp(dest));
}
e2b5e7aa 1101
// Emit the conditional branch traced as "bge" to a_.
static void emit_jge(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bge %x\n",dest);
  output_w32(0xaa000000|genjmp(dest));
}
e2b5e7aa 1109
// Emit the conditional branch traced as "bvc" to a_.
static void emit_jno(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bvc %x\n",dest);
  output_w32(0x7a000000|genjmp(dest));
}
e2b5e7aa 1117
// Emit the conditional branch traced as "bcs" to a_.
static void emit_jc(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bcs %x\n",dest);
  output_w32(0x2a000000|genjmp(dest));
}
e2b5e7aa 1125
// Emit the conditional branch traced as "bcc" to a_.
static void emit_jcc(const void *a_)
{
  int dest=(int)a_;
  assem_debug("bcc %x\n",dest);
  output_w32(0x3a000000|genjmp(dest));
}
1133
3968e69e 1134static unused void emit_callreg(u_int r)
57871462 1135{
c6c3b1b3 1136 assert(r<15);
1137 assem_debug("blx %s\n",regname[r]);
1138 output_w32(0xe12fff30|r);
57871462 1139}
e2b5e7aa 1140
1141static void emit_jmpreg(u_int r)
57871462 1142{
1143 assem_debug("mov pc,%s\n",regname[r]);
1144 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1145}
1146
be516ebe 1147static void emit_ret(void)
1148{
1149 emit_jmpreg(14);
1150}
1151
e2b5e7aa 1152static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1153{
1154 assert(offset>-4096&&offset<4096);
1155 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1156 if(offset>=0) {
1157 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1158 }else{
1159 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1160 }
1161}
e2b5e7aa 1162
1163static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1164{
1165 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1167}
39b71d9a 1168#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1169
1170static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1171{
1172 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1174}
e2b5e7aa 1175
1176static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1177{
1178 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1180}
e2b5e7aa 1181
37387d8b 1182static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1183{
1184 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1186}
1187
e2b5e7aa 1188static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1189{
1190 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1192}
e2b5e7aa 1193
37387d8b 1194static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1195{
1196 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1197 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1198}
1199
e2b5e7aa 1200static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1201{
1202 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1203 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1204}
e2b5e7aa 1205
37387d8b 1206static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1207{
1208 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1209 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1210}
1211
e2b5e7aa 1212static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1213{
1214 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1215 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1216}
e2b5e7aa 1217
37387d8b 1218static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1219{
1220 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1221 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1222}
1223
e2b5e7aa 1224static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1225{
1226 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1227 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1228}
1229
1230static void emit_str_dualindexed(int rs1, int rs2, int rt)
1231{
1232 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1233 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1234}
1235
1236static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1237{
1238 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1239 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1240}
1241
1242static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1243{
1244 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1245 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1246}
e2b5e7aa 1247
e2b5e7aa 1248static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1249{
1250 assert(offset>-256&&offset<256);
1251 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1252 if(offset>=0) {
1253 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1254 }else{
1255 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1256 }
1257}
e2b5e7aa 1258
e2b5e7aa 1259static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1260{
1261 assert(offset>-256&&offset<256);
1262 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1263 if(offset>=0) {
1264 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1265 }else{
1266 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1267 }
1268}
e2b5e7aa 1269
1270static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1271{
1272 assert(offset>-4096&&offset<4096);
1273 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1274 if(offset>=0) {
1275 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1276 }else{
1277 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1278 }
1279}
e2b5e7aa 1280
e2b5e7aa 1281static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1282{
1283 assert(offset>-256&&offset<256);
1284 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1285 if(offset>=0) {
1286 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1287 }else{
1288 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1289 }
1290}
e2b5e7aa 1291
054175e9 1292static void emit_ldrd(int offset, int rs, int rt)
1293{
1294 assert(offset>-256&&offset<256);
1295 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1296 if(offset>=0) {
1297 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1298 }else{
1299 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1300 }
1301}
e2b5e7aa 1302
643aeae3 1303static void emit_readword(void *addr, int rt)
57871462 1304{
643aeae3 1305 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1306 assert(offset<4096);
1307 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1308 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1309}
39b71d9a 1310#define emit_readptr emit_readword
e2b5e7aa 1311
e2b5e7aa 1312static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1313{
1314 assert(offset>-4096&&offset<4096);
1315 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1316 if(offset>=0) {
1317 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1318 }else{
1319 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1320 }
1321}
e2b5e7aa 1322
e2b5e7aa 1323static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1324{
1325 assert(offset>-256&&offset<256);
1326 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1327 if(offset>=0) {
1328 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1329 }else{
1330 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1331 }
1332}
e2b5e7aa 1333
1334static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1335{
1336 assert(offset>-4096&&offset<4096);
1337 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1338 if(offset>=0) {
1339 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1340 }else{
1341 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1342 }
1343}
e2b5e7aa 1344
e2b5e7aa 1345static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1346{
1347 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1348 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1349}
e2b5e7aa 1350
1351static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1352{
1353 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1354 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1355}
e2b5e7aa 1356
1357static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1358{
1359 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1360 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1361}
e2b5e7aa 1362
643aeae3 1363static void emit_writeword(int rt, void *addr)
57871462 1364{
643aeae3 1365 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1366 assert(offset<4096);
1367 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1368 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1369}
e2b5e7aa 1370
e2b5e7aa 1371static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1372{
1373 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1374 assert(rs1<16);
1375 assert(rs2<16);
1376 assert(hi<16);
1377 assert(lo<16);
1378 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1379}
e2b5e7aa 1380
1381static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1382{
1383 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1384 assert(rs1<16);
1385 assert(rs2<16);
1386 assert(hi<16);
1387 assert(lo<16);
1388 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1389}
1390
e2b5e7aa 1391static void emit_clz(int rs,int rt)
57871462 1392{
1393 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1394 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1395}
1396
e2b5e7aa 1397static void emit_subcs(int rs1,int rs2,int rt)
57871462 1398{
1399 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1400 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1401}
1402
e2b5e7aa 1403static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1404{
1405 assert(imm>0);
1406 assert(imm<32);
1407 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1408 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1409}
1410
e2b5e7aa 1411static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1412{
1413 assert(imm>0);
1414 assert(imm<32);
1415 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1416 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1417}
1418
e2b5e7aa 1419static void emit_negmi(int rs, int rt)
57871462 1420{
1421 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1422 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1423}
1424
e2b5e7aa 1425static void emit_negsmi(int rs, int rt)
57871462 1426{
1427 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1428 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1429}
1430
e2b5e7aa 1431static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1432{
1433 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1434 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1435}
1436
e2b5e7aa 1437static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1438{
1439 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1440 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1441}
1442
e2b5e7aa 1443static void emit_teq(int rs, int rt)
57871462 1444{
1445 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1446 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1447}
1448
3968e69e 1449static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1450{
1451 u_int armval;
cfbd3c6e 1452 genimm_checked(imm,&armval);
57871462 1453 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1454 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1455}
1456
57871462 1457// Conditionally select one of two immediates, optimizing for small code size
1458// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1459static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1460{
1461 u_int armval;
1462 if(genimm(imm2-imm1,&armval)) {
1463 emit_movimm(imm1,rt);
1464 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1465 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1466 }else if(genimm(imm1-imm2,&armval)) {
1467 emit_movimm(imm1,rt);
1468 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1469 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1470 }
1471 else {
665f33e1 1472 #ifndef HAVE_ARMV7
57871462 1473 emit_movimm(imm1,rt);
1474 add_literal((int)out,imm2);
1475 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1476 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1477 #else
1478 emit_movw(imm1&0x0000FFFF,rt);
1479 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1480 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1481 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1482 }
1483 emit_movt(imm1&0xFFFF0000,rt);
1484 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1485 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1486 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1487 }
1488 #endif
1489 }
1490}
1491
57871462 1492// special case for checking invalid_code
e2b5e7aa 1493static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1494{
1495 assert(imm<128&&imm>=0);
1496 assert(r>=0&&r<16);
1497 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1498 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1499 emit_cmpimm(HOST_TEMPREG,imm);
1500}
1501
// Emit the conditional branch-with-link traced as "blne" to address a.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
1508
57871462 1509// Used to preload hash table entries
e2b5e7aa 1510static unused void emit_prefetchreg(int r)
57871462 1511{
1512 assem_debug("pld %s\n",regname[r]);
1513 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1514}
1515
1516// Special case for mini_ht
e2b5e7aa 1517static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1518{
1519 assert(offset<4096);
1520 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1521 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1522}
1523
e2b5e7aa 1524static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1525{
1526 u_int armval;
cfbd3c6e 1527 genimm_checked(imm,&armval);
b9b61529 1528 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1529 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1530}
1531
e2b5e7aa 1532static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1533{
1534 u_int armval;
1535 genimm_checked(imm,&armval);
1536 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1537 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1538}
1539
e2b5e7aa 1540static void emit_jno_unlikely(int a)
57871462 1541{
1542 //emit_jno(a);
1543 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1544 output_w32(0x72800000|rd_rn_rm(15,15,0));
1545}
1546
054175e9 1547static void save_regs_all(u_int reglist)
57871462 1548{
054175e9 1549 int i;
57871462 1550 if(!reglist) return;
1551 assem_debug("stmia fp,{");
054175e9 1552 for(i=0;i<16;i++)
1553 if(reglist&(1<<i))
1554 assem_debug("r%d,",i);
57871462 1555 assem_debug("}\n");
1556 output_w32(0xe88b0000|reglist);
1557}
e2b5e7aa 1558
054175e9 1559static void restore_regs_all(u_int reglist)
57871462 1560{
054175e9 1561 int i;
57871462 1562 if(!reglist) return;
1563 assem_debug("ldmia fp,{");
054175e9 1564 for(i=0;i<16;i++)
1565 if(reglist&(1<<i))
1566 assem_debug("r%d,",i);
57871462 1567 assem_debug("}\n");
1568 output_w32(0xe89b0000|reglist);
1569}
e2b5e7aa 1570
054175e9 1571// Save registers before function call
1572static void save_regs(u_int reglist)
1573{
4d646738 1574 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1575 save_regs_all(reglist);
1576}
e2b5e7aa 1577
054175e9 1578// Restore registers after function call
1579static void restore_regs(u_int reglist)
1580{
4d646738 1581 reglist&=CALLER_SAVE_REGS;
054175e9 1582 restore_regs_all(reglist);
1583}
57871462 1584
57871462 1585/* Stubs/epilogue */
1586
e2b5e7aa 1587static void literal_pool(int n)
57871462 1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
1593 u_int *ptr;
1594 int i;
1595 for(i=0;i<literalcount;i++)
1596 {
77750690 1597 u_int l_addr=(u_int)out;
1598 int j;
1599 for(j=0;j<i;j++) {
1600 if(literals[j][1]==literals[i][1]) {
1601 //printf("dup %08x\n",literals[i][1]);
1602 l_addr=literals[j][0];
1603 break;
1604 }
1605 }
57871462 1606 ptr=(u_int *)literals[i][0];
77750690 1607 u_int offset=l_addr-(u_int)ptr-8;
57871462 1608 assert(offset<4096);
1609 assert(!(offset&3));
1610 *ptr|=offset;
77750690 1611 if(l_addr==(u_int)out) {
1612 literals[i][0]=l_addr; // remember for dupes
1613 output_w32(literals[i][1]);
1614 }
57871462 1615 }
1616 literalcount=0;
1617}
1618
e2b5e7aa 1619static void literal_pool_jumpover(int n)
57871462 1620{
1621 if(!literalcount) return;
1622 if(n) {
1623 if((int)out-literals[0][0]<4096-n) return;
1624 }
df4dc2b1 1625 void *jaddr = out;
57871462 1626 emit_jmp(0);
1627 literal_pool(0);
df4dc2b1 1628 set_jump_target(jaddr, out);
57871462 1629}
1630
7c3a5182 1631// parsed by get_pointer, find_extjump_insn
1632static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1633{
1634 u_char *ptr=(u_char *)addr;
1635 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1636 (void)ptr;
1637
57871462 1638 emit_loadlp(target,0);
643aeae3 1639 emit_loadlp((u_int)addr,1);
d62c125a 1640 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1641 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1642//DEBUG >
1643#ifdef DEBUG_CYCLE_COUNT
643aeae3 1644 emit_readword(&last_count,ECX);
57871462 1645 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1646 emit_readword(&next_interupt,ECX);
1647 emit_writeword(HOST_CCREG,&Count);
57871462 1648 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1649 emit_writeword(ECX,&last_count);
57871462 1650#endif
1651//DEBUG <
2a014d73 1652 emit_far_jump(linker);
57871462 1653}
1654
d1e4ebd9 1655static void check_extjump2(void *src)
1656{
1657 u_int *ptr = src;
1658 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1659 (void)ptr;
1660}
1661
13e35c04 1662// put rt_val into rt, potentially making use of rs with value rs_val
1663static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1664{
8575a877 1665 u_int armval;
1666 int diff;
1667 if(genimm(rt_val,&armval)) {
1668 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1669 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1670 return;
1671 }
1672 if(genimm(~rt_val,&armval)) {
1673 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1674 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1675 return;
1676 }
1677 diff=rt_val-rs_val;
1678 if(genimm(diff,&armval)) {
1679 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1680 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }else if(genimm(-diff,&armval)) {
1683 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1684 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }
1687 emit_movimm(rt_val,rt);
1688}
1689
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. the values are
// equal or their difference (in either direction), after dropping trailing
// pairs of zero bits, is below 0x100. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  int delta=v2-v1;
  u_int bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100 ? 1 : 0;
}
cbbab9cd 1705
b14b6a8f 1706static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1707{
1708 switch(type) {
1709 case LOADB_STUB: emit_signextend8(rs,rt); break;
1710 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1711 case LOADH_STUB: emit_signextend16(rs,rt); break;
1712 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1713 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1714 default: assert(0);
1715 }
1716}
1717
b1be1eee 1718#include "pcsxmem.h"
1719#include "pcsxmem_inline.c"
b1be1eee 1720
e2b5e7aa 1721static void do_readstub(int n)
57871462 1722{
b14b6a8f 1723 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1724 literal_pool(256);
b14b6a8f 1725 set_jump_target(stubs[n].addr, out);
1726 enum stub_type type=stubs[n].type;
1727 int i=stubs[n].a;
1728 int rs=stubs[n].b;
81dbbf4c 1729 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1730 u_int reglist=stubs[n].e;
81dbbf4c 1731 const signed char *i_regmap=i_regs->regmap;
581335b0 1732 int rt;
cf95b4f0 1733 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1734 rt=get_reg(i_regmap,FTEMP);
1735 }else{
cf95b4f0 1736 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1737 }
1738 assert(rs>=0);
df4dc2b1 1739 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1740 void *restore_jump = NULL;
c6c3b1b3 1741 reglist|=(1<<rs);
1742 for(r=0;r<=12;r++) {
1743 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1744 temp=r; break;
1745 }
1746 }
cf95b4f0 1747 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1748 reglist&=~(1<<rt);
1749 if(temp==-1) {
1750 save_regs(reglist);
1751 regs_saved=1;
1752 temp=(rs==0)?2:0;
1753 }
1754 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1755 temp2=1;
643aeae3 1756 emit_readword(&mem_rtab,temp);
c6c3b1b3 1757 emit_shrimm(rs,12,temp2);
1758 emit_readword_dualindexedx4(temp,temp2,temp2);
1759 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1760 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1761 switch(type) {
1762 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1763 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1764 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1765 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1766 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1767 default: assert(0);
c6c3b1b3 1768 }
1769 }
1770 if(regs_saved) {
df4dc2b1 1771 restore_jump=out;
c6c3b1b3 1772 emit_jcc(0); // jump to reg restore
1773 }
1774 else
b14b6a8f 1775 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1776
1777 if(!regs_saved)
1778 save_regs(reglist);
643aeae3 1779 void *handler=NULL;
c6c3b1b3 1780 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1781 handler=jump_handler_read8;
c6c3b1b3 1782 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1783 handler=jump_handler_read16;
c6c3b1b3 1784 if(type==LOADW_STUB)
643aeae3 1785 handler=jump_handler_read32;
1786 assert(handler);
b96d3df7 1787 pass_args(rs,temp2);
c6c3b1b3 1788 int cc=get_reg(i_regmap,CCREG);
1789 if(cc<0)
1790 emit_loadreg(CCREG,2);
2330734f 1791 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1792 emit_far_call(handler);
cf95b4f0 1793 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1794 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1795 }
1796 if(restore_jump)
df4dc2b1 1797 set_jump_target(restore_jump, out);
c6c3b1b3 1798 restore_regs(reglist);
b14b6a8f 1799 emit_jmp(stubs[n].retaddr); // return address
57871462 1800}
1801
81dbbf4c 1802static void inline_readstub(enum stub_type type, int i, u_int addr,
1803 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1804{
1805 int rs=get_reg(regmap,target);
57871462 1806 int rt=get_reg(regmap,target);
535d208a 1807 if(rs<0) rs=get_reg(regmap,-1);
57871462 1808 assert(rs>=0);
2a014d73 1809 u_int is_dynamic;
687b4580 1810 uintptr_t host_addr = 0;
643aeae3 1811 void *handler;
b1be1eee 1812 int cc=get_reg(regmap,CCREG);
2330734f 1813 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1814 return;
643aeae3 1815 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1816 if (handler == NULL) {
cf95b4f0 1817 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1818 return;
13e35c04 1819 if(addr!=host_addr)
1820 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1821 switch(type) {
1822 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1823 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1824 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1825 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1826 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1827 default: assert(0);
1828 }
1829 return;
1830 }
b1be1eee 1831 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1832 if(is_dynamic) {
1833 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1834 handler=jump_handler_read8;
b1be1eee 1835 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1836 handler=jump_handler_read16;
b1be1eee 1837 if(type==LOADW_STUB)
643aeae3 1838 handler=jump_handler_read32;
b1be1eee 1839 }
c6c3b1b3 1840
1841 // call a memhandler
cf95b4f0 1842 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1843 reglist&=~(1<<rt);
1844 save_regs(reglist);
1845 if(target==0)
1846 emit_movimm(addr,0);
1847 else if(rs!=0)
1848 emit_mov(rs,0);
b1be1eee 1849 if(cc<0)
1850 emit_loadreg(CCREG,2);
1851 if(is_dynamic) {
1852 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1853 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1854 }
b1be1eee 1855 else {
643aeae3 1856 emit_readword(&last_count,3);
2330734f 1857 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1858 emit_add(2,3,2);
643aeae3 1859 emit_writeword(2,&Count);
b1be1eee 1860 }
1861
2a014d73 1862 emit_far_call(handler);
b1be1eee 1863
cf95b4f0 1864 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1865 switch(type) {
1866 case LOADB_STUB: emit_signextend8(0,rt); break;
1867 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1868 case LOADH_STUB: emit_signextend16(0,rt); break;
1869 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1870 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1871 default: assert(0);
1872 }
1873 }
1874 restore_regs(reglist);
57871462 1875}
1876
e2b5e7aa 1877static void do_writestub(int n)
57871462 1878{
b14b6a8f 1879 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1880 literal_pool(256);
b14b6a8f 1881 set_jump_target(stubs[n].addr, out);
1882 enum stub_type type=stubs[n].type;
1883 int i=stubs[n].a;
1884 int rs=stubs[n].b;
81dbbf4c 1885 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1886 u_int reglist=stubs[n].e;
81dbbf4c 1887 const signed char *i_regmap=i_regs->regmap;
581335b0 1888 int rt,r;
cf95b4f0 1889 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1890 rt=get_reg(i_regmap,r=FTEMP);
1891 }else{
cf95b4f0 1892 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1893 }
1894 assert(rs>=0);
1895 assert(rt>=0);
b14b6a8f 1896 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1897 void *restore_jump = NULL;
b96d3df7 1898 int reglist2=reglist|(1<<rs)|(1<<rt);
1899 for(rtmp=0;rtmp<=12;rtmp++) {
1900 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1901 temp=rtmp; break;
1902 }
1903 }
1904 if(temp==-1) {
1905 save_regs(reglist);
1906 regs_saved=1;
1907 for(rtmp=0;rtmp<=3;rtmp++)
1908 if(rtmp!=rs&&rtmp!=rt)
1909 {temp=rtmp;break;}
1910 }
1911 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1912 temp2=3;
643aeae3 1913 emit_readword(&mem_wtab,temp);
b96d3df7 1914 emit_shrimm(rs,12,temp2);
1915 emit_readword_dualindexedx4(temp,temp2,temp2);
1916 emit_lsls_imm(temp2,1,temp2);
1917 switch(type) {
1918 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1919 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1920 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1921 default: assert(0);
1922 }
1923 if(regs_saved) {
df4dc2b1 1924 restore_jump=out;
b96d3df7 1925 emit_jcc(0); // jump to reg restore
1926 }
1927 else
b14b6a8f 1928 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1929
1930 if(!regs_saved)
1931 save_regs(reglist);
643aeae3 1932 void *handler=NULL;
b96d3df7 1933 switch(type) {
643aeae3 1934 case STOREB_STUB: handler=jump_handler_write8; break;
1935 case STOREH_STUB: handler=jump_handler_write16; break;
1936 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1937 default: assert(0);
b96d3df7 1938 }
643aeae3 1939 assert(handler);
b96d3df7 1940 pass_args(rs,rt);
1941 if(temp2!=3)
1942 emit_mov(temp2,3);
1943 int cc=get_reg(i_regmap,CCREG);
1944 if(cc<0)
1945 emit_loadreg(CCREG,2);
2330734f 1946 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1947 // returns new cycle_count
2a014d73 1948 emit_far_call(handler);
2330734f 1949 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1950 if(cc<0)
1951 emit_storereg(CCREG,2);
1952 if(restore_jump)
df4dc2b1 1953 set_jump_target(restore_jump, out);
b96d3df7 1954 restore_regs(reglist);
b14b6a8f 1955 emit_jmp(stubs[n].retaddr);
57871462 1956}
1957
81dbbf4c 1958static void inline_writestub(enum stub_type type, int i, u_int addr,
1959 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1960{
1961 int rs=get_reg(regmap,-1);
57871462 1962 int rt=get_reg(regmap,target);
1963 assert(rs>=0);
1964 assert(rt>=0);
687b4580 1965 uintptr_t host_addr = 0;
643aeae3 1966 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1967 if (handler == NULL) {
13e35c04 1968 if(addr!=host_addr)
1969 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1970 switch(type) {
1971 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1972 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1973 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1974 default: assert(0);
1975 }
1976 return;
1977 }
1978
1979 // call a memhandler
1980 save_regs(reglist);
13e35c04 1981 pass_args(rs,rt);
b96d3df7 1982 int cc=get_reg(regmap,CCREG);
1983 if(cc<0)
1984 emit_loadreg(CCREG,2);
2330734f 1985 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1986 emit_movimm((u_int)handler,3);
b96d3df7 1987 // returns new cycle_count
2a014d73 1988 emit_far_call(jump_handler_write_h);
2330734f 1989 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1990 if(cc<0)
1991 emit_storereg(CCREG,2);
1992 restore_regs(reglist);
57871462 1993}
1994
d1e4ebd9 1995// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1996static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 1997{
665f33e1 1998 #ifndef HAVE_ARMV7
7c3a5182 1999 emit_loadlp((int)source, 1);
2000 emit_loadlp((int)copy, 2);
3d680478 2001 emit_loadlp(source_len, 3);
57871462 2002 #else
7c3a5182 2003 emit_movw(((u_int)source)&0x0000FFFF, 1);
2004 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2005 emit_movt(((u_int)source)&0xFFFF0000, 1);
2006 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2007 emit_movw(source_len, 3);
57871462 2008 #endif
7c3a5182 2009 emit_movimm(arg0, 0);
2010}
2011
3d680478 2012static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2013{
2014 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2015 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2016 emit_far_call(verify_code);
df4dc2b1 2017 void *entry = out;
57871462 2018 load_regs_entry(i);
df4dc2b1 2019 if (entry == out)
2020 entry = instr_addr[i];
57871462 2021 emit_jmp(instr_addr[i]);
2022 return entry;
2023}
2024
3d680478 2025static void do_dirty_stub_ds(u_int source_len)
57871462 2026{
3d680478 2027 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2028 emit_far_call(verify_code_ds);
57871462 2029}
2030
57871462 2031/* Special assem */
2032
81dbbf4c 2033static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2034{
2035 save_regs_all(reglist);
32631e6a 2036 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2037#ifdef PCNT
81dbbf4c 2038 emit_movimm(op, 0);
2a014d73 2039 emit_far_call(pcnt_gte_start);
82ed88eb 2040#endif
81dbbf4c 2041 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2042}
2043
2044static void c2op_epilogue(u_int op,u_int reglist)
2045{
82ed88eb 2046#ifdef PCNT
2047 emit_movimm(op,0);
2a014d73 2048 emit_far_call(pcnt_gte_end);
82ed88eb 2049#endif
054175e9 2050 restore_regs_all(reglist);
2051}
2052
6c0eefaf 2053static void c2op_call_MACtoIR(int lm,int need_flags)
2054{
2055 if(need_flags)
2a014d73 2056 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2057 else
2a014d73 2058 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2059}
2060
2061static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2062{
2a014d73 2063 emit_far_call(func);
6c0eefaf 2064 // func is C code and trashes r0
2065 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2066 if(need_flags||need_ir)
2067 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2068 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2069}
2070
81dbbf4c 2071static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2072{
81dbbf4c 2073 u_int c2op = source[i] & 0x3f;
2074 u_int reglist_full = get_host_reglist(i_regs->regmap);
2075 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2076 int need_flags, need_ir;
b9b61529 2077
2078 if (gte_handlers[c2op]!=NULL) {
bedfea38 2079 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2080 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2081 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2082 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2083 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2084 need_flags=0;
6c0eefaf 2085 int shift = (source[i] >> 19) & 1;
2086 int lm = (source[i] >> 10) & 1;
054175e9 2087 switch(c2op) {
19776aef 2088#ifndef DRC_DBG
054175e9 2089 case GTE_MVMVA: {
82336ba3 2090#ifdef HAVE_ARMV5
054175e9 2091 int v = (source[i] >> 15) & 3;
2092 int cv = (source[i] >> 13) & 3;
2093 int mx = (source[i] >> 17) & 3;
4d646738 2094 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2095 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2096 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2097 if(v<3)
2098 emit_ldrd(v*8,0,4);
2099 else {
2100 emit_movzwl_indexed(9*4,0,4); // gteIR
2101 emit_movzwl_indexed(10*4,0,6);
2102 emit_movzwl_indexed(11*4,0,5);
2103 emit_orrshl_imm(6,16,4);
2104 }
2105 if(mx<3)
2106 emit_addimm(0,32*4+mx*8*4,6);
2107 else
643aeae3 2108 emit_readword(&zeromem_ptr,6);
054175e9 2109 if(cv<3)
2110 emit_addimm(0,32*4+(cv*8+5)*4,7);
2111 else
643aeae3 2112 emit_readword(&zeromem_ptr,7);
054175e9 2113#ifdef __ARM_NEON__
2114 emit_movimm(source[i],1); // opcode
2a014d73 2115 emit_far_call(gteMVMVA_part_neon);
054175e9 2116 if(need_flags) {
2117 emit_movimm(lm,1);
2a014d73 2118 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2119 }
2120#else
2121 if(cv==3&&shift)
2a014d73 2122 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2123 else {
2124 emit_movimm(shift,1);
2a014d73 2125 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2126 }
6c0eefaf 2127 if(need_flags||need_ir)
2128 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2129#endif
2130#else /* if not HAVE_ARMV5 */
81dbbf4c 2131 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2132 emit_movimm(source[i],1); // opcode
643aeae3 2133 emit_writeword(1,&psxRegs.code);
2a014d73 2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2135#endif
2136 break;
2137 }
6c0eefaf 2138 case GTE_OP:
81dbbf4c 2139 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2140 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2141 if(need_flags||need_ir) {
2142 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2143 c2op_call_MACtoIR(lm,need_flags);
2144 }
2145 break;
2146 case GTE_DPCS:
81dbbf4c 2147 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2148 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_INTPL:
81dbbf4c 2151 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2152 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_SQR:
81dbbf4c 2155 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2156 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2157 if(need_flags||need_ir) {
2158 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2159 c2op_call_MACtoIR(lm,need_flags);
2160 }
2161 break;
2162 case GTE_DCPL:
81dbbf4c 2163 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2164 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPF:
81dbbf4c 2167 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2168 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPL:
81dbbf4c 2171 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2172 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2173 break;
19776aef 2174#endif
054175e9 2175 default:
81dbbf4c 2176 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2177#ifdef DRC_DBG
2178 emit_movimm(source[i],1); // opcode
643aeae3 2179 emit_writeword(1,&psxRegs.code);
19776aef 2180#endif
2a014d73 2181 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2182 break;
2183 }
2184 c2op_epilogue(c2op,reglist);
2185 }
b9b61529 2186}
2187
/*
 * Emit the value transformation applied when writing cop2 control reg 31.
 * Reads host reg `sl`, leaves the result in host reg `temp`:
 *
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shift right then left by 12 to drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the 0x7f87e000 mask is tested in three pieces; each later test is
  // only executed while the previous ones found no bit set
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // any tested bit set -> set bit 31
  emit_orrne_imm(temp, 0x80000000, temp);
}
2199
2200static void do_mfc2_31_one(u_int copr,signed char temp)
2201{
2202 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2203 emit_lsls_imm(temp,16,temp);
2204 emit_cmovs_imm(0,temp);
2205 emit_cmpimm(temp,0xf80<<16);
2206 emit_andimm(temp,0xf80<<16,temp);
2207 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2208}
2209
2210static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2211{
2212 if (temp < 0) {
2213 host_tempreg_acquire();
2214 temp = HOST_TEMPREG;
2215 }
2216 do_mfc2_31_one(9,temp);
9c997d19 2217 emit_shrimm(temp,7+16,tl);
3968e69e 2218 do_mfc2_31_one(10,temp);
9c997d19 2219 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2220 do_mfc2_31_one(11,temp);
9c997d19 2221 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2222 emit_writeword(tl,&reg_cop2d[29]);
2223 if (temp == HOST_TEMPREG)
2224 host_tempreg_release();
2225}
2226
2330734f 2227static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2228{
2229 // case 0x18: MULT
2230 // case 0x19: MULTU
2231 // case 0x1A: DIV
2232 // case 0x1B: DIVU
2233 // case 0x1C: DMULT
2234 // case 0x1D: DMULTU
2235 // case 0x1E: DDIV
2236 // case 0x1F: DDIVU
cf95b4f0 2237 if(dops[i].rs1&&dops[i].rs2)
57871462 2238 {
cf95b4f0 2239 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2240 {
cf95b4f0 2241 if(dops[i].opcode2==0x18) // MULT
57871462 2242 {
cf95b4f0 2243 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2244 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2245 signed char hi=get_reg(i_regs->regmap,HIREG);
2246 signed char lo=get_reg(i_regs->regmap,LOREG);
2247 assert(m1>=0);
2248 assert(m2>=0);
2249 assert(hi>=0);
2250 assert(lo>=0);
2251 emit_smull(m1,m2,hi,lo);
2252 }
cf95b4f0 2253 if(dops[i].opcode2==0x19) // MULTU
57871462 2254 {
cf95b4f0 2255 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2256 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2257 signed char hi=get_reg(i_regs->regmap,HIREG);
2258 signed char lo=get_reg(i_regs->regmap,LOREG);
2259 assert(m1>=0);
2260 assert(m2>=0);
2261 assert(hi>=0);
2262 assert(lo>=0);
2263 emit_umull(m1,m2,hi,lo);
2264 }
cf95b4f0 2265 if(dops[i].opcode2==0x1A) // DIV
57871462 2266 {
cf95b4f0 2267 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2268 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2269 assert(d1>=0);
2270 assert(d2>=0);
2271 signed char quotient=get_reg(i_regs->regmap,LOREG);
2272 signed char remainder=get_reg(i_regs->regmap,HIREG);
2273 assert(quotient>=0);
2274 assert(remainder>=0);
2275 emit_movs(d1,remainder);
44a80f6a 2276 emit_movimm(0xffffffff,quotient);
2277 emit_negmi(quotient,quotient); // .. quotient and ..
2278 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2279 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2280 emit_jeq(out+52); // Division by zero
82336ba3 2281 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2282#ifdef HAVE_ARMV5
57871462 2283 emit_clz(HOST_TEMPREG,quotient);
2284 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2285#else
2286 emit_movimm(0,quotient);
2287 emit_addpl_imm(quotient,1,quotient);
2288 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2289 emit_jns(out-2*4);
665f33e1 2290#endif
57871462 2291 emit_orimm(quotient,1<<31,quotient);
2292 emit_shr(quotient,quotient,quotient);
2293 emit_cmp(remainder,HOST_TEMPREG);
2294 emit_subcs(remainder,HOST_TEMPREG,remainder);
2295 emit_adcs(quotient,quotient,quotient);
2296 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2297 emit_jcc(out-16); // -4
57871462 2298 emit_teq(d1,d2);
2299 emit_negmi(quotient,quotient);
2300 emit_test(d1,d1);
2301 emit_negmi(remainder,remainder);
2302 }
cf95b4f0 2303 if(dops[i].opcode2==0x1B) // DIVU
57871462 2304 {
cf95b4f0 2305 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2306 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2307 assert(d1>=0);
2308 assert(d2>=0);
2309 signed char quotient=get_reg(i_regs->regmap,LOREG);
2310 signed char remainder=get_reg(i_regs->regmap,HIREG);
2311 assert(quotient>=0);
2312 assert(remainder>=0);
44a80f6a 2313 emit_mov(d1,remainder);
2314 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2315 emit_test(d2,d2);
7c3a5182 2316 emit_jeq(out+40); // Division by zero
665f33e1 2317#ifdef HAVE_ARMV5
57871462 2318 emit_clz(d2,HOST_TEMPREG);
2319 emit_movimm(1<<31,quotient);
2320 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2321#else
2322 emit_movimm(0,HOST_TEMPREG);
82336ba3 2323 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2324 emit_lslpls_imm(d2,1,d2);
7c3a5182 2325 emit_jns(out-2*4);
665f33e1 2326 emit_movimm(1<<31,quotient);
2327#endif
57871462 2328 emit_shr(quotient,HOST_TEMPREG,quotient);
2329 emit_cmp(remainder,d2);
2330 emit_subcs(remainder,d2,remainder);
2331 emit_adcs(quotient,quotient,quotient);
2332 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2333 emit_jcc(out-16); // -4
57871462 2334 }
2335 }
2336 else // 64-bit
71e490c5 2337 assert(0);
57871462 2338 }
2339 else
2340 {
2341 // Multiply by zero is zero.
2342 // MIPS does not have a divide by zero exception.
2343 // The result is undefined, we return zero.
2344 signed char hr=get_reg(i_regs->regmap,HIREG);
2345 signed char lr=get_reg(i_regs->regmap,LOREG);
2346 if(hr>=0) emit_zeroreg(hr);
2347 if(lr>=0) emit_zeroreg(lr);
2348 }
2349}
2350#define multdiv_assemble multdiv_assemble_arm
2351
d1e4ebd9 2352static void do_jump_vaddr(int rs)
2353{
2a014d73 2354 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2355}
2356
/*
 * Intentionally emits nothing on ARM.
 * On x86 this step puts the value 0xf8 into the register; on ARM the hash
 * can be done with a single instruction (see do_rhash), so no preload is
 * needed.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2361
e2b5e7aa 2362static void do_preload_rhtbl(int ht) {
57871462 2363 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2364}
2365
/*
 * Compute the mini hash table offset: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2369
e2b5e7aa 2370static void do_miniht_load(int ht,int rh) {
57871462 2371 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2372 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2373}
2374
/*
 * Emit the mini hash table dispatch: compare rh with rs and, on a match,
 * load r15 (pc) from [ht+4]. On a miss, fall through to the jump_vaddr
 * path for `rs`.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the miss path always goes through r7
  if (vaddr_reg != 7)
    emit_mov(vaddr_reg, 7);
  vaddr_reg = 7;
#endif
  do_jump_vaddr(vaddr_reg);
}
2385
e2b5e7aa 2386static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2387 #ifndef HAVE_ARMV7
57871462 2388 emit_movimm(return_address,rt); // PC into link register
643aeae3 2389 add_to_linker(out,return_address,1);
57871462 2390 emit_pcreladdr(temp);
643aeae3 2391 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2392 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2393 #else
2394 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2395 add_to_linker(out,return_address,1);
57871462 2396 emit_pcreladdr(temp);
643aeae3 2397 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2398 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2399 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2400 #endif
2401}
2402
57871462 2403// CPU-architecture-specific initialization
2a014d73 2404static void arch_init(void)
2405{
2406 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2407 struct tramp_insns *ops = ndrc->tramp.ops;
2408 size_t i;
2409 assert(!(diff & 3));
2410 assert(diff < 0x1000);
2411 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2412 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2413 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2414 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2415}
b9b61529 2416
2417// vim:shiftwidth=2:expandtab