drc: try to make gte stall handling less bloaty
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
// Host register bitmask (bit n = rn): 0x100f selects r0-r3 and r12; the
// __MACH__ variant additionally selects r9.
// NOTE(review): presumably the set preserved around calls out of generated
// code - the users of this mask are outside this view.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif
35
e2b5e7aa 36#define unused __attribute__((unused))
37
dd114d7d 38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
// Routines declared here; their definitions are outside the visible part
// of this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN stub for host register number N. Indices 11, 13, 14 and 15
// (fp, sp, lr and pc in regname[]) have no stub and are left null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 78
0bbd1454 79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
d148d265 110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 111
57871462 112/* Linker */
113
// Patch the instruction at addr so that it refers to target_.
// The kind of instruction is recognised from its top byte (byte 3 of the
// little-endian word):
//   0xe2  - add rt,pc,#imm (see emit_pcreladdr): the immediate is rewritten
//   0x72  - instruction generated by emit_jno_unlikely
//   other - must be a branch (b/bl, any condition): imm24 is rewritten
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = addr;
  const u_char *bytes = addr;
  u_int target = (u_int)target_;
  // displacement relative to the pc value seen by the instruction (addr+8)
  u_int disp = target - (u_int)insn - 8;
  u_char top = bytes[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    // 8-bit immediate holding disp/4, rotate field 0xF
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      // 8-bit immediate holding disp/16, rotate field 0xE
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      // does not fit an immediate: replace the whole word with 0x7A branch
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  // keep condition/opcode byte, store the word displacement in imm24
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
145
// Disabled variant of set_jump_target that can additionally copy the
// instruction found at the branch target into the word just before the
// branch (and then branch one instruction further). Works, but the
// difference in speed is usually insignificant.
// Note that the add-immediate case here stores the raw byte displacement,
// unlike set_jump_target.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];

  // copying is only allowed when the preceding word is 0xe28dd000
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0              // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000  // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 180
181/* Literal pool */
e2b5e7aa 182static void add_literal(int addr,int val)
57871462 183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
9f51b4b9 187 literalcount++;
188}
57871462 189
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the instruction that jumps to it.
// The second word of the stub must be "ldr rx, [pc, #ofs]"; the pointer
// stored in the literal that this ldr reads is returned.
static void *find_extjump_insn(void *stub)
{
  u_char *ldr_addr = (u_char *)stub + 4;
  int ldr = *(int *)ldr_addr;
  assert((ldr & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int pool_ofs = ldr & 0xfff;
  // pc reads as the ldr's own address + 8
  void **literal = (void **)(ldr_addr + pool_ofs + 8);
  return *literal;
}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
643aeae3 205static void *get_pointer(void *stub)
57871462 206{
207 //printf("get_pointer(%x)\n",(int)stub);
d148d265 208 int *i_ptr=find_extjump_insn(stub);
57871462 209 assert((*i_ptr&0x0f000000)==0x0a000000);
643aeae3 210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 211}
212
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code. The bl is expected 4 words (6 with HAVE_ARMV7)
// into the dirty entry, or one word later. If the word after the bl is an
// unconditional branch, its destination is returned instead.
static void *get_clean_addr(void *addr)
{
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  // follow jump: sign-extended imm24 * 4, relative to insn + 8
  return (char *)insn + ((*insn << 8) >> 6) + 8;
}
231
3968e69e 232static int verify_dirty(const u_int *ptr)
57871462 233{
665f33e1 234 #ifndef HAVE_ARMV7
16c8be17 235 u_int offset;
57871462 236 // get from literal pool
15776b68 237 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 238 offset=*ptr&0xfff;
239 u_int source=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 assert((*ptr&0xFFFF0000)==0xe59f0000);
242 offset=*ptr&0xfff;
243 u_int copy=*(u_int*)((void *)ptr+offset+8);
244 ptr++;
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int len=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 ptr++;
57871462 250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
df4dc2b1 266static int isclean(void *addr)
57871462 267{
665f33e1 268 #ifndef HAVE_ARMV7
581335b0 269 u_int *ptr=((u_int *)addr)+4;
57871462 270 #else
581335b0 271 u_int *ptr=((u_int *)addr)+6;
57871462 272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
// Get the source range that the block at addr was compiled from (host
// pointers). The source address and length are decoded from the argument
// loads at the start of the block's dirty entry; the copy address is
// skipped. *start receives the source address, *end receives source + len.
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  u_int *ptr = addr;
#ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  u_int source_lo = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000);
  u_int source_hi = ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  u_int source = source_lo + source_hi;
  u_int len = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#else
  // get from literal pool: each ldr reads [its own address + 8 + offset]
  u_char *base = addr;
  assert((ptr[0] & 0xFFFF0000) == 0xe59f0000);
  u_int source = *(u_int *)(base + 0 + (ptr[0] & 0xfff) + 8);
  // ptr[1] loads the copy address, which is not needed here
  assert((ptr[2] & 0xFFFF0000) == 0xe59f0000);
  u_int len = *(u_int *)(base + 8 + (ptr[2] & 0xfff) + 8);
  ptr += 4;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  *start = (u_char *)source;
  *end = (u_char *)source + len;
}
313
57871462 314// Allocate a specific ARM register.
e2b5e7aa 315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 316{
317 int n;
f776eb14 318 int dirty=0;
9f51b4b9 319
57871462 320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
f776eb14 323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
57871462 327 }
9f51b4b9 328
57871462 329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
f776eb14 331 cur->dirty|=dirty<<hr;
57871462 332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
e2b5e7aa 336static void alloc_cc(struct regstat *cur,int i)
57871462 337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
57871462 341/* Assembler */
342
e2b5e7aa 343static unused char regname[16][4] = {
57871462 344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
e2b5e7aa 361static void output_w32(u_int word)
57871462 362{
363 *((u_int *)out)=word;
364 out+=4;
365}
e2b5e7aa 366
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// All three register numbers must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 374
// Build the rd/rn fields plus a rotated 8-bit immediate operand
// representing (imm << shift). shift must be even; imm must be below 256.
// The rotate field (bits 8-11) holds (32 - shift) / 2.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);
  u_int rotate_field = ((32 - shift) & 30) << 7;
  u_int regs = (rn << 16) | (rd << 12);
  return regs | rotate_field | imm;
}
e2b5e7aa 383
// Try to express imm as an ARM data-processing immediate (an 8-bit value
// rotated right by an even amount).
// On success stores the 12-bit operand field in *encoded and returns 1.
// On failure returns 0 and leaves *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  // rotate imm right two bits at a time until it fits in 8 bits
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 399
400static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
581335b0 404 (void)ret;
cfbd3c6e 405}
e2b5e7aa 406
407static u_int genjmp(u_int addr)
57871462 408{
7c3a5182 409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
e80343e2 414 return 0;
415 }
57871462 416 return ((u_int)offset>>2)&0xffffff;
417}
418
d1e4ebd9 419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
e2b5e7aa 426static void emit_mov(int rs,int rt)
57871462 427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_movs(int rs,int rt)
57871462 433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
e2b5e7aa 438static void emit_add(int rs1,int rs2,int rt)
57871462 439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
e2b5e7aa 444static void emit_adcs(int rs1,int rs2,int rt)
57871462 445{
446 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
448}
449
e2b5e7aa 450static void emit_neg(int rs, int rt)
57871462 451{
452 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
453 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
454}
455
e2b5e7aa 456static void emit_sub(int rs1,int rs2,int rt)
57871462 457{
458 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
459 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
460}
461
e2b5e7aa 462static void emit_zeroreg(int rt)
57871462 463{
464 assem_debug("mov %s,#0\n",regname[rt]);
465 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
466}
467
e2b5e7aa 468static void emit_loadlp(u_int imm,u_int rt)
790ee18e 469{
470 add_literal((int)out,imm);
471 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
472 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
473}
e2b5e7aa 474
475static void emit_movw(u_int imm,u_int rt)
790ee18e 476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
e2b5e7aa 481
482static void emit_movt(u_int imm,u_int rt)
790ee18e 483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
e2b5e7aa 487
488static void emit_movimm(u_int imm,u_int rt)
790ee18e 489{
490 u_int armval;
491 if(genimm(imm,&armval)) {
492 assem_debug("mov %s,#%d\n",regname[rt],imm);
493 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
494 }else if(genimm(~imm,&armval)) {
495 assem_debug("mvn %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(imm<65536) {
665f33e1 498 #ifndef HAVE_ARMV7
790ee18e 499 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
500 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
501 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
502 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
503 #else
504 emit_movw(imm,rt);
505 #endif
506 }else{
665f33e1 507 #ifndef HAVE_ARMV7
790ee18e 508 emit_loadlp(imm,rt);
509 #else
510 emit_movw(imm&0x0000FFFF,rt);
511 emit_movt(imm&0xFFFF0000,rt);
512 #endif
513 }
514}
e2b5e7aa 515
516static void emit_pcreladdr(u_int rt)
790ee18e 517{
518 assem_debug("add %s,pc,#?\n",regname[rt]);
519 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
520}
521
e2b5e7aa 522static void emit_loadreg(int r, int hr)
57871462 523{
3d624f89 524 if(r&64) {
c43b5311 525 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 526 assert(0);
527 return;
3d624f89 528 }
57871462 529 if((r&63)==0)
530 emit_zeroreg(hr);
531 else {
7c3a5182 532 int addr = (int)&psxRegs.GPR.r[r];
533 switch (r) {
534 //case HIREG: addr = &hi; break;
535 //case LOREG: addr = &lo; break;
536 case CCREG: addr = (int)&cycle_count; break;
537 case CSREG: addr = (int)&Status; break;
538 case INVCP: addr = (int)&invc_ptr; break;
539 default: assert(r < 34); break;
540 }
57871462 541 u_int offset = addr-(u_int)&dynarec_local;
542 assert(offset<4096);
543 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
544 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
545 }
546}
e2b5e7aa 547
548static void emit_storereg(int r, int hr)
57871462 549{
3d624f89 550 if(r&64) {
c43b5311 551 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 552 assert(0);
553 return;
3d624f89 554 }
7c3a5182 555 int addr = (int)&psxRegs.GPR.r[r];
556 switch (r) {
557 //case HIREG: addr = &hi; break;
558 //case LOREG: addr = &lo; break;
559 case CCREG: addr = (int)&cycle_count; break;
560 default: assert(r < 34); break;
561 }
57871462 562 u_int offset = addr-(u_int)&dynarec_local;
563 assert(offset<4096);
564 assem_debug("str %s,fp+%d\n",regname[hr],offset);
565 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
566}
567
e2b5e7aa 568static void emit_test(int rs, int rt)
57871462 569{
570 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
571 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
572}
573
e2b5e7aa 574static void emit_testimm(int rs,int imm)
57871462 575{
576 u_int armval;
5a05d80c 577 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 578 genimm_checked(imm,&armval);
57871462 579 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
580}
581
e2b5e7aa 582static void emit_testeqimm(int rs,int imm)
b9b61529 583{
584 u_int armval;
585 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 586 genimm_checked(imm,&armval);
b9b61529 587 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
588}
589
e2b5e7aa 590static void emit_not(int rs,int rt)
57871462 591{
592 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
593 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
594}
595
e2b5e7aa 596static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 597{
598 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
599 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
600}
601
e2b5e7aa 602static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 603{
604 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
605 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
606}
e2b5e7aa 607
e2b5e7aa 608static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 609{
610 assert(rs<16);
611 assert(rt<16);
612 assert(imm<32);
613 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
614 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
615}
616
e2b5e7aa 617static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 618{
619 assert(rs<16);
620 assert(rt<16);
621 assert(imm<32);
622 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
623 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
624}
625
e2b5e7aa 626static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 627{
628 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
629 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
630}
631
3968e69e 632static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
633{
634 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
635 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
636}
637
e2b5e7aa 638static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 639{
640 assert(rs<16);
641 assert(rt<16);
642 if(imm!=0) {
57871462 643 u_int armval;
644 if(genimm(imm,&armval)) {
645 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
646 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
647 }else if(genimm(-imm,&armval)) {
8a0a8423 648 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 649 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 650 #ifdef HAVE_ARMV7
651 }else if(rt!=rs&&(u_int)imm<65536) {
652 emit_movw(imm&0x0000ffff,rt);
653 emit_add(rs,rt,rt);
654 }else if(rt!=rs&&(u_int)-imm<65536) {
655 emit_movw(-imm&0x0000ffff,rt);
656 emit_sub(rs,rt,rt);
657 #endif
658 }else if((u_int)-imm<65536) {
57871462 659 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
661 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
662 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 663 }else {
664 do {
665 int shift = (ffs(imm) - 1) & ~1;
666 int imm8 = imm & (0xff << shift);
667 genimm_checked(imm8,&armval);
668 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
669 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
670 rs = rt;
671 imm &= ~imm8;
672 }
673 while (imm != 0);
57871462 674 }
675 }
676 else if(rs!=rt) emit_mov(rs,rt);
677}
678
e2b5e7aa 679static void emit_addimm_and_set_flags(int imm,int rt)
57871462 680{
681 assert(imm>-65536&&imm<65536);
682 u_int armval;
683 if(genimm(imm,&armval)) {
684 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
685 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
686 }else if(genimm(-imm,&armval)) {
687 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
688 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
689 }else if(imm<0) {
690 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
691 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
692 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
693 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
694 }else{
695 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
696 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
697 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
698 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
699 }
700}
e2b5e7aa 701
702static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 703{
704 emit_addimm(rt,imm,rt);
705}
706
e2b5e7aa 707static void emit_addnop(u_int r)
57871462 708{
709 assert(r<16);
710 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
711 output_w32(0xe2800000|rd_rn_rm(r,r,0));
712}
713
e2b5e7aa 714static void emit_andimm(int rs,int imm,int rt)
57871462 715{
716 u_int armval;
790ee18e 717 if(imm==0) {
718 emit_zeroreg(rt);
719 }else if(genimm(imm,&armval)) {
57871462 720 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(genimm(~imm,&armval)) {
723 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(imm==65535) {
332a4533 726 #ifndef HAVE_ARMV6
57871462 727 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
728 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
729 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
731 #else
732 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
733 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
734 #endif
735 }else{
736 assert(imm>0&&imm<65535);
665f33e1 737 #ifndef HAVE_ARMV7
57871462 738 assem_debug("mov r14,#%d\n",imm&0xFF00);
739 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
740 assem_debug("add r14,r14,#%d\n",imm&0xFF);
741 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
742 #else
743 emit_movw(imm,HOST_TEMPREG);
744 #endif
745 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
746 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
747 }
748}
749
e2b5e7aa 750static void emit_orimm(int rs,int imm,int rt)
57871462 751{
752 u_int armval;
790ee18e 753 if(imm==0) {
754 if(rs!=rt) emit_mov(rs,rt);
755 }else if(genimm(imm,&armval)) {
57871462 756 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
758 }else{
759 assert(imm>0&&imm<65536);
760 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
762 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
764 }
765}
766
e2b5e7aa 767static void emit_xorimm(int rs,int imm,int rt)
57871462 768{
57871462 769 u_int armval;
790ee18e 770 if(imm==0) {
771 if(rs!=rt) emit_mov(rs,rt);
772 }else if(genimm(imm,&armval)) {
57871462 773 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
775 }else{
514ed0d9 776 assert(imm>0&&imm<65536);
57871462 777 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
779 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
781 }
782}
783
e2b5e7aa 784static void emit_shlimm(int rs,u_int imm,int rt)
57871462 785{
786 assert(imm>0);
787 assert(imm<32);
788 //if(imm==1) ...
789 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
e2b5e7aa 793static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
e2b5e7aa 801static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
e2b5e7aa 809static void emit_shrimm(int rs,u_int imm,int rt)
57871462 810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
815}
816
e2b5e7aa 817static void emit_sarimm(int rs,u_int imm,int rt)
57871462 818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
823}
824
e2b5e7aa 825static void emit_rorimm(int rs,u_int imm,int rt)
57871462 826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
831}
832
// Sign-extend the low 16 bits of rs into rt: sxth with HAVE_ARMV6,
// otherwise a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
843
// Sign-extend the low 8 bits of rs into rt: sxtb with HAVE_ARMV6,
// otherwise a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
854
e2b5e7aa 855static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 856{
857 assert(rs<16);
858 assert(rt<16);
859 assert(shift<16);
860 //if(imm==1) ...
861 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
863}
e2b5e7aa 864
865static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
872}
e2b5e7aa 873
874static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
881}
57871462 882
3968e69e 883static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
890}
e2b5e7aa 891
3968e69e 892static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 893{
894 assert(rs<16);
895 assert(rt<16);
896 assert(shift<16);
897 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
898 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
899}
900
e2b5e7aa 901static void emit_cmpimm(int rs,int imm)
57871462 902{
903 u_int armval;
904 if(genimm(imm,&armval)) {
5a05d80c 905 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 906 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
907 }else if(genimm(-imm,&armval)) {
5a05d80c 908 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 909 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
910 }else if(imm>0) {
911 assert(imm<65536);
57871462 912 emit_movimm(imm,HOST_TEMPREG);
57871462 913 assem_debug("cmp %s,r14\n",regname[rs]);
914 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
915 }else{
916 assert(imm>-65536);
57871462 917 emit_movimm(-imm,HOST_TEMPREG);
57871462 918 assem_debug("cmn %s,r14\n",regname[rs]);
919 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
920 }
921}
922
e2b5e7aa 923static void emit_cmovne_imm(int imm,int rt)
57871462 924{
925 assem_debug("movne %s,#%d\n",regname[rt],imm);
926 u_int armval;
cfbd3c6e 927 genimm_checked(imm,&armval);
57871462 928 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
929}
e2b5e7aa 930
931static void emit_cmovl_imm(int imm,int rt)
57871462 932{
933 assem_debug("movlt %s,#%d\n",regname[rt],imm);
934 u_int armval;
cfbd3c6e 935 genimm_checked(imm,&armval);
57871462 936 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
937}
e2b5e7aa 938
939static void emit_cmovb_imm(int imm,int rt)
57871462 940{
941 assem_debug("movcc %s,#%d\n",regname[rt],imm);
942 u_int armval;
cfbd3c6e 943 genimm_checked(imm,&armval);
57871462 944 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
945}
e2b5e7aa 946
3968e69e 947static void emit_cmovae_imm(int imm,int rt)
948{
949 assem_debug("movcs %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
e2b5e7aa 955static void emit_cmovne_reg(int rs,int rt)
57871462 956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
e2b5e7aa 960
961static void emit_cmovl_reg(int rs,int rt)
57871462 962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
e2b5e7aa 966
e3c6bdb5 967static void emit_cmovb_reg(int rs,int rt)
968{
969 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
971}
972
e2b5e7aa 973static void emit_cmovs_reg(int rs,int rt)
57871462 974{
975 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
976 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
977}
978
// Emit code for rt = (rs < imm) ? 1 : 0, signed comparison.
// When rt is the same register as rs it can only be zeroed after the
// compare has read it; otherwise it is zeroed up front.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 986
// Emit code for rt = (rs < imm) ? 1 : 0, unsigned comparison (movcc).
// When rt is the same register as rs it can only be zeroed after the
// compare has read it; otherwise it is zeroed up front.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 994
e2b5e7aa 995static void emit_cmp(int rs,int rt)
57871462 996{
997 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
998 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
999}
e2b5e7aa 1000
// Emit code for rt = (rs > 0) ? 1 : 0, signed: compare rs with 1, preset
// rt to 1 and overwrite it with 0 when rs was less than 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold = 1;
  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1008
// Emit code that sets rt to 1 when rs is non-zero, else 0.
// rt first takes the value of rs (so it is already 0 in the zero case) while
// the flags are set; a conditional move then replaces any non-zero value by 1.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);   // same register: only the flags are needed
  else
    emit_movs(rs,rt);   // copy and set flags in one instruction
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1016
// Emit code that sets rt to 1 when rs1 < rs2 (signed compare), else 0.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt)||(rs2==rt);
  // rt may only be cleared before the compare if it is not one of the inputs.
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1025
// Emit code that sets rt to 1 when rs1 < rs2 (unsigned compare, detected via
// the carry-clear condition), else 0.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt)||(rs2==rt);
  // rt may only be cleared before the compare if it is not one of the inputs.
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1034
2a014d73 1035static int can_jump_or_call(const void *a)
1036{
1037 intptr_t offset = (u_char *)a - out - 8;
1038 return (-33554432 <= offset && offset < 33554432);
1039}
1040
643aeae3 1041static void emit_call(const void *a_)
57871462 1042{
643aeae3 1043 int a = (int)a_;
d1e4ebd9 1044 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1045 u_int offset=genjmp(a);
1046 output_w32(0xeb000000|offset);
1047}
e2b5e7aa 1048
b14b6a8f 1049static void emit_jmp(const void *a_)
57871462 1050{
b14b6a8f 1051 int a = (int)a_;
d1e4ebd9 1052 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1053 u_int offset=genjmp(a);
1054 output_w32(0xea000000|offset);
1055}
e2b5e7aa 1056
// Emit "bne a_": branch taken when the Z flag is clear.
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
e2b5e7aa 1064
// Emit "beq a_": branch taken when the Z flag is set.
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
e2b5e7aa 1072
// Emit "bmi a_": branch taken when the N (negative) flag is set.
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
e2b5e7aa 1080
// Emit "bpl a_": branch taken when the N (negative) flag is clear.
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
e2b5e7aa 1088
// Emit "blt a_": branch taken on signed "less than".
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
e2b5e7aa 1096
// Emit "bge a_": branch taken on signed "greater than or equal".
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
e2b5e7aa 1104
// Emit "bvc a_": branch taken when the V (overflow) flag is clear.
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
e2b5e7aa 1112
// Emit "bcs a_": branch taken when the carry flag is set.
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
e2b5e7aa 1120
// Emit "bcc a_": branch taken when the carry flag is clear.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000|genjmp(target));
}
1128
3968e69e 1129static unused void emit_callreg(u_int r)
57871462 1130{
c6c3b1b3 1131 assert(r<15);
1132 assem_debug("blx %s\n",regname[r]);
1133 output_w32(0xe12fff30|r);
57871462 1134}
e2b5e7aa 1135
1136static void emit_jmpreg(u_int r)
57871462 1137{
1138 assem_debug("mov pc,%s\n",regname[r]);
1139 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1140}
1141
be516ebe 1142static void emit_ret(void)
1143{
1144 emit_jmpreg(14);
1145}
1146
e2b5e7aa 1147static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1148{
1149 assert(offset>-4096&&offset<4096);
1150 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1151 if(offset>=0) {
1152 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1153 }else{
1154 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1155 }
1156}
e2b5e7aa 1157
1158static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1159{
1160 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1162}
e2b5e7aa 1163
1164static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1165{
1166 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1168}
e2b5e7aa 1169
1170static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1171{
1172 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1174}
e2b5e7aa 1175
1176static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1177{
1178 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1180}
e2b5e7aa 1181
1182static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1183{
1184 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1186}
e2b5e7aa 1187
1188static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1189{
1190 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1191 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1192}
e2b5e7aa 1193
e2b5e7aa 1194static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1195{
1196 assert(offset>-256&&offset<256);
1197 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1198 if(offset>=0) {
1199 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1200 }else{
1201 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1202 }
1203}
e2b5e7aa 1204
e2b5e7aa 1205static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1206{
1207 assert(offset>-256&&offset<256);
1208 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1209 if(offset>=0) {
1210 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1211 }else{
1212 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1213 }
1214}
e2b5e7aa 1215
1216static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1217{
1218 assert(offset>-4096&&offset<4096);
1219 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1220 if(offset>=0) {
1221 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1222 }else{
1223 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1224 }
1225}
e2b5e7aa 1226
e2b5e7aa 1227static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1228{
1229 assert(offset>-256&&offset<256);
1230 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1231 if(offset>=0) {
1232 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1233 }else{
1234 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1235 }
1236}
e2b5e7aa 1237
054175e9 1238static void emit_ldrd(int offset, int rs, int rt)
1239{
1240 assert(offset>-256&&offset<256);
1241 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1242 if(offset>=0) {
1243 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1244 }else{
1245 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1246 }
1247}
e2b5e7aa 1248
643aeae3 1249static void emit_readword(void *addr, int rt)
57871462 1250{
643aeae3 1251 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1252 assert(offset<4096);
1253 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1254 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1255}
e2b5e7aa 1256
e2b5e7aa 1257static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1258{
1259 assert(offset>-4096&&offset<4096);
1260 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1261 if(offset>=0) {
1262 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1263 }else{
1264 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1265 }
1266}
e2b5e7aa 1267
e2b5e7aa 1268static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1269{
1270 assert(offset>-256&&offset<256);
1271 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1272 if(offset>=0) {
1273 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1274 }else{
1275 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1276 }
1277}
e2b5e7aa 1278
1279static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1280{
1281 assert(offset>-4096&&offset<4096);
1282 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1283 if(offset>=0) {
1284 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1285 }else{
1286 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1287 }
1288}
e2b5e7aa 1289
e2b5e7aa 1290static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1291{
1292 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1293 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1294}
e2b5e7aa 1295
1296static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1297{
1298 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1299 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1300}
e2b5e7aa 1301
1302static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1303{
1304 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1305 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1306}
e2b5e7aa 1307
643aeae3 1308static void emit_writeword(int rt, void *addr)
57871462 1309{
643aeae3 1310 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1311 assert(offset<4096);
1312 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1313 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1314}
e2b5e7aa 1315
e2b5e7aa 1316static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1317{
1318 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1319 assert(rs1<16);
1320 assert(rs2<16);
1321 assert(hi<16);
1322 assert(lo<16);
1323 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1324}
e2b5e7aa 1325
1326static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1327{
1328 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1329 assert(rs1<16);
1330 assert(rs2<16);
1331 assert(hi<16);
1332 assert(lo<16);
1333 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1334}
1335
e2b5e7aa 1336static void emit_clz(int rs,int rt)
57871462 1337{
1338 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1339 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1340}
1341
e2b5e7aa 1342static void emit_subcs(int rs1,int rs2,int rt)
57871462 1343{
1344 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1345 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1346}
1347
e2b5e7aa 1348static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1354}
1355
e2b5e7aa 1356static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1362}
1363
e2b5e7aa 1364static void emit_negmi(int rs, int rt)
57871462 1365{
1366 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1367 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1368}
1369
e2b5e7aa 1370static void emit_negsmi(int rs, int rt)
57871462 1371{
1372 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1373 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1374}
1375
e2b5e7aa 1376static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1377{
1378 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1379 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1380}
1381
e2b5e7aa 1382static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1383{
1384 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1385 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1386}
1387
e2b5e7aa 1388static void emit_teq(int rs, int rt)
57871462 1389{
1390 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1391 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1392}
1393
3968e69e 1394static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1395{
1396 u_int armval;
cfbd3c6e 1397 genimm_checked(imm,&armval);
57871462 1398 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1399 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1400}
1401
57871462 1402// Conditionally select one of two immediates, optimizing for small code size
1403// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1404static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1405{
1406 u_int armval;
1407 if(genimm(imm2-imm1,&armval)) {
1408 emit_movimm(imm1,rt);
1409 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1410 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1411 }else if(genimm(imm1-imm2,&armval)) {
1412 emit_movimm(imm1,rt);
1413 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1414 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1415 }
1416 else {
665f33e1 1417 #ifndef HAVE_ARMV7
57871462 1418 emit_movimm(imm1,rt);
1419 add_literal((int)out,imm2);
1420 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1421 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1422 #else
1423 emit_movw(imm1&0x0000FFFF,rt);
1424 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1425 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1426 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1427 }
1428 emit_movt(imm1&0xFFFF0000,rt);
1429 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1430 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1431 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1432 }
1433 #endif
1434 }
1435}
1436
57871462 1437// special case for checking invalid_code
e2b5e7aa 1438static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1439{
1440 assert(imm<128&&imm>=0);
1441 assert(r>=0&&r<16);
1442 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1443 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1444 emit_cmpimm(HOST_TEMPREG,imm);
1445}
1446
e2b5e7aa 1447static void emit_callne(int a)
0bbd1454 1448{
1449 assem_debug("blne %x\n",a);
1450 u_int offset=genjmp(a);
1451 output_w32(0x1b000000|offset);
1452}
1453
57871462 1454// Used to preload hash table entries
e2b5e7aa 1455static unused void emit_prefetchreg(int r)
57871462 1456{
1457 assem_debug("pld %s\n",regname[r]);
1458 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1459}
1460
1461// Special case for mini_ht
e2b5e7aa 1462static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1463{
1464 assert(offset<4096);
1465 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1466 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1467}
1468
e2b5e7aa 1469static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1470{
1471 u_int armval;
cfbd3c6e 1472 genimm_checked(imm,&armval);
b9b61529 1473 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1474 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1475}
1476
e2b5e7aa 1477static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1478{
1479 u_int armval;
cfbd3c6e 1480 genimm_checked(imm,&armval);
b9b61529 1481 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1482 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1483}
1484
e2b5e7aa 1485static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1486{
1487 u_int armval;
1488 genimm_checked(imm,&armval);
1489 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1490 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1491}
1492
e2b5e7aa 1493static void emit_jno_unlikely(int a)
57871462 1494{
1495 //emit_jno(a);
1496 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1497 output_w32(0x72800000|rd_rn_rm(15,15,0));
1498}
1499
054175e9 1500static void save_regs_all(u_int reglist)
57871462 1501{
054175e9 1502 int i;
57871462 1503 if(!reglist) return;
1504 assem_debug("stmia fp,{");
054175e9 1505 for(i=0;i<16;i++)
1506 if(reglist&(1<<i))
1507 assem_debug("r%d,",i);
57871462 1508 assem_debug("}\n");
1509 output_w32(0xe88b0000|reglist);
1510}
e2b5e7aa 1511
054175e9 1512static void restore_regs_all(u_int reglist)
57871462 1513{
054175e9 1514 int i;
57871462 1515 if(!reglist) return;
1516 assem_debug("ldmia fp,{");
054175e9 1517 for(i=0;i<16;i++)
1518 if(reglist&(1<<i))
1519 assem_debug("r%d,",i);
57871462 1520 assem_debug("}\n");
1521 output_w32(0xe89b0000|reglist);
1522}
e2b5e7aa 1523
054175e9 1524// Save registers before function call
1525static void save_regs(u_int reglist)
1526{
4d646738 1527 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1528 save_regs_all(reglist);
1529}
e2b5e7aa 1530
054175e9 1531// Restore registers after function call
1532static void restore_regs(u_int reglist)
1533{
4d646738 1534 reglist&=CALLER_SAVE_REGS;
054175e9 1535 restore_regs_all(reglist);
1536}
57871462 1537
57871462 1538/* Stubs/epilogue */
1539
e2b5e7aa 1540static void literal_pool(int n)
57871462 1541{
1542 if(!literalcount) return;
1543 if(n) {
1544 if((int)out-literals[0][0]<4096-n) return;
1545 }
1546 u_int *ptr;
1547 int i;
1548 for(i=0;i<literalcount;i++)
1549 {
77750690 1550 u_int l_addr=(u_int)out;
1551 int j;
1552 for(j=0;j<i;j++) {
1553 if(literals[j][1]==literals[i][1]) {
1554 //printf("dup %08x\n",literals[i][1]);
1555 l_addr=literals[j][0];
1556 break;
1557 }
1558 }
57871462 1559 ptr=(u_int *)literals[i][0];
77750690 1560 u_int offset=l_addr-(u_int)ptr-8;
57871462 1561 assert(offset<4096);
1562 assert(!(offset&3));
1563 *ptr|=offset;
77750690 1564 if(l_addr==(u_int)out) {
1565 literals[i][0]=l_addr; // remember for dupes
1566 output_w32(literals[i][1]);
1567 }
57871462 1568 }
1569 literalcount=0;
1570}
1571
e2b5e7aa 1572static void literal_pool_jumpover(int n)
57871462 1573{
1574 if(!literalcount) return;
1575 if(n) {
1576 if((int)out-literals[0][0]<4096-n) return;
1577 }
df4dc2b1 1578 void *jaddr = out;
57871462 1579 emit_jmp(0);
1580 literal_pool(0);
df4dc2b1 1581 set_jump_target(jaddr, out);
57871462 1582}
1583
7c3a5182 1584// parsed by get_pointer, find_extjump_insn
1585static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1586{
1587 u_char *ptr=(u_char *)addr;
1588 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1589 (void)ptr;
1590
57871462 1591 emit_loadlp(target,0);
643aeae3 1592 emit_loadlp((u_int)addr,1);
d62c125a 1593 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1594 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1595//DEBUG >
1596#ifdef DEBUG_CYCLE_COUNT
643aeae3 1597 emit_readword(&last_count,ECX);
57871462 1598 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1599 emit_readword(&next_interupt,ECX);
1600 emit_writeword(HOST_CCREG,&Count);
57871462 1601 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1602 emit_writeword(ECX,&last_count);
57871462 1603#endif
1604//DEBUG <
2a014d73 1605 emit_far_jump(linker);
57871462 1606}
1607
d1e4ebd9 1608static void check_extjump2(void *src)
1609{
1610 u_int *ptr = src;
1611 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1612 (void)ptr;
1613}
1614
13e35c04 1615// put rt_val into rt, potentially making use of rs with value rs_val
1616static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1617{
8575a877 1618 u_int armval;
1619 int diff;
1620 if(genimm(rt_val,&armval)) {
1621 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1622 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1623 return;
1624 }
1625 if(genimm(~rt_val,&armval)) {
1626 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1627 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1628 return;
1629 }
1630 diff=rt_val-rs_val;
1631 if(genimm(diff,&armval)) {
1632 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1633 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1634 return;
1635 }else if(genimm(-diff,&armval)) {
1636 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1637 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1638 return;
1639 }
1640 emit_movimm(rt_val,rt);
1641}
1642
// Return 1 if emit_movimm_from() can do its job cheaply for these values,
// i.e. v1 and v2 are equal or their difference (in either direction), after
// stripping trailing pairs of zero bits, fits in 8 bits. Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  // Unsigned arithmetic throughout: negating a signed difference would be
  // undefined for a difference of 0x80000000.
  const u_int diff=v2-v1;
  u_int xs;
  if(diff==0) return 1;
  // drop trailing zero bit pairs, then see whether 8 bits remain
  for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  // same test for the negated difference (the "sub" form)
  for(xs=0u-diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  return 0;
}
cbbab9cd 1658
b14b6a8f 1659static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1660{
1661 switch(type) {
1662 case LOADB_STUB: emit_signextend8(rs,rt); break;
1663 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1664 case LOADH_STUB: emit_signextend16(rs,rt); break;
1665 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1666 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1667 default: assert(0);
1668 }
1669}
1670
b1be1eee 1671#include "pcsxmem.h"
1672#include "pcsxmem_inline.c"
b1be1eee 1673
e2b5e7aa 1674static void do_readstub(int n)
57871462 1675{
b14b6a8f 1676 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1677 literal_pool(256);
b14b6a8f 1678 set_jump_target(stubs[n].addr, out);
1679 enum stub_type type=stubs[n].type;
1680 int i=stubs[n].a;
1681 int rs=stubs[n].b;
81dbbf4c 1682 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1683 u_int reglist=stubs[n].e;
81dbbf4c 1684 const signed char *i_regmap=i_regs->regmap;
581335b0 1685 int rt;
b9b61529 1686 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 1687 rt=get_reg(i_regmap,FTEMP);
1688 }else{
57871462 1689 rt=get_reg(i_regmap,rt1[i]);
1690 }
1691 assert(rs>=0);
df4dc2b1 1692 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1693 void *restore_jump = NULL;
c6c3b1b3 1694 reglist|=(1<<rs);
1695 for(r=0;r<=12;r++) {
1696 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1697 temp=r; break;
1698 }
1699 }
db829eeb 1700 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1701 reglist&=~(1<<rt);
1702 if(temp==-1) {
1703 save_regs(reglist);
1704 regs_saved=1;
1705 temp=(rs==0)?2:0;
1706 }
1707 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1708 temp2=1;
643aeae3 1709 emit_readword(&mem_rtab,temp);
c6c3b1b3 1710 emit_shrimm(rs,12,temp2);
1711 emit_readword_dualindexedx4(temp,temp2,temp2);
1712 emit_lsls_imm(temp2,1,temp2);
1713 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1714 switch(type) {
1715 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1716 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1717 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1718 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1719 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1720 default: assert(0);
c6c3b1b3 1721 }
1722 }
1723 if(regs_saved) {
df4dc2b1 1724 restore_jump=out;
c6c3b1b3 1725 emit_jcc(0); // jump to reg restore
1726 }
1727 else
b14b6a8f 1728 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1729
1730 if(!regs_saved)
1731 save_regs(reglist);
643aeae3 1732 void *handler=NULL;
c6c3b1b3 1733 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1734 handler=jump_handler_read8;
c6c3b1b3 1735 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1736 handler=jump_handler_read16;
c6c3b1b3 1737 if(type==LOADW_STUB)
643aeae3 1738 handler=jump_handler_read32;
1739 assert(handler);
b96d3df7 1740 pass_args(rs,temp2);
c6c3b1b3 1741 int cc=get_reg(i_regmap,CCREG);
1742 if(cc<0)
1743 emit_loadreg(CCREG,2);
bb4f300c 1744 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1745 emit_far_call(handler);
c6c3b1b3 1746 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 1747 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1748 }
1749 if(restore_jump)
df4dc2b1 1750 set_jump_target(restore_jump, out);
c6c3b1b3 1751 restore_regs(reglist);
b14b6a8f 1752 emit_jmp(stubs[n].retaddr); // return address
57871462 1753}
1754
81dbbf4c 1755static void inline_readstub(enum stub_type type, int i, u_int addr,
1756 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1757{
1758 int rs=get_reg(regmap,target);
57871462 1759 int rt=get_reg(regmap,target);
535d208a 1760 if(rs<0) rs=get_reg(regmap,-1);
57871462 1761 assert(rs>=0);
2a014d73 1762 u_int is_dynamic;
687b4580 1763 uintptr_t host_addr = 0;
643aeae3 1764 void *handler;
b1be1eee 1765 int cc=get_reg(regmap,CCREG);
bb4f300c 1766 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
b1be1eee 1767 return;
643aeae3 1768 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1769 if (handler == NULL) {
db829eeb 1770 if(rt<0||rt1[i]==0)
c6c3b1b3 1771 return;
13e35c04 1772 if(addr!=host_addr)
1773 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1774 switch(type) {
1775 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1776 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1777 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1778 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1779 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1780 default: assert(0);
1781 }
1782 return;
1783 }
b1be1eee 1784 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1785 if(is_dynamic) {
1786 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1787 handler=jump_handler_read8;
b1be1eee 1788 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1789 handler=jump_handler_read16;
b1be1eee 1790 if(type==LOADW_STUB)
643aeae3 1791 handler=jump_handler_read32;
b1be1eee 1792 }
c6c3b1b3 1793
1794 // call a memhandler
db829eeb 1795 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1796 reglist&=~(1<<rt);
1797 save_regs(reglist);
1798 if(target==0)
1799 emit_movimm(addr,0);
1800 else if(rs!=0)
1801 emit_mov(rs,0);
b1be1eee 1802 if(cc<0)
1803 emit_loadreg(CCREG,2);
1804 if(is_dynamic) {
1805 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
bb4f300c 1806 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
c6c3b1b3 1807 }
b1be1eee 1808 else {
643aeae3 1809 emit_readword(&last_count,3);
bb4f300c 1810 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
b1be1eee 1811 emit_add(2,3,2);
643aeae3 1812 emit_writeword(2,&Count);
b1be1eee 1813 }
1814
2a014d73 1815 emit_far_call(handler);
b1be1eee 1816
db829eeb 1817 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 1818 switch(type) {
1819 case LOADB_STUB: emit_signextend8(0,rt); break;
1820 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1821 case LOADH_STUB: emit_signextend16(0,rt); break;
1822 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1823 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1824 default: assert(0);
1825 }
1826 }
1827 restore_regs(reglist);
57871462 1828}
1829
e2b5e7aa 1830static void do_writestub(int n)
57871462 1831{
b14b6a8f 1832 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1833 literal_pool(256);
b14b6a8f 1834 set_jump_target(stubs[n].addr, out);
1835 enum stub_type type=stubs[n].type;
1836 int i=stubs[n].a;
1837 int rs=stubs[n].b;
81dbbf4c 1838 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1839 u_int reglist=stubs[n].e;
81dbbf4c 1840 const signed char *i_regmap=i_regs->regmap;
581335b0 1841 int rt,r;
b9b61529 1842 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 1843 rt=get_reg(i_regmap,r=FTEMP);
1844 }else{
57871462 1845 rt=get_reg(i_regmap,r=rs2[i]);
1846 }
1847 assert(rs>=0);
1848 assert(rt>=0);
b14b6a8f 1849 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1850 void *restore_jump = NULL;
b96d3df7 1851 int reglist2=reglist|(1<<rs)|(1<<rt);
1852 for(rtmp=0;rtmp<=12;rtmp++) {
1853 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1854 temp=rtmp; break;
1855 }
1856 }
1857 if(temp==-1) {
1858 save_regs(reglist);
1859 regs_saved=1;
1860 for(rtmp=0;rtmp<=3;rtmp++)
1861 if(rtmp!=rs&&rtmp!=rt)
1862 {temp=rtmp;break;}
1863 }
1864 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1865 temp2=3;
643aeae3 1866 emit_readword(&mem_wtab,temp);
b96d3df7 1867 emit_shrimm(rs,12,temp2);
1868 emit_readword_dualindexedx4(temp,temp2,temp2);
1869 emit_lsls_imm(temp2,1,temp2);
1870 switch(type) {
1871 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1872 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1873 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1874 default: assert(0);
1875 }
1876 if(regs_saved) {
df4dc2b1 1877 restore_jump=out;
b96d3df7 1878 emit_jcc(0); // jump to reg restore
1879 }
1880 else
b14b6a8f 1881 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1882
1883 if(!regs_saved)
1884 save_regs(reglist);
643aeae3 1885 void *handler=NULL;
b96d3df7 1886 switch(type) {
643aeae3 1887 case STOREB_STUB: handler=jump_handler_write8; break;
1888 case STOREH_STUB: handler=jump_handler_write16; break;
1889 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1890 default: assert(0);
b96d3df7 1891 }
643aeae3 1892 assert(handler);
b96d3df7 1893 pass_args(rs,rt);
1894 if(temp2!=3)
1895 emit_mov(temp2,3);
1896 int cc=get_reg(i_regmap,CCREG);
1897 if(cc<0)
1898 emit_loadreg(CCREG,2);
bb4f300c 1899 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
b96d3df7 1900 // returns new cycle_count
2a014d73 1901 emit_far_call(handler);
bb4f300c 1902 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
b96d3df7 1903 if(cc<0)
1904 emit_storereg(CCREG,2);
1905 if(restore_jump)
df4dc2b1 1906 set_jump_target(restore_jump, out);
b96d3df7 1907 restore_regs(reglist);
b14b6a8f 1908 emit_jmp(stubs[n].retaddr);
57871462 1909}
1910
81dbbf4c 1911static void inline_writestub(enum stub_type type, int i, u_int addr,
1912 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1913{
1914 int rs=get_reg(regmap,-1);
57871462 1915 int rt=get_reg(regmap,target);
1916 assert(rs>=0);
1917 assert(rt>=0);
687b4580 1918 uintptr_t host_addr = 0;
643aeae3 1919 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1920 if (handler == NULL) {
13e35c04 1921 if(addr!=host_addr)
1922 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1923 switch(type) {
1924 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1925 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1926 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1927 default: assert(0);
1928 }
1929 return;
1930 }
1931
1932 // call a memhandler
1933 save_regs(reglist);
13e35c04 1934 pass_args(rs,rt);
b96d3df7 1935 int cc=get_reg(regmap,CCREG);
1936 if(cc<0)
1937 emit_loadreg(CCREG,2);
bb4f300c 1938 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
643aeae3 1939 emit_movimm((u_int)handler,3);
b96d3df7 1940 // returns new cycle_count
2a014d73 1941 emit_far_call(jump_handler_write_h);
bb4f300c 1942 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
b96d3df7 1943 if(cc<0)
1944 emit_storereg(CCREG,2);
1945 restore_regs(reglist);
57871462 1946}
1947
d1e4ebd9 1948// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
7c3a5182 1949static void do_dirty_stub_emit_args(u_int arg0)
57871462 1950{
665f33e1 1951 #ifndef HAVE_ARMV7
7c3a5182 1952 emit_loadlp((int)source, 1);
1953 emit_loadlp((int)copy, 2);
1954 emit_loadlp(slen*4, 3);
57871462 1955 #else
7c3a5182 1956 emit_movw(((u_int)source)&0x0000FFFF, 1);
1957 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1958 emit_movt(((u_int)source)&0xFFFF0000, 1);
1959 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1960 emit_movw(slen*4, 3);
57871462 1961 #endif
7c3a5182 1962 emit_movimm(arg0, 0);
1963}
1964
1965static void *do_dirty_stub(int i)
1966{
1967 assem_debug("do_dirty_stub %x\n",start+i*4);
1968 do_dirty_stub_emit_args(start + i*4);
2a014d73 1969 emit_far_call(verify_code);
df4dc2b1 1970 void *entry = out;
57871462 1971 load_regs_entry(i);
df4dc2b1 1972 if (entry == out)
1973 entry = instr_addr[i];
57871462 1974 emit_jmp(instr_addr[i]);
1975 return entry;
1976}
1977
e2b5e7aa 1978static void do_dirty_stub_ds()
57871462 1979{
7c3a5182 1980 do_dirty_stub_emit_args(start + 1);
2a014d73 1981 emit_far_call(verify_code_ds);
57871462 1982}
1983
57871462 1984/* Special assem */
1985
81dbbf4c 1986static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1987{
1988 save_regs_all(reglist);
81dbbf4c 1989 cop2_call_stall_check(op, i, i_regs, 0);
82ed88eb 1990#ifdef PCNT
81dbbf4c 1991 emit_movimm(op, 0);
2a014d73 1992 emit_far_call(pcnt_gte_start);
82ed88eb 1993#endif
81dbbf4c 1994 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1995}
1996
1997static void c2op_epilogue(u_int op,u_int reglist)
1998{
82ed88eb 1999#ifdef PCNT
2000 emit_movimm(op,0);
2a014d73 2001 emit_far_call(pcnt_gte_end);
82ed88eb 2002#endif
054175e9 2003 restore_regs_all(reglist);
2004}
2005
6c0eefaf 2006static void c2op_call_MACtoIR(int lm,int need_flags)
2007{
2008 if(need_flags)
2a014d73 2009 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2010 else
2a014d73 2011 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2012}
2013
2014static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2015{
2a014d73 2016 emit_far_call(func);
6c0eefaf 2017 // func is C code and trashes r0
2018 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2019 if(need_flags||need_ir)
2020 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2021 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2022}
2023
81dbbf4c 2024static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2025{
81dbbf4c 2026 u_int c2op = source[i] & 0x3f;
2027 u_int reglist_full = get_host_reglist(i_regs->regmap);
2028 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2029 int need_flags, need_ir;
b9b61529 2030
2031 if (gte_handlers[c2op]!=NULL) {
bedfea38 2032 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2033 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2034 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2035 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2036 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2037 need_flags=0;
6c0eefaf 2038 int shift = (source[i] >> 19) & 1;
2039 int lm = (source[i] >> 10) & 1;
054175e9 2040 switch(c2op) {
19776aef 2041#ifndef DRC_DBG
054175e9 2042 case GTE_MVMVA: {
82336ba3 2043#ifdef HAVE_ARMV5
054175e9 2044 int v = (source[i] >> 15) & 3;
2045 int cv = (source[i] >> 13) & 3;
2046 int mx = (source[i] >> 17) & 3;
4d646738 2047 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2048 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2049 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2050 if(v<3)
2051 emit_ldrd(v*8,0,4);
2052 else {
2053 emit_movzwl_indexed(9*4,0,4); // gteIR
2054 emit_movzwl_indexed(10*4,0,6);
2055 emit_movzwl_indexed(11*4,0,5);
2056 emit_orrshl_imm(6,16,4);
2057 }
2058 if(mx<3)
2059 emit_addimm(0,32*4+mx*8*4,6);
2060 else
643aeae3 2061 emit_readword(&zeromem_ptr,6);
054175e9 2062 if(cv<3)
2063 emit_addimm(0,32*4+(cv*8+5)*4,7);
2064 else
643aeae3 2065 emit_readword(&zeromem_ptr,7);
054175e9 2066#ifdef __ARM_NEON__
2067 emit_movimm(source[i],1); // opcode
2a014d73 2068 emit_far_call(gteMVMVA_part_neon);
054175e9 2069 if(need_flags) {
2070 emit_movimm(lm,1);
2a014d73 2071 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2072 }
2073#else
2074 if(cv==3&&shift)
2a014d73 2075 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2076 else {
2077 emit_movimm(shift,1);
2a014d73 2078 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2079 }
6c0eefaf 2080 if(need_flags||need_ir)
2081 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2082#endif
2083#else /* if not HAVE_ARMV5 */
81dbbf4c 2084 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2085 emit_movimm(source[i],1); // opcode
643aeae3 2086 emit_writeword(1,&psxRegs.code);
2a014d73 2087 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2088#endif
2089 break;
2090 }
6c0eefaf 2091 case GTE_OP:
81dbbf4c 2092 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2093 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2094 if(need_flags||need_ir) {
2095 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2096 c2op_call_MACtoIR(lm,need_flags);
2097 }
2098 break;
2099 case GTE_DPCS:
81dbbf4c 2100 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2101 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2102 break;
2103 case GTE_INTPL:
81dbbf4c 2104 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2105 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2106 break;
2107 case GTE_SQR:
81dbbf4c 2108 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2109 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2110 if(need_flags||need_ir) {
2111 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2112 c2op_call_MACtoIR(lm,need_flags);
2113 }
2114 break;
2115 case GTE_DCPL:
81dbbf4c 2116 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2117 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2118 break;
2119 case GTE_GPF:
81dbbf4c 2120 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2121 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2122 break;
2123 case GTE_GPL:
81dbbf4c 2124 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2125 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2126 break;
19776aef 2127#endif
054175e9 2128 default:
81dbbf4c 2129 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2130#ifdef DRC_DBG
2131 emit_movimm(source[i],1); // opcode
643aeae3 2132 emit_writeword(1,&psxRegs.code);
19776aef 2133#endif
2a014d73 2134 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2135 break;
2136 }
2137 c2op_epilogue(c2op,reglist);
2138 }
b9b61529 2139}
2140
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 * Reads host register `sl` and leaves the result in `temp`.
 * Intended C equivalent (from the original author's note):
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shift right then left by 12: drops the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the mask 0x7f87e000 is tested in three pieces; the emitted
  // instructions depend on each other's flags, so keep this order
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);
  emit_orrne_imm(temp, 0x80000000, temp);
}
2152
2153static void do_mfc2_31_one(u_int copr,signed char temp)
2154{
2155 emit_readword(&reg_cop2d[copr],temp);
2156 emit_testimm(temp,0x8000); // do we need this?
2157 emit_andne_imm(temp,0,temp);
2158 emit_cmpimm(temp,0xf80);
2159 emit_andimm(temp,0xf80,temp);
2160 emit_cmovae_imm(0xf80,temp);
2161}
2162
2163static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2164{
2165 if (temp < 0) {
2166 host_tempreg_acquire();
2167 temp = HOST_TEMPREG;
2168 }
2169 do_mfc2_31_one(9,temp);
2170 emit_shrimm(temp,7,tl);
2171 do_mfc2_31_one(10,temp);
2172 emit_orrshr_imm(temp,2,tl);
2173 do_mfc2_31_one(11,temp);
2174 emit_orrshl_imm(temp,3,tl);
2175 emit_writeword(tl,&reg_cop2d[29]);
2176 if (temp == HOST_TEMPREG)
2177 host_tempreg_release();
2178}
2179
e2b5e7aa 2180static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2181{
2182 // case 0x18: MULT
2183 // case 0x19: MULTU
2184 // case 0x1A: DIV
2185 // case 0x1B: DIVU
2186 // case 0x1C: DMULT
2187 // case 0x1D: DMULTU
2188 // case 0x1E: DDIV
2189 // case 0x1F: DDIVU
2190 if(rs1[i]&&rs2[i])
2191 {
2192 if((opcode2[i]&4)==0) // 32-bit
2193 {
2194 if(opcode2[i]==0x18) // MULT
2195 {
2196 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2197 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2198 signed char hi=get_reg(i_regs->regmap,HIREG);
2199 signed char lo=get_reg(i_regs->regmap,LOREG);
2200 assert(m1>=0);
2201 assert(m2>=0);
2202 assert(hi>=0);
2203 assert(lo>=0);
2204 emit_smull(m1,m2,hi,lo);
2205 }
2206 if(opcode2[i]==0x19) // MULTU
2207 {
2208 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2209 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2210 signed char hi=get_reg(i_regs->regmap,HIREG);
2211 signed char lo=get_reg(i_regs->regmap,LOREG);
2212 assert(m1>=0);
2213 assert(m2>=0);
2214 assert(hi>=0);
2215 assert(lo>=0);
2216 emit_umull(m1,m2,hi,lo);
2217 }
2218 if(opcode2[i]==0x1A) // DIV
2219 {
2220 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2221 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2222 assert(d1>=0);
2223 assert(d2>=0);
2224 signed char quotient=get_reg(i_regs->regmap,LOREG);
2225 signed char remainder=get_reg(i_regs->regmap,HIREG);
2226 assert(quotient>=0);
2227 assert(remainder>=0);
2228 emit_movs(d1,remainder);
44a80f6a 2229 emit_movimm(0xffffffff,quotient);
2230 emit_negmi(quotient,quotient); // .. quotient and ..
2231 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2232 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2233 emit_jeq(out+52); // Division by zero
82336ba3 2234 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2235#ifdef HAVE_ARMV5
57871462 2236 emit_clz(HOST_TEMPREG,quotient);
2237 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2238#else
2239 emit_movimm(0,quotient);
2240 emit_addpl_imm(quotient,1,quotient);
2241 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2242 emit_jns(out-2*4);
665f33e1 2243#endif
57871462 2244 emit_orimm(quotient,1<<31,quotient);
2245 emit_shr(quotient,quotient,quotient);
2246 emit_cmp(remainder,HOST_TEMPREG);
2247 emit_subcs(remainder,HOST_TEMPREG,remainder);
2248 emit_adcs(quotient,quotient,quotient);
2249 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2250 emit_jcc(out-16); // -4
57871462 2251 emit_teq(d1,d2);
2252 emit_negmi(quotient,quotient);
2253 emit_test(d1,d1);
2254 emit_negmi(remainder,remainder);
2255 }
2256 if(opcode2[i]==0x1B) // DIVU
2257 {
2258 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2259 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2260 assert(d1>=0);
2261 assert(d2>=0);
2262 signed char quotient=get_reg(i_regs->regmap,LOREG);
2263 signed char remainder=get_reg(i_regs->regmap,HIREG);
2264 assert(quotient>=0);
2265 assert(remainder>=0);
44a80f6a 2266 emit_mov(d1,remainder);
2267 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2268 emit_test(d2,d2);
7c3a5182 2269 emit_jeq(out+40); // Division by zero
665f33e1 2270#ifdef HAVE_ARMV5
57871462 2271 emit_clz(d2,HOST_TEMPREG);
2272 emit_movimm(1<<31,quotient);
2273 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2274#else
2275 emit_movimm(0,HOST_TEMPREG);
82336ba3 2276 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2277 emit_lslpls_imm(d2,1,d2);
7c3a5182 2278 emit_jns(out-2*4);
665f33e1 2279 emit_movimm(1<<31,quotient);
2280#endif
57871462 2281 emit_shr(quotient,HOST_TEMPREG,quotient);
2282 emit_cmp(remainder,d2);
2283 emit_subcs(remainder,d2,remainder);
2284 emit_adcs(quotient,quotient,quotient);
2285 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2286 emit_jcc(out-16); // -4
57871462 2287 }
2288 }
2289 else // 64-bit
71e490c5 2290 assert(0);
57871462 2291 }
2292 else
2293 {
2294 // Multiply by zero is zero.
2295 // MIPS does not have a divide by zero exception.
2296 // The result is undefined, we return zero.
2297 signed char hr=get_reg(i_regs->regmap,HIREG);
2298 signed char lr=get_reg(i_regs->regmap,LOREG);
2299 if(hr>=0) emit_zeroreg(hr);
2300 if(lr>=0) emit_zeroreg(lr);
2301 }
2302}
2303#define multdiv_assemble multdiv_assemble_arm
2304
d1e4ebd9 2305static void do_jump_vaddr(int rs)
2306{
2a014d73 2307 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2308}
2309
/*
 * Intentionally emits nothing on ARM. On x86 the equivalent step puts the
 * value 0xf8 into the register; on ARM the hash is done with a single
 * instruction in do_rhash().
 */
static void do_preload_rhash(int r)
{
}
2314
e2b5e7aa 2315static void do_preload_rhtbl(int ht) {
57871462 2316 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2317}
2318
/* Emit: rh = rs & 0xf8. */
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2322
e2b5e7aa 2323static void do_miniht_load(int ht,int rh) {
57871462 2324 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2325 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2326}
2327
/*
 * Emit the mini hash table dispatch: compare rh with rs, conditionally
 * load host register 15 from [ht + 4] on equality, otherwise fall through
 * to the jump_vaddr path for rs.
 */
static void do_miniht_jump(int rs, int rh, int ht)
{
  int target_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
  #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the address is always passed in host register 7
  if (target_reg != 7)
    emit_mov(target_reg, 7);
  target_reg = 7;
  #endif
  do_jump_vaddr(target_reg);
}
2338
e2b5e7aa 2339static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2340 #ifndef HAVE_ARMV7
57871462 2341 emit_movimm(return_address,rt); // PC into link register
643aeae3 2342 add_to_linker(out,return_address,1);
57871462 2343 emit_pcreladdr(temp);
643aeae3 2344 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2345 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2346 #else
2347 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2348 add_to_linker(out,return_address,1);
57871462 2349 emit_pcreladdr(temp);
643aeae3 2350 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2351 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2352 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2353 #endif
2354}
2355
57871462 2356// CPU-architecture-specific initialization
2a014d73 2357static void arch_init(void)
2358{
2359 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2360 struct tramp_insns *ops = ndrc->tramp.ops;
2361 size_t i;
2362 assert(!(diff & 3));
2363 assert(diff < 0x1000);
2364 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2365 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2366 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2367 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2368}
b9b61529 2369
2370// vim:shiftwidth=2:expandtab