drc/gte: add some stall handling
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
4d646738 30#ifndef __MACH__
31#define CALLER_SAVE_REGS 0x100f
32#else
33#define CALLER_SAVE_REGS 0x120f
34#endif
35
e2b5e7aa 36#define unused __attribute__((unused))
37
dd114d7d 38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
57871462 44void indirect_jump_indexed();
45void indirect_jump();
46void do_interrupt();
47void jump_vaddr_r0();
48void jump_vaddr_r1();
49void jump_vaddr_r2();
50void jump_vaddr_r3();
51void jump_vaddr_r4();
52void jump_vaddr_r5();
53void jump_vaddr_r6();
54void jump_vaddr_r7();
55void jump_vaddr_r8();
56void jump_vaddr_r9();
57void jump_vaddr_r10();
58void jump_vaddr_r12();
59
b14b6a8f 60void * const jump_vaddr_reg[16] = {
61 jump_vaddr_r0,
62 jump_vaddr_r1,
63 jump_vaddr_r2,
64 jump_vaddr_r3,
65 jump_vaddr_r4,
66 jump_vaddr_r5,
67 jump_vaddr_r6,
68 jump_vaddr_r7,
69 jump_vaddr_r8,
70 jump_vaddr_r9,
71 jump_vaddr_r10,
57871462 72 0,
b14b6a8f 73 jump_vaddr_r12,
57871462 74 0,
75 0,
b14b6a8f 76 0
77};
57871462 78
0bbd1454 79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
d148d265 110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 111
57871462 112/* Linker */
113
df4dc2b1 114static void set_jump_target(void *addr, void *target_)
57871462 115{
df4dc2b1 116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
57871462 118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 129 assert(((uintptr_t)addr&3)==0);
57871462 130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 134 assert(((uintptr_t)addr&3)==0);
57871462 135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
// Variant of set_jump_target that can optionally copy the instruction found
// at the branch target into the slot just before the branch, and then branch
// one instruction further. Works, but the difference in speed is usually
// insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char *bytes = (u_char *)addr;

  // the slot to be filled must hold the placeholder instruction
  assert(!copy || insn[-1] == 0xe28dd000);

  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0           // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000))
    copy = 0;

  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 180
181/* Literal pool */
e2b5e7aa 182static void add_literal(int addr,int val)
57871462 183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
9f51b4b9 187 literalcount++;
188}
57871462 189
d148d265 190// from a pointer to external jump stub (which was produced by emit_extjump2)
191// find where the jumping insn is
192static void *find_extjump_insn(void *stub)
57871462 193{
194 int *ptr=(int *)(stub+4);
d148d265 195 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 196 u_int offset=*ptr&0xfff;
d148d265 197 void **l_ptr=(void *)ptr+offset+8;
198 return *l_ptr;
57871462 199}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
643aeae3 205static void *get_pointer(void *stub)
57871462 206{
207 //printf("get_pointer(%x)\n",(int)stub);
d148d265 208 int *i_ptr=find_extjump_insn(stub);
57871462 209 assert((*i_ptr&0x0f000000)==0x0a000000);
643aeae3 210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 211}
212
213// Find the "clean" entry point from a "dirty" entry point
214// by skipping past the call to verify_code
df4dc2b1 215static void *get_clean_addr(void *addr)
57871462 216{
df4dc2b1 217 signed int *ptr = addr;
665f33e1 218 #ifndef HAVE_ARMV7
57871462 219 ptr+=4;
220 #else
221 ptr+=6;
222 #endif
223 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
224 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
225 ptr++;
226 if((*ptr&0xFF000000)==0xea000000) {
df4dc2b1 227 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
57871462 228 }
df4dc2b1 229 return ptr;
57871462 230}
231
// Check a dirty block prologue at ptr: decode the source address, the copy
// address and the length it loads, and compare the two memory regions.
// Returns nonzero when source and copy are byte-identical.
static int verify_dirty(const u_int *ptr)
{
#ifdef HAVE_ARMV7
  // ARMv7 movw/movt: low halves in words 0/1/4, high halves in words 2/3
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  u_int source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
               + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  u_int copy   = (ptr[1] & 0xFFF) + ((ptr[1] >> 4) & 0xF000)
               + ((ptr[3] << 16) & 0xFFF0000) + ((ptr[3] << 12) & 0xF0000000);
  u_int len    = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#else
  // three consecutive pc-relative loads fetch source, copy and len
  // from the literal pool
  u_int pool[3];
  int k;
  for (k = 0; k < 3; k++) {
    assert((*ptr & 0xFFFF0000) == 0xe59f0000);
    pool[k] = *(const u_int *)((const char *)ptr + (*ptr & 0xfff) + 8);
    ptr++;
  }
  ptr++;
  u_int source = pool[0];
  u_int copy = pool[1];
  u_int len = pool[2];
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return memcmp((void *)source, (void *)copy, len) == 0;
}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
df4dc2b1 266static int isclean(void *addr)
57871462 267{
665f33e1 268 #ifndef HAVE_ARMV7
581335b0 269 u_int *ptr=((u_int *)addr)+4;
57871462 270 #else
581335b0 271 u_int *ptr=((u_int *)addr)+6;
57871462 272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
4a35de07 280// get source that block at addr was compiled from (host pointers)
01d26796 281static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 282{
643aeae3 283 u_int *ptr = addr;
665f33e1 284 #ifndef HAVE_ARMV7
16c8be17 285 u_int offset;
57871462 286 // get from literal pool
15776b68 287 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
57871462 300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
57871462 312}
313
57871462 314// Allocate a specific ARM register.
e2b5e7aa 315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 316{
317 int n;
f776eb14 318 int dirty=0;
9f51b4b9 319
57871462 320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
f776eb14 323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
57871462 327 }
9f51b4b9 328
57871462 329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
f776eb14 331 cur->dirty|=dirty<<hr;
57871462 332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
e2b5e7aa 336static void alloc_cc(struct regstat *cur,int i)
57871462 337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
57871462 341/* Assembler */
342
e2b5e7aa 343static unused char regname[16][4] = {
57871462 344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
e2b5e7aa 361static void output_w32(u_int word)
57871462 362{
363 *((u_int *)out)=word;
364 out+=4;
365}
e2b5e7aa 366
367static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 368{
369 assert(rd<16);
370 assert(rn<16);
371 assert(rm<16);
372 return((rn<<16)|(rd<<12)|rm);
373}
e2b5e7aa 374
375static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 376{
377 assert(rd<16);
378 assert(rn<16);
379 assert(imm<256);
380 assert((shift&1)==0);
381 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
382}
e2b5e7aa 383
// Try to express imm as an ARM operand-2 immediate (an 8-bit value rotated
// right by an even amount). On success the 12-bit operand field is stored in
// *encoded and 1 is returned; otherwise *encoded is 0 and 0 is returned.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (!imm)
    return 1;
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm <= 0xff) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate the candidate right by two bits and try again
    imm = (imm << 30) | (imm >> 2);
  }
  return 0;
}
e2b5e7aa 399
400static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
581335b0 404 (void)ret;
cfbd3c6e 405}
e2b5e7aa 406
407static u_int genjmp(u_int addr)
57871462 408{
7c3a5182 409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
e80343e2 414 return 0;
415 }
57871462 416 return ((u_int)offset>>2)&0xffffff;
417}
418
d1e4ebd9 419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
e2b5e7aa 426static void emit_mov(int rs,int rt)
57871462 427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_movs(int rs,int rt)
57871462 433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
e2b5e7aa 438static void emit_add(int rs1,int rs2,int rt)
57871462 439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
e2b5e7aa 444static void emit_adcs(int rs1,int rs2,int rt)
57871462 445{
446 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
448}
449
e2b5e7aa 450static void emit_neg(int rs, int rt)
57871462 451{
452 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
453 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
454}
455
e2b5e7aa 456static void emit_sub(int rs1,int rs2,int rt)
57871462 457{
458 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
459 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
460}
461
e2b5e7aa 462static void emit_zeroreg(int rt)
57871462 463{
464 assem_debug("mov %s,#0\n",regname[rt]);
465 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
466}
467
e2b5e7aa 468static void emit_loadlp(u_int imm,u_int rt)
790ee18e 469{
470 add_literal((int)out,imm);
471 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
472 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
473}
e2b5e7aa 474
475static void emit_movw(u_int imm,u_int rt)
790ee18e 476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
e2b5e7aa 481
482static void emit_movt(u_int imm,u_int rt)
790ee18e 483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
e2b5e7aa 487
488static void emit_movimm(u_int imm,u_int rt)
790ee18e 489{
490 u_int armval;
491 if(genimm(imm,&armval)) {
492 assem_debug("mov %s,#%d\n",regname[rt],imm);
493 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
494 }else if(genimm(~imm,&armval)) {
495 assem_debug("mvn %s,#%d\n",regname[rt],imm);
496 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
497 }else if(imm<65536) {
665f33e1 498 #ifndef HAVE_ARMV7
790ee18e 499 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
500 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
501 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
502 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
503 #else
504 emit_movw(imm,rt);
505 #endif
506 }else{
665f33e1 507 #ifndef HAVE_ARMV7
790ee18e 508 emit_loadlp(imm,rt);
509 #else
510 emit_movw(imm&0x0000FFFF,rt);
511 emit_movt(imm&0xFFFF0000,rt);
512 #endif
513 }
514}
e2b5e7aa 515
516static void emit_pcreladdr(u_int rt)
790ee18e 517{
518 assem_debug("add %s,pc,#?\n",regname[rt]);
519 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
520}
521
e2b5e7aa 522static void emit_loadreg(int r, int hr)
57871462 523{
3d624f89 524 if(r&64) {
c43b5311 525 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 526 assert(0);
527 return;
3d624f89 528 }
57871462 529 if((r&63)==0)
530 emit_zeroreg(hr);
531 else {
7c3a5182 532 int addr = (int)&psxRegs.GPR.r[r];
533 switch (r) {
534 //case HIREG: addr = &hi; break;
535 //case LOREG: addr = &lo; break;
536 case CCREG: addr = (int)&cycle_count; break;
537 case CSREG: addr = (int)&Status; break;
538 case INVCP: addr = (int)&invc_ptr; break;
539 default: assert(r < 34); break;
540 }
57871462 541 u_int offset = addr-(u_int)&dynarec_local;
542 assert(offset<4096);
543 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
544 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
545 }
546}
e2b5e7aa 547
548static void emit_storereg(int r, int hr)
57871462 549{
3d624f89 550 if(r&64) {
c43b5311 551 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 552 assert(0);
553 return;
3d624f89 554 }
7c3a5182 555 int addr = (int)&psxRegs.GPR.r[r];
556 switch (r) {
557 //case HIREG: addr = &hi; break;
558 //case LOREG: addr = &lo; break;
559 case CCREG: addr = (int)&cycle_count; break;
560 default: assert(r < 34); break;
561 }
57871462 562 u_int offset = addr-(u_int)&dynarec_local;
563 assert(offset<4096);
564 assem_debug("str %s,fp+%d\n",regname[hr],offset);
565 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
566}
567
e2b5e7aa 568static void emit_test(int rs, int rt)
57871462 569{
570 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
571 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
572}
573
e2b5e7aa 574static void emit_testimm(int rs,int imm)
57871462 575{
576 u_int armval;
5a05d80c 577 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 578 genimm_checked(imm,&armval);
57871462 579 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
580}
581
e2b5e7aa 582static void emit_testeqimm(int rs,int imm)
b9b61529 583{
584 u_int armval;
585 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 586 genimm_checked(imm,&armval);
b9b61529 587 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
588}
589
e2b5e7aa 590static void emit_not(int rs,int rt)
57871462 591{
592 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
593 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
594}
595
e2b5e7aa 596static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 597{
598 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
599 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
600}
601
e2b5e7aa 602static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 603{
604 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
605 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
606}
e2b5e7aa 607
e2b5e7aa 608static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 609{
610 assert(rs<16);
611 assert(rt<16);
612 assert(imm<32);
613 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
614 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
615}
616
e2b5e7aa 617static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 618{
619 assert(rs<16);
620 assert(rt<16);
621 assert(imm<32);
622 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
623 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
624}
625
e2b5e7aa 626static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 627{
628 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
629 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
630}
631
3968e69e 632static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
633{
634 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
635 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
636}
637
e2b5e7aa 638static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 639{
640 assert(rs<16);
641 assert(rt<16);
642 if(imm!=0) {
57871462 643 u_int armval;
644 if(genimm(imm,&armval)) {
645 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
646 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
647 }else if(genimm(-imm,&armval)) {
8a0a8423 648 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 649 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 650 #ifdef HAVE_ARMV7
651 }else if(rt!=rs&&(u_int)imm<65536) {
652 emit_movw(imm&0x0000ffff,rt);
653 emit_add(rs,rt,rt);
654 }else if(rt!=rs&&(u_int)-imm<65536) {
655 emit_movw(-imm&0x0000ffff,rt);
656 emit_sub(rs,rt,rt);
657 #endif
658 }else if((u_int)-imm<65536) {
57871462 659 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
660 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
661 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
662 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 663 }else {
664 do {
665 int shift = (ffs(imm) - 1) & ~1;
666 int imm8 = imm & (0xff << shift);
667 genimm_checked(imm8,&armval);
668 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
669 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
670 rs = rt;
671 imm &= ~imm8;
672 }
673 while (imm != 0);
57871462 674 }
675 }
676 else if(rs!=rt) emit_mov(rs,rt);
677}
678
e2b5e7aa 679static void emit_addimm_and_set_flags(int imm,int rt)
57871462 680{
681 assert(imm>-65536&&imm<65536);
682 u_int armval;
683 if(genimm(imm,&armval)) {
684 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
685 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
686 }else if(genimm(-imm,&armval)) {
687 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
688 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
689 }else if(imm<0) {
690 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
691 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
692 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
693 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
694 }else{
695 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
696 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
697 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
698 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
699 }
700}
e2b5e7aa 701
702static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 703{
704 emit_addimm(rt,imm,rt);
705}
706
e2b5e7aa 707static void emit_addnop(u_int r)
57871462 708{
709 assert(r<16);
710 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
711 output_w32(0xe2800000|rd_rn_rm(r,r,0));
712}
713
e2b5e7aa 714static void emit_andimm(int rs,int imm,int rt)
57871462 715{
716 u_int armval;
790ee18e 717 if(imm==0) {
718 emit_zeroreg(rt);
719 }else if(genimm(imm,&armval)) {
57871462 720 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(genimm(~imm,&armval)) {
723 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(imm==65535) {
332a4533 726 #ifndef HAVE_ARMV6
57871462 727 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
728 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
729 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
731 #else
732 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
733 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
734 #endif
735 }else{
736 assert(imm>0&&imm<65535);
665f33e1 737 #ifndef HAVE_ARMV7
57871462 738 assem_debug("mov r14,#%d\n",imm&0xFF00);
739 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
740 assem_debug("add r14,r14,#%d\n",imm&0xFF);
741 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
742 #else
743 emit_movw(imm,HOST_TEMPREG);
744 #endif
745 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
746 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
747 }
748}
749
e2b5e7aa 750static void emit_orimm(int rs,int imm,int rt)
57871462 751{
752 u_int armval;
790ee18e 753 if(imm==0) {
754 if(rs!=rt) emit_mov(rs,rt);
755 }else if(genimm(imm,&armval)) {
57871462 756 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
758 }else{
759 assert(imm>0&&imm<65536);
760 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
762 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
764 }
765}
766
e2b5e7aa 767static void emit_xorimm(int rs,int imm,int rt)
57871462 768{
57871462 769 u_int armval;
790ee18e 770 if(imm==0) {
771 if(rs!=rt) emit_mov(rs,rt);
772 }else if(genimm(imm,&armval)) {
57871462 773 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
775 }else{
514ed0d9 776 assert(imm>0&&imm<65536);
57871462 777 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
779 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
781 }
782}
783
e2b5e7aa 784static void emit_shlimm(int rs,u_int imm,int rt)
57871462 785{
786 assert(imm>0);
787 assert(imm<32);
788 //if(imm==1) ...
789 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
e2b5e7aa 793static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
e2b5e7aa 801static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
e2b5e7aa 809static void emit_shrimm(int rs,u_int imm,int rt)
57871462 810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
815}
816
e2b5e7aa 817static void emit_sarimm(int rs,u_int imm,int rt)
57871462 818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
823}
824
e2b5e7aa 825static void emit_rorimm(int rs,u_int imm,int rt)
57871462 826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
831}
832
// rt = sign-extended low 16 bits of rs: sxth on ARMv6+, otherwise a
// shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
843
// rt = sign-extended low 8 bits of rs: sxtb on ARMv6+, otherwise a
// shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
854
e2b5e7aa 855static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 856{
857 assert(rs<16);
858 assert(rt<16);
859 assert(shift<16);
860 //if(imm==1) ...
861 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
863}
e2b5e7aa 864
865static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
872}
e2b5e7aa 873
874static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
881}
57871462 882
3968e69e 883static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
890}
e2b5e7aa 891
3968e69e 892static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 893{
894 assert(rs<16);
895 assert(rt<16);
896 assert(shift<16);
897 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
898 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
899}
900
e2b5e7aa 901static void emit_cmpimm(int rs,int imm)
57871462 902{
903 u_int armval;
904 if(genimm(imm,&armval)) {
5a05d80c 905 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 906 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
907 }else if(genimm(-imm,&armval)) {
5a05d80c 908 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 909 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
910 }else if(imm>0) {
911 assert(imm<65536);
57871462 912 emit_movimm(imm,HOST_TEMPREG);
57871462 913 assem_debug("cmp %s,r14\n",regname[rs]);
914 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
915 }else{
916 assert(imm>-65536);
57871462 917 emit_movimm(-imm,HOST_TEMPREG);
57871462 918 assem_debug("cmn %s,r14\n",regname[rs]);
919 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
920 }
921}
922
e2b5e7aa 923static void emit_cmovne_imm(int imm,int rt)
57871462 924{
925 assem_debug("movne %s,#%d\n",regname[rt],imm);
926 u_int armval;
cfbd3c6e 927 genimm_checked(imm,&armval);
57871462 928 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
929}
e2b5e7aa 930
931static void emit_cmovl_imm(int imm,int rt)
57871462 932{
933 assem_debug("movlt %s,#%d\n",regname[rt],imm);
934 u_int armval;
cfbd3c6e 935 genimm_checked(imm,&armval);
57871462 936 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
937}
e2b5e7aa 938
939static void emit_cmovb_imm(int imm,int rt)
57871462 940{
941 assem_debug("movcc %s,#%d\n",regname[rt],imm);
942 u_int armval;
cfbd3c6e 943 genimm_checked(imm,&armval);
57871462 944 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
945}
e2b5e7aa 946
3968e69e 947static void emit_cmovae_imm(int imm,int rt)
948{
949 assem_debug("movcs %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
e2b5e7aa 955static void emit_cmovne_reg(int rs,int rt)
57871462 956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
e2b5e7aa 960
961static void emit_cmovl_reg(int rs,int rt)
57871462 962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
e2b5e7aa 966
967static void emit_cmovs_reg(int rs,int rt)
57871462 968{
969 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
971}
972
// rt = (rs < imm) ? 1 : 0, signed compare.
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 980
// rt = (rs < imm) ? 1 : 0, unsigned compare (carry clear after cmp).
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 988
e2b5e7aa 989static void emit_cmp(int rs,int rt)
57871462 990{
991 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
992 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
993}
e2b5e7aa 994
// rt = (rs > 0) ? 1 : 0, signed: compare rs against 1, preset rt to 1 and
// overwrite it with 0 when rs was less than 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);   // flags from the compare are still live here
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1002
// rt = (rs != 0) ? 1 : 0. The flags come from a flag-setting copy of rs
// into rt, or from a tst when rs and rt are the same register.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1010
// Emit code that sets rt to 1 if rs1 < rs2 (signed), else 0.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  // rt may only be cleared before the compare if it is not one of the inputs
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1019
// Emit code that sets rt to 1 if rs1 is below rs2 (the conditional move used
// is emit_cmovb_imm, i.e. the carry-clear form), else 0.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  // rt may only be cleared before the compare if it is not one of the inputs
  if(!rt_is_source) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1028
2a014d73 1029static int can_jump_or_call(const void *a)
1030{
1031 intptr_t offset = (u_char *)a - out - 8;
1032 return (-33554432 <= offset && offset < 33554432);
1033}
1034
643aeae3 1035static void emit_call(const void *a_)
57871462 1036{
643aeae3 1037 int a = (int)a_;
d1e4ebd9 1038 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1039 u_int offset=genjmp(a);
1040 output_w32(0xeb000000|offset);
1041}
e2b5e7aa 1042
b14b6a8f 1043static void emit_jmp(const void *a_)
57871462 1044{
b14b6a8f 1045 int a = (int)a_;
d1e4ebd9 1046 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1047 u_int offset=genjmp(a);
1048 output_w32(0xea000000|offset);
1049}
e2b5e7aa 1050
643aeae3 1051static void emit_jne(const void *a_)
57871462 1052{
643aeae3 1053 int a = (int)a_;
57871462 1054 assem_debug("bne %x\n",a);
1055 u_int offset=genjmp(a);
1056 output_w32(0x1a000000|offset);
1057}
e2b5e7aa 1058
7c3a5182 1059static void emit_jeq(const void *a_)
57871462 1060{
7c3a5182 1061 int a = (int)a_;
57871462 1062 assem_debug("beq %x\n",a);
1063 u_int offset=genjmp(a);
1064 output_w32(0x0a000000|offset);
1065}
e2b5e7aa 1066
7c3a5182 1067static void emit_js(const void *a_)
57871462 1068{
7c3a5182 1069 int a = (int)a_;
57871462 1070 assem_debug("bmi %x\n",a);
1071 u_int offset=genjmp(a);
1072 output_w32(0x4a000000|offset);
1073}
e2b5e7aa 1074
7c3a5182 1075static void emit_jns(const void *a_)
57871462 1076{
7c3a5182 1077 int a = (int)a_;
57871462 1078 assem_debug("bpl %x\n",a);
1079 u_int offset=genjmp(a);
1080 output_w32(0x5a000000|offset);
1081}
e2b5e7aa 1082
7c3a5182 1083static void emit_jl(const void *a_)
57871462 1084{
7c3a5182 1085 int a = (int)a_;
57871462 1086 assem_debug("blt %x\n",a);
1087 u_int offset=genjmp(a);
1088 output_w32(0xba000000|offset);
1089}
e2b5e7aa 1090
7c3a5182 1091static void emit_jge(const void *a_)
57871462 1092{
7c3a5182 1093 int a = (int)a_;
57871462 1094 assem_debug("bge %x\n",a);
1095 u_int offset=genjmp(a);
1096 output_w32(0xaa000000|offset);
1097}
e2b5e7aa 1098
7c3a5182 1099static void emit_jno(const void *a_)
57871462 1100{
7c3a5182 1101 int a = (int)a_;
57871462 1102 assem_debug("bvc %x\n",a);
1103 u_int offset=genjmp(a);
1104 output_w32(0x7a000000|offset);
1105}
e2b5e7aa 1106
7c3a5182 1107static void emit_jc(const void *a_)
57871462 1108{
7c3a5182 1109 int a = (int)a_;
57871462 1110 assem_debug("bcs %x\n",a);
1111 u_int offset=genjmp(a);
1112 output_w32(0x2a000000|offset);
1113}
e2b5e7aa 1114
7c3a5182 1115static void emit_jcc(const void *a_)
57871462 1116{
b14b6a8f 1117 int a = (int)a_;
57871462 1118 assem_debug("bcc %x\n",a);
1119 u_int offset=genjmp(a);
1120 output_w32(0x3a000000|offset);
1121}
1122
3968e69e 1123static unused void emit_callreg(u_int r)
57871462 1124{
c6c3b1b3 1125 assert(r<15);
1126 assem_debug("blx %s\n",regname[r]);
1127 output_w32(0xe12fff30|r);
57871462 1128}
e2b5e7aa 1129
1130static void emit_jmpreg(u_int r)
57871462 1131{
1132 assem_debug("mov pc,%s\n",regname[r]);
1133 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1134}
1135
be516ebe 1136static void emit_ret(void)
1137{
1138 emit_jmpreg(14);
1139}
1140
e2b5e7aa 1141static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1142{
1143 assert(offset>-4096&&offset<4096);
1144 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1145 if(offset>=0) {
1146 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1147 }else{
1148 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1149 }
1150}
e2b5e7aa 1151
1152static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1153{
1154 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1155 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1156}
e2b5e7aa 1157
1158static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1159{
1160 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1162}
e2b5e7aa 1163
1164static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1165{
1166 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1167 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1168}
e2b5e7aa 1169
1170static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1171{
1172 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1173 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1174}
e2b5e7aa 1175
1176static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1177{
1178 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1179 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1180}
e2b5e7aa 1181
1182static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1183{
1184 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1185 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1186}
e2b5e7aa 1187
e2b5e7aa 1188static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1189{
1190 assert(offset>-256&&offset<256);
1191 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1192 if(offset>=0) {
1193 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1194 }else{
1195 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1196 }
1197}
e2b5e7aa 1198
e2b5e7aa 1199static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1200{
1201 assert(offset>-256&&offset<256);
1202 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1203 if(offset>=0) {
1204 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1205 }else{
1206 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1207 }
1208}
e2b5e7aa 1209
1210static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1211{
1212 assert(offset>-4096&&offset<4096);
1213 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1214 if(offset>=0) {
1215 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1216 }else{
1217 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1218 }
1219}
e2b5e7aa 1220
e2b5e7aa 1221static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1222{
1223 assert(offset>-256&&offset<256);
1224 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1225 if(offset>=0) {
1226 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1227 }else{
1228 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1229 }
1230}
e2b5e7aa 1231
054175e9 1232static void emit_ldrd(int offset, int rs, int rt)
1233{
1234 assert(offset>-256&&offset<256);
1235 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1236 if(offset>=0) {
1237 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1238 }else{
1239 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1240 }
1241}
e2b5e7aa 1242
643aeae3 1243static void emit_readword(void *addr, int rt)
57871462 1244{
643aeae3 1245 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1246 assert(offset<4096);
1247 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1248 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1249}
e2b5e7aa 1250
e2b5e7aa 1251static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1252{
1253 assert(offset>-4096&&offset<4096);
1254 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1255 if(offset>=0) {
1256 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1257 }else{
1258 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1259 }
1260}
e2b5e7aa 1261
e2b5e7aa 1262static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1263{
1264 assert(offset>-256&&offset<256);
1265 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1266 if(offset>=0) {
1267 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1268 }else{
1269 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1270 }
1271}
e2b5e7aa 1272
1273static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1274{
1275 assert(offset>-4096&&offset<4096);
1276 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1277 if(offset>=0) {
1278 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1279 }else{
1280 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1281 }
1282}
e2b5e7aa 1283
e2b5e7aa 1284static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1285{
1286 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1287 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1288}
e2b5e7aa 1289
1290static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1291{
1292 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1293 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1294}
e2b5e7aa 1295
1296static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1297{
1298 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1299 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1300}
e2b5e7aa 1301
643aeae3 1302static void emit_writeword(int rt, void *addr)
57871462 1303{
643aeae3 1304 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1305 assert(offset<4096);
1306 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1307 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1308}
e2b5e7aa 1309
e2b5e7aa 1310static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1311{
1312 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1313 assert(rs1<16);
1314 assert(rs2<16);
1315 assert(hi<16);
1316 assert(lo<16);
1317 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1318}
e2b5e7aa 1319
1320static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1321{
1322 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1323 assert(rs1<16);
1324 assert(rs2<16);
1325 assert(hi<16);
1326 assert(lo<16);
1327 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1328}
1329
e2b5e7aa 1330static void emit_clz(int rs,int rt)
57871462 1331{
1332 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1333 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1334}
1335
e2b5e7aa 1336static void emit_subcs(int rs1,int rs2,int rt)
57871462 1337{
1338 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1339 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1340}
1341
e2b5e7aa 1342static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1343{
1344 assert(imm>0);
1345 assert(imm<32);
1346 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1347 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1348}
1349
e2b5e7aa 1350static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1351{
1352 assert(imm>0);
1353 assert(imm<32);
1354 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1355 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1356}
1357
e2b5e7aa 1358static void emit_negmi(int rs, int rt)
57871462 1359{
1360 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1361 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1362}
1363
e2b5e7aa 1364static void emit_negsmi(int rs, int rt)
57871462 1365{
1366 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1367 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1368}
1369
e2b5e7aa 1370static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1371{
1372 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1373 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1374}
1375
e2b5e7aa 1376static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1377{
1378 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1379 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1380}
1381
e2b5e7aa 1382static void emit_teq(int rs, int rt)
57871462 1383{
1384 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1385 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1386}
1387
3968e69e 1388static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1389{
1390 u_int armval;
cfbd3c6e 1391 genimm_checked(imm,&armval);
57871462 1392 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1393 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1394}
1395
57871462 1396// Conditionally select one of two immediates, optimizing for small code size
1397// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1398static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1399{
1400 u_int armval;
1401 if(genimm(imm2-imm1,&armval)) {
1402 emit_movimm(imm1,rt);
1403 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1404 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1405 }else if(genimm(imm1-imm2,&armval)) {
1406 emit_movimm(imm1,rt);
1407 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1408 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1409 }
1410 else {
665f33e1 1411 #ifndef HAVE_ARMV7
57871462 1412 emit_movimm(imm1,rt);
1413 add_literal((int)out,imm2);
1414 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1415 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1416 #else
1417 emit_movw(imm1&0x0000FFFF,rt);
1418 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1419 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1420 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1421 }
1422 emit_movt(imm1&0xFFFF0000,rt);
1423 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1424 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1425 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1426 }
1427 #endif
1428 }
1429}
1430
57871462 1431// special case for checking invalid_code
e2b5e7aa 1432static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1433{
1434 assert(imm<128&&imm>=0);
1435 assert(r>=0&&r<16);
1436 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1437 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1438 emit_cmpimm(HOST_TEMPREG,imm);
1439}
1440
e2b5e7aa 1441static void emit_callne(int a)
0bbd1454 1442{
1443 assem_debug("blne %x\n",a);
1444 u_int offset=genjmp(a);
1445 output_w32(0x1b000000|offset);
1446}
1447
57871462 1448// Used to preload hash table entries
e2b5e7aa 1449static unused void emit_prefetchreg(int r)
57871462 1450{
1451 assem_debug("pld %s\n",regname[r]);
1452 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1453}
1454
1455// Special case for mini_ht
e2b5e7aa 1456static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1457{
1458 assert(offset<4096);
1459 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1460 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1461}
1462
e2b5e7aa 1463static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1464{
1465 u_int armval;
cfbd3c6e 1466 genimm_checked(imm,&armval);
b9b61529 1467 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1468 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1469}
1470
e2b5e7aa 1471static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1472{
1473 u_int armval;
cfbd3c6e 1474 genimm_checked(imm,&armval);
b9b61529 1475 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1476 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1477}
1478
e2b5e7aa 1479static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1480{
1481 u_int armval;
1482 genimm_checked(imm,&armval);
1483 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1484 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1485}
1486
e2b5e7aa 1487static void emit_jno_unlikely(int a)
57871462 1488{
1489 //emit_jno(a);
1490 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1491 output_w32(0x72800000|rd_rn_rm(15,15,0));
1492}
1493
054175e9 1494static void save_regs_all(u_int reglist)
57871462 1495{
054175e9 1496 int i;
57871462 1497 if(!reglist) return;
1498 assem_debug("stmia fp,{");
054175e9 1499 for(i=0;i<16;i++)
1500 if(reglist&(1<<i))
1501 assem_debug("r%d,",i);
57871462 1502 assem_debug("}\n");
1503 output_w32(0xe88b0000|reglist);
1504}
e2b5e7aa 1505
054175e9 1506static void restore_regs_all(u_int reglist)
57871462 1507{
054175e9 1508 int i;
57871462 1509 if(!reglist) return;
1510 assem_debug("ldmia fp,{");
054175e9 1511 for(i=0;i<16;i++)
1512 if(reglist&(1<<i))
1513 assem_debug("r%d,",i);
57871462 1514 assem_debug("}\n");
1515 output_w32(0xe89b0000|reglist);
1516}
e2b5e7aa 1517
054175e9 1518// Save registers before function call
1519static void save_regs(u_int reglist)
1520{
4d646738 1521 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1522 save_regs_all(reglist);
1523}
e2b5e7aa 1524
054175e9 1525// Restore registers after function call
1526static void restore_regs(u_int reglist)
1527{
4d646738 1528 reglist&=CALLER_SAVE_REGS;
054175e9 1529 restore_regs_all(reglist);
1530}
57871462 1531
57871462 1532/* Stubs/epilogue */
1533
e2b5e7aa 1534static void literal_pool(int n)
57871462 1535{
1536 if(!literalcount) return;
1537 if(n) {
1538 if((int)out-literals[0][0]<4096-n) return;
1539 }
1540 u_int *ptr;
1541 int i;
1542 for(i=0;i<literalcount;i++)
1543 {
77750690 1544 u_int l_addr=(u_int)out;
1545 int j;
1546 for(j=0;j<i;j++) {
1547 if(literals[j][1]==literals[i][1]) {
1548 //printf("dup %08x\n",literals[i][1]);
1549 l_addr=literals[j][0];
1550 break;
1551 }
1552 }
57871462 1553 ptr=(u_int *)literals[i][0];
77750690 1554 u_int offset=l_addr-(u_int)ptr-8;
57871462 1555 assert(offset<4096);
1556 assert(!(offset&3));
1557 *ptr|=offset;
77750690 1558 if(l_addr==(u_int)out) {
1559 literals[i][0]=l_addr; // remember for dupes
1560 output_w32(literals[i][1]);
1561 }
57871462 1562 }
1563 literalcount=0;
1564}
1565
e2b5e7aa 1566static void literal_pool_jumpover(int n)
57871462 1567{
1568 if(!literalcount) return;
1569 if(n) {
1570 if((int)out-literals[0][0]<4096-n) return;
1571 }
df4dc2b1 1572 void *jaddr = out;
57871462 1573 emit_jmp(0);
1574 literal_pool(0);
df4dc2b1 1575 set_jump_target(jaddr, out);
57871462 1576}
1577
7c3a5182 1578// parsed by get_pointer, find_extjump_insn
1579static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1580{
1581 u_char *ptr=(u_char *)addr;
1582 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1583 (void)ptr;
1584
57871462 1585 emit_loadlp(target,0);
643aeae3 1586 emit_loadlp((u_int)addr,1);
d62c125a 1587 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1588 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1589//DEBUG >
1590#ifdef DEBUG_CYCLE_COUNT
643aeae3 1591 emit_readword(&last_count,ECX);
57871462 1592 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1593 emit_readword(&next_interupt,ECX);
1594 emit_writeword(HOST_CCREG,&Count);
57871462 1595 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1596 emit_writeword(ECX,&last_count);
57871462 1597#endif
1598//DEBUG <
2a014d73 1599 emit_far_jump(linker);
57871462 1600}
1601
d1e4ebd9 1602static void check_extjump2(void *src)
1603{
1604 u_int *ptr = src;
1605 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1606 (void)ptr;
1607}
1608
13e35c04 1609// put rt_val into rt, potentially making use of rs with value rs_val
1610static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1611{
8575a877 1612 u_int armval;
1613 int diff;
1614 if(genimm(rt_val,&armval)) {
1615 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1616 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1617 return;
1618 }
1619 if(genimm(~rt_val,&armval)) {
1620 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1621 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1622 return;
1623 }
1624 diff=rt_val-rs_val;
1625 if(genimm(diff,&armval)) {
1626 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1627 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1628 return;
1629 }else if(genimm(-diff,&armval)) {
1630 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1631 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1632 return;
1633 }
1634 emit_movimm(rt_val,rt);
1635}
1636
// return 1 if emit_movimm_from can do its job cheaply
// i.e. v1==v2, or v2-v1 (or its negation) is below 0x100 once trailing
// pairs of zero bits have been shifted out.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;

  int diff=v2-v1;
  u_int bits;

  bits=diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  return 0;
}
cbbab9cd 1652
b14b6a8f 1653static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1654{
1655 switch(type) {
1656 case LOADB_STUB: emit_signextend8(rs,rt); break;
1657 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1658 case LOADH_STUB: emit_signextend16(rs,rt); break;
1659 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1660 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1661 default: assert(0);
1662 }
1663}
1664
b1be1eee 1665#include "pcsxmem.h"
1666#include "pcsxmem_inline.c"
b1be1eee 1667
e2b5e7aa 1668static void do_readstub(int n)
57871462 1669{
b14b6a8f 1670 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1671 literal_pool(256);
b14b6a8f 1672 set_jump_target(stubs[n].addr, out);
1673 enum stub_type type=stubs[n].type;
1674 int i=stubs[n].a;
1675 int rs=stubs[n].b;
81dbbf4c 1676 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1677 u_int reglist=stubs[n].e;
81dbbf4c 1678 const signed char *i_regmap=i_regs->regmap;
581335b0 1679 int rt;
b9b61529 1680 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 1681 rt=get_reg(i_regmap,FTEMP);
1682 }else{
57871462 1683 rt=get_reg(i_regmap,rt1[i]);
1684 }
1685 assert(rs>=0);
df4dc2b1 1686 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1687 void *restore_jump = NULL;
c6c3b1b3 1688 reglist|=(1<<rs);
1689 for(r=0;r<=12;r++) {
1690 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1691 temp=r; break;
1692 }
1693 }
db829eeb 1694 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1695 reglist&=~(1<<rt);
1696 if(temp==-1) {
1697 save_regs(reglist);
1698 regs_saved=1;
1699 temp=(rs==0)?2:0;
1700 }
1701 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1702 temp2=1;
643aeae3 1703 emit_readword(&mem_rtab,temp);
c6c3b1b3 1704 emit_shrimm(rs,12,temp2);
1705 emit_readword_dualindexedx4(temp,temp2,temp2);
1706 emit_lsls_imm(temp2,1,temp2);
1707 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1708 switch(type) {
1709 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1710 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1711 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1712 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1713 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1714 default: assert(0);
c6c3b1b3 1715 }
1716 }
1717 if(regs_saved) {
df4dc2b1 1718 restore_jump=out;
c6c3b1b3 1719 emit_jcc(0); // jump to reg restore
1720 }
1721 else
b14b6a8f 1722 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1723
1724 if(!regs_saved)
1725 save_regs(reglist);
643aeae3 1726 void *handler=NULL;
c6c3b1b3 1727 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1728 handler=jump_handler_read8;
c6c3b1b3 1729 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1730 handler=jump_handler_read16;
c6c3b1b3 1731 if(type==LOADW_STUB)
643aeae3 1732 handler=jump_handler_read32;
1733 assert(handler);
b96d3df7 1734 pass_args(rs,temp2);
c6c3b1b3 1735 int cc=get_reg(i_regmap,CCREG);
1736 if(cc<0)
1737 emit_loadreg(CCREG,2);
bb4f300c 1738 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1739 emit_far_call(handler);
c6c3b1b3 1740 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 1741 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1742 }
1743 if(restore_jump)
df4dc2b1 1744 set_jump_target(restore_jump, out);
c6c3b1b3 1745 restore_regs(reglist);
b14b6a8f 1746 emit_jmp(stubs[n].retaddr); // return address
57871462 1747}
1748
81dbbf4c 1749static void inline_readstub(enum stub_type type, int i, u_int addr,
1750 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1751{
1752 int rs=get_reg(regmap,target);
57871462 1753 int rt=get_reg(regmap,target);
535d208a 1754 if(rs<0) rs=get_reg(regmap,-1);
57871462 1755 assert(rs>=0);
2a014d73 1756 u_int is_dynamic;
687b4580 1757 uintptr_t host_addr = 0;
643aeae3 1758 void *handler;
b1be1eee 1759 int cc=get_reg(regmap,CCREG);
bb4f300c 1760 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
b1be1eee 1761 return;
643aeae3 1762 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1763 if (handler == NULL) {
db829eeb 1764 if(rt<0||rt1[i]==0)
c6c3b1b3 1765 return;
13e35c04 1766 if(addr!=host_addr)
1767 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1768 switch(type) {
1769 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1770 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1771 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1772 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1773 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1774 default: assert(0);
1775 }
1776 return;
1777 }
b1be1eee 1778 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1779 if(is_dynamic) {
1780 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1781 handler=jump_handler_read8;
b1be1eee 1782 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1783 handler=jump_handler_read16;
b1be1eee 1784 if(type==LOADW_STUB)
643aeae3 1785 handler=jump_handler_read32;
b1be1eee 1786 }
c6c3b1b3 1787
1788 // call a memhandler
db829eeb 1789 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1790 reglist&=~(1<<rt);
1791 save_regs(reglist);
1792 if(target==0)
1793 emit_movimm(addr,0);
1794 else if(rs!=0)
1795 emit_mov(rs,0);
b1be1eee 1796 if(cc<0)
1797 emit_loadreg(CCREG,2);
1798 if(is_dynamic) {
1799 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
bb4f300c 1800 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
c6c3b1b3 1801 }
b1be1eee 1802 else {
643aeae3 1803 emit_readword(&last_count,3);
bb4f300c 1804 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
b1be1eee 1805 emit_add(2,3,2);
643aeae3 1806 emit_writeword(2,&Count);
b1be1eee 1807 }
1808
2a014d73 1809 emit_far_call(handler);
b1be1eee 1810
db829eeb 1811 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 1812 switch(type) {
1813 case LOADB_STUB: emit_signextend8(0,rt); break;
1814 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1815 case LOADH_STUB: emit_signextend16(0,rt); break;
1816 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1817 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1818 default: assert(0);
1819 }
1820 }
1821 restore_regs(reglist);
57871462 1822}
1823
e2b5e7aa 1824static void do_writestub(int n)
57871462 1825{
b14b6a8f 1826 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1827 literal_pool(256);
b14b6a8f 1828 set_jump_target(stubs[n].addr, out);
1829 enum stub_type type=stubs[n].type;
1830 int i=stubs[n].a;
1831 int rs=stubs[n].b;
81dbbf4c 1832 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1833 u_int reglist=stubs[n].e;
81dbbf4c 1834 const signed char *i_regmap=i_regs->regmap;
581335b0 1835 int rt,r;
b9b61529 1836 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 1837 rt=get_reg(i_regmap,r=FTEMP);
1838 }else{
57871462 1839 rt=get_reg(i_regmap,r=rs2[i]);
1840 }
1841 assert(rs>=0);
1842 assert(rt>=0);
b14b6a8f 1843 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1844 void *restore_jump = NULL;
b96d3df7 1845 int reglist2=reglist|(1<<rs)|(1<<rt);
1846 for(rtmp=0;rtmp<=12;rtmp++) {
1847 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1848 temp=rtmp; break;
1849 }
1850 }
1851 if(temp==-1) {
1852 save_regs(reglist);
1853 regs_saved=1;
1854 for(rtmp=0;rtmp<=3;rtmp++)
1855 if(rtmp!=rs&&rtmp!=rt)
1856 {temp=rtmp;break;}
1857 }
1858 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1859 temp2=3;
643aeae3 1860 emit_readword(&mem_wtab,temp);
b96d3df7 1861 emit_shrimm(rs,12,temp2);
1862 emit_readword_dualindexedx4(temp,temp2,temp2);
1863 emit_lsls_imm(temp2,1,temp2);
1864 switch(type) {
1865 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1866 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1867 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1868 default: assert(0);
1869 }
1870 if(regs_saved) {
df4dc2b1 1871 restore_jump=out;
b96d3df7 1872 emit_jcc(0); // jump to reg restore
1873 }
1874 else
b14b6a8f 1875 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1876
1877 if(!regs_saved)
1878 save_regs(reglist);
643aeae3 1879 void *handler=NULL;
b96d3df7 1880 switch(type) {
643aeae3 1881 case STOREB_STUB: handler=jump_handler_write8; break;
1882 case STOREH_STUB: handler=jump_handler_write16; break;
1883 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1884 default: assert(0);
b96d3df7 1885 }
643aeae3 1886 assert(handler);
b96d3df7 1887 pass_args(rs,rt);
1888 if(temp2!=3)
1889 emit_mov(temp2,3);
1890 int cc=get_reg(i_regmap,CCREG);
1891 if(cc<0)
1892 emit_loadreg(CCREG,2);
bb4f300c 1893 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
b96d3df7 1894 // returns new cycle_count
2a014d73 1895 emit_far_call(handler);
bb4f300c 1896 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
b96d3df7 1897 if(cc<0)
1898 emit_storereg(CCREG,2);
1899 if(restore_jump)
df4dc2b1 1900 set_jump_target(restore_jump, out);
b96d3df7 1901 restore_regs(reglist);
b14b6a8f 1902 emit_jmp(stubs[n].retaddr);
57871462 1903}
1904
81dbbf4c 1905static void inline_writestub(enum stub_type type, int i, u_int addr,
1906 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1907{
1908 int rs=get_reg(regmap,-1);
57871462 1909 int rt=get_reg(regmap,target);
1910 assert(rs>=0);
1911 assert(rt>=0);
687b4580 1912 uintptr_t host_addr = 0;
643aeae3 1913 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1914 if (handler == NULL) {
13e35c04 1915 if(addr!=host_addr)
1916 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1917 switch(type) {
1918 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1919 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1920 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1921 default: assert(0);
1922 }
1923 return;
1924 }
1925
1926 // call a memhandler
1927 save_regs(reglist);
13e35c04 1928 pass_args(rs,rt);
b96d3df7 1929 int cc=get_reg(regmap,CCREG);
1930 if(cc<0)
1931 emit_loadreg(CCREG,2);
bb4f300c 1932 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
643aeae3 1933 emit_movimm((u_int)handler,3);
b96d3df7 1934 // returns new cycle_count
2a014d73 1935 emit_far_call(jump_handler_write_h);
bb4f300c 1936 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
b96d3df7 1937 if(cc<0)
1938 emit_storereg(CCREG,2);
1939 restore_regs(reglist);
57871462 1940}
1941
d1e4ebd9 1942// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
7c3a5182 1943static void do_dirty_stub_emit_args(u_int arg0)
57871462 1944{
665f33e1 1945 #ifndef HAVE_ARMV7
7c3a5182 1946 emit_loadlp((int)source, 1);
1947 emit_loadlp((int)copy, 2);
1948 emit_loadlp(slen*4, 3);
57871462 1949 #else
7c3a5182 1950 emit_movw(((u_int)source)&0x0000FFFF, 1);
1951 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1952 emit_movt(((u_int)source)&0xFFFF0000, 1);
1953 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1954 emit_movw(slen*4, 3);
57871462 1955 #endif
7c3a5182 1956 emit_movimm(arg0, 0);
1957}
1958
1959static void *do_dirty_stub(int i)
1960{
1961 assem_debug("do_dirty_stub %x\n",start+i*4);
1962 do_dirty_stub_emit_args(start + i*4);
2a014d73 1963 emit_far_call(verify_code);
df4dc2b1 1964 void *entry = out;
57871462 1965 load_regs_entry(i);
df4dc2b1 1966 if (entry == out)
1967 entry = instr_addr[i];
57871462 1968 emit_jmp(instr_addr[i]);
1969 return entry;
1970}
1971
e2b5e7aa 1972static void do_dirty_stub_ds()
57871462 1973{
7c3a5182 1974 do_dirty_stub_emit_args(start + 1);
2a014d73 1975 emit_far_call(verify_code_ds);
57871462 1976}
1977
57871462 1978/* Special assem */
1979
81dbbf4c 1980static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1981{
1982 save_regs_all(reglist);
81dbbf4c 1983 cop2_call_stall_check(op, i, i_regs, 0);
82ed88eb 1984#ifdef PCNT
81dbbf4c 1985 emit_movimm(op, 0);
2a014d73 1986 emit_far_call(pcnt_gte_start);
82ed88eb 1987#endif
81dbbf4c 1988 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1989}
1990
1991static void c2op_epilogue(u_int op,u_int reglist)
1992{
82ed88eb 1993#ifdef PCNT
1994 emit_movimm(op,0);
2a014d73 1995 emit_far_call(pcnt_gte_end);
82ed88eb 1996#endif
054175e9 1997 restore_regs_all(reglist);
1998}
1999
6c0eefaf 2000static void c2op_call_MACtoIR(int lm,int need_flags)
2001{
2002 if(need_flags)
2a014d73 2003 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2004 else
2a014d73 2005 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2006}
2007
2008static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2009{
2a014d73 2010 emit_far_call(func);
6c0eefaf 2011 // func is C code and trashes r0
2012 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2013 if(need_flags||need_ir)
2014 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2015 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2016}
2017
81dbbf4c 2018static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2019{
81dbbf4c 2020 u_int c2op = source[i] & 0x3f;
2021 u_int reglist_full = get_host_reglist(i_regs->regmap);
2022 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2023 int need_flags, need_ir;
b9b61529 2024
2025 if (gte_handlers[c2op]!=NULL) {
bedfea38 2026 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2027 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2028 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2029 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2030 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2031 need_flags=0;
6c0eefaf 2032 int shift = (source[i] >> 19) & 1;
2033 int lm = (source[i] >> 10) & 1;
054175e9 2034 switch(c2op) {
19776aef 2035#ifndef DRC_DBG
054175e9 2036 case GTE_MVMVA: {
82336ba3 2037#ifdef HAVE_ARMV5
054175e9 2038 int v = (source[i] >> 15) & 3;
2039 int cv = (source[i] >> 13) & 3;
2040 int mx = (source[i] >> 17) & 3;
4d646738 2041 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2042 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2043 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2044 if(v<3)
2045 emit_ldrd(v*8,0,4);
2046 else {
2047 emit_movzwl_indexed(9*4,0,4); // gteIR
2048 emit_movzwl_indexed(10*4,0,6);
2049 emit_movzwl_indexed(11*4,0,5);
2050 emit_orrshl_imm(6,16,4);
2051 }
2052 if(mx<3)
2053 emit_addimm(0,32*4+mx*8*4,6);
2054 else
643aeae3 2055 emit_readword(&zeromem_ptr,6);
054175e9 2056 if(cv<3)
2057 emit_addimm(0,32*4+(cv*8+5)*4,7);
2058 else
643aeae3 2059 emit_readword(&zeromem_ptr,7);
054175e9 2060#ifdef __ARM_NEON__
2061 emit_movimm(source[i],1); // opcode
2a014d73 2062 emit_far_call(gteMVMVA_part_neon);
054175e9 2063 if(need_flags) {
2064 emit_movimm(lm,1);
2a014d73 2065 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2066 }
2067#else
2068 if(cv==3&&shift)
2a014d73 2069 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2070 else {
2071 emit_movimm(shift,1);
2a014d73 2072 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2073 }
6c0eefaf 2074 if(need_flags||need_ir)
2075 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2076#endif
2077#else /* if not HAVE_ARMV5 */
81dbbf4c 2078 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2079 emit_movimm(source[i],1); // opcode
643aeae3 2080 emit_writeword(1,&psxRegs.code);
2a014d73 2081 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2082#endif
2083 break;
2084 }
6c0eefaf 2085 case GTE_OP:
81dbbf4c 2086 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2087 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2088 if(need_flags||need_ir) {
2089 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2090 c2op_call_MACtoIR(lm,need_flags);
2091 }
2092 break;
2093 case GTE_DPCS:
81dbbf4c 2094 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2095 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2096 break;
2097 case GTE_INTPL:
81dbbf4c 2098 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2099 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2100 break;
2101 case GTE_SQR:
81dbbf4c 2102 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2103 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2104 if(need_flags||need_ir) {
2105 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2106 c2op_call_MACtoIR(lm,need_flags);
2107 }
2108 break;
2109 case GTE_DCPL:
81dbbf4c 2110 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2111 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2112 break;
2113 case GTE_GPF:
81dbbf4c 2114 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2115 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2116 break;
2117 case GTE_GPL:
81dbbf4c 2118 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2119 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2120 break;
19776aef 2121#endif
054175e9 2122 default:
81dbbf4c 2123 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2124#ifdef DRC_DBG
2125 emit_movimm(source[i],1); // opcode
643aeae3 2126 emit_writeword(1,&psxRegs.code);
19776aef 2127#endif
2a014d73 2128 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2129 break;
2130 }
2131 c2op_epilogue(c2op,reglist);
2132 }
b9b61529 2133}
2134
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 *
 * sl   - host register holding the value being written
 * temp - host register receiving the adjusted value
 *
 * Intended result (matching the reference expression below):
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * Bit 31 of the incoming value is shifted out first so that it can only
 * be set by the summary test, never carried over from the input.
 * NOTE(review): this relies on emit_shrimm being a logical (zero-filling)
 * right shift - confirm against its definition.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  //value = value & 0x7ffff000;
  emit_shlimm(sl,1,temp);    // drop bit 31
  emit_shrimm(temp,13,temp); // drop bits 11..0
  emit_shlimm(temp,12,temp); // move bits 30..12 back into place
  //if (value & 0x7f87e000) value |= 0x80000000;
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2146
2147static void do_mfc2_31_one(u_int copr,signed char temp)
2148{
2149 emit_readword(&reg_cop2d[copr],temp);
2150 emit_testimm(temp,0x8000); // do we need this?
2151 emit_andne_imm(temp,0,temp);
2152 emit_cmpimm(temp,0xf80);
2153 emit_andimm(temp,0xf80,temp);
2154 emit_cmovae_imm(0xf80,temp);
2155}
2156
2157static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2158{
2159 if (temp < 0) {
2160 host_tempreg_acquire();
2161 temp = HOST_TEMPREG;
2162 }
2163 do_mfc2_31_one(9,temp);
2164 emit_shrimm(temp,7,tl);
2165 do_mfc2_31_one(10,temp);
2166 emit_orrshr_imm(temp,2,tl);
2167 do_mfc2_31_one(11,temp);
2168 emit_orrshl_imm(temp,3,tl);
2169 emit_writeword(tl,&reg_cop2d[29]);
2170 if (temp == HOST_TEMPREG)
2171 host_tempreg_release();
2172}
2173
e2b5e7aa 2174static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2175{
2176 // case 0x18: MULT
2177 // case 0x19: MULTU
2178 // case 0x1A: DIV
2179 // case 0x1B: DIVU
2180 // case 0x1C: DMULT
2181 // case 0x1D: DMULTU
2182 // case 0x1E: DDIV
2183 // case 0x1F: DDIVU
2184 if(rs1[i]&&rs2[i])
2185 {
2186 if((opcode2[i]&4)==0) // 32-bit
2187 {
2188 if(opcode2[i]==0x18) // MULT
2189 {
2190 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2191 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2192 signed char hi=get_reg(i_regs->regmap,HIREG);
2193 signed char lo=get_reg(i_regs->regmap,LOREG);
2194 assert(m1>=0);
2195 assert(m2>=0);
2196 assert(hi>=0);
2197 assert(lo>=0);
2198 emit_smull(m1,m2,hi,lo);
2199 }
2200 if(opcode2[i]==0x19) // MULTU
2201 {
2202 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2203 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2204 signed char hi=get_reg(i_regs->regmap,HIREG);
2205 signed char lo=get_reg(i_regs->regmap,LOREG);
2206 assert(m1>=0);
2207 assert(m2>=0);
2208 assert(hi>=0);
2209 assert(lo>=0);
2210 emit_umull(m1,m2,hi,lo);
2211 }
2212 if(opcode2[i]==0x1A) // DIV
2213 {
2214 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2215 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2216 assert(d1>=0);
2217 assert(d2>=0);
2218 signed char quotient=get_reg(i_regs->regmap,LOREG);
2219 signed char remainder=get_reg(i_regs->regmap,HIREG);
2220 assert(quotient>=0);
2221 assert(remainder>=0);
2222 emit_movs(d1,remainder);
44a80f6a 2223 emit_movimm(0xffffffff,quotient);
2224 emit_negmi(quotient,quotient); // .. quotient and ..
2225 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2226 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2227 emit_jeq(out+52); // Division by zero
82336ba3 2228 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2229#ifdef HAVE_ARMV5
57871462 2230 emit_clz(HOST_TEMPREG,quotient);
2231 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2232#else
2233 emit_movimm(0,quotient);
2234 emit_addpl_imm(quotient,1,quotient);
2235 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2236 emit_jns(out-2*4);
665f33e1 2237#endif
57871462 2238 emit_orimm(quotient,1<<31,quotient);
2239 emit_shr(quotient,quotient,quotient);
2240 emit_cmp(remainder,HOST_TEMPREG);
2241 emit_subcs(remainder,HOST_TEMPREG,remainder);
2242 emit_adcs(quotient,quotient,quotient);
2243 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2244 emit_jcc(out-16); // -4
57871462 2245 emit_teq(d1,d2);
2246 emit_negmi(quotient,quotient);
2247 emit_test(d1,d1);
2248 emit_negmi(remainder,remainder);
2249 }
2250 if(opcode2[i]==0x1B) // DIVU
2251 {
2252 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2253 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2254 assert(d1>=0);
2255 assert(d2>=0);
2256 signed char quotient=get_reg(i_regs->regmap,LOREG);
2257 signed char remainder=get_reg(i_regs->regmap,HIREG);
2258 assert(quotient>=0);
2259 assert(remainder>=0);
44a80f6a 2260 emit_mov(d1,remainder);
2261 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2262 emit_test(d2,d2);
7c3a5182 2263 emit_jeq(out+40); // Division by zero
665f33e1 2264#ifdef HAVE_ARMV5
57871462 2265 emit_clz(d2,HOST_TEMPREG);
2266 emit_movimm(1<<31,quotient);
2267 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2268#else
2269 emit_movimm(0,HOST_TEMPREG);
82336ba3 2270 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2271 emit_lslpls_imm(d2,1,d2);
7c3a5182 2272 emit_jns(out-2*4);
665f33e1 2273 emit_movimm(1<<31,quotient);
2274#endif
57871462 2275 emit_shr(quotient,HOST_TEMPREG,quotient);
2276 emit_cmp(remainder,d2);
2277 emit_subcs(remainder,d2,remainder);
2278 emit_adcs(quotient,quotient,quotient);
2279 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2280 emit_jcc(out-16); // -4
57871462 2281 }
2282 }
2283 else // 64-bit
71e490c5 2284 assert(0);
57871462 2285 }
2286 else
2287 {
2288 // Multiply by zero is zero.
2289 // MIPS does not have a divide by zero exception.
2290 // The result is undefined, we return zero.
2291 signed char hr=get_reg(i_regs->regmap,HIREG);
2292 signed char lr=get_reg(i_regs->regmap,LOREG);
2293 if(hr>=0) emit_zeroreg(hr);
2294 if(lr>=0) emit_zeroreg(lr);
2295 }
2296}
2297#define multdiv_assemble multdiv_assemble_arm
2298
d1e4ebd9 2299static void do_jump_vaddr(int rs)
2300{
2a014d73 2301 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2302}
2303
/*
 * Intentionally emits nothing on ARM. On x86 this step loads the value
 * 0xf8 into the register; on ARM the hash is computed with a single
 * instruction (see do_rhash), so no preload is required.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2308
e2b5e7aa 2309static void do_preload_rhtbl(int ht) {
57871462 2310 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2311}
2312
/*
 * Emit the hash computation: rh = rs & 0xf8.
 */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2316
e2b5e7aa 2317static void do_miniht_load(int ht,int rh) {
57871462 2318 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2319 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2320}
2321
/*
 * Emit the mini hash table dispatch: compare rh with rs and, on equality,
 * load register 15 from [ht, 4]; otherwise fall through to the generic
 * jump_vaddr path. With CORTEX_A8_BRANCH_PREDICTION_HACK the address is
 * first moved to register 7 and the jump always goes through register 7.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (rs != 7)
    emit_mov(rs, 7);
  do_jump_vaddr(7);
#else
  do_jump_vaddr(rs);
#endif
}
2332
e2b5e7aa 2333static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2334 #ifndef HAVE_ARMV7
57871462 2335 emit_movimm(return_address,rt); // PC into link register
643aeae3 2336 add_to_linker(out,return_address,1);
57871462 2337 emit_pcreladdr(temp);
643aeae3 2338 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2339 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2340 #else
2341 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2342 add_to_linker(out,return_address,1);
57871462 2343 emit_pcreladdr(temp);
643aeae3 2344 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2345 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2346 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2347 #endif
2348}
2349
57871462 2350// CPU-architecture-specific initialization
2a014d73 2351static void arch_init(void)
2352{
2353 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2354 struct tramp_insns *ops = ndrc->tramp.ops;
2355 size_t i;
2356 assert(!(diff & 3));
2357 assert(diff < 0x1000);
2358 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2359 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2360 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2361 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2362}
b9b61529 2363
2364// vim:shiftwidth=2:expandtab