drc: add a hack for f1 games
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
/* Host register bitmask: bits 0-3 and 12 are always set; Mach-O builds
 * additionally set bit 9.
 * NOTE(review): presumably bit N stands for rN -- confirm against users. */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

/* Tag for helpers that are allowed to stay unreferenced. */
#define unused __attribute__((unused))

/* With DRC_DBG defined, silence the "unused ..." diagnostics. */
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
43
/* Entry points declared here; their definitions are not in this file's
 * visible part. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/* jump_vaddr_rN handler indexed by N.  Slots 11 and 13-15 have no
 * handler and are left as null pointers. */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 78
0bbd1454 79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
d148d265 110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 111
57871462 112/* Linker */
113
df4dc2b1 114static void set_jump_target(void *addr, void *target_)
57871462 115{
df4dc2b1 116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
57871462 118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 129 assert(((uintptr_t)addr&3)==0);
57871462 130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 134 assert(((uintptr_t)addr&3)==0);
57871462 135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
// Disabled variant of set_jump_target.  It can optionally copy the
// instruction found at the branch target into the slot just before the
// branch and then branch one instruction further.  Works, but the
// difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];

  // copying is only allowed when the slot holds the 0xe28dd000 filler
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0            // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000)) {
    copy = 0;
  }
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 180
181/* Literal pool */
e2b5e7aa 182static void add_literal(int addr,int val)
57871462 183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
9f51b4b9 187 literalcount++;
188}
57871462 189
d148d265 190// from a pointer to external jump stub (which was produced by emit_extjump2)
191// find where the jumping insn is
192static void *find_extjump_insn(void *stub)
57871462 193{
194 int *ptr=(int *)(stub+4);
d148d265 195 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 196 u_int offset=*ptr&0xfff;
d148d265 197 void **l_ptr=(void *)ptr+offset+8;
198 return *l_ptr;
57871462 199}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
643aeae3 205static void *get_pointer(void *stub)
57871462 206{
207 //printf("get_pointer(%x)\n",(int)stub);
d148d265 208 int *i_ptr=find_extjump_insn(stub);
3d680478 209 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 211}
212
// Map a "dirty" entry point to its "clean" one by stepping over the
// call to verify_code.  If the word after that call is an unconditional
// branch, the branch destination is returned instead.
static void *get_clean_addr(void *addr)
{
  // words preceding the bl: 6 with ARMv7, 4 otherwise
#ifdef HAVE_ARMV7
  signed int *insn = (signed int *)addr + 6;
#else
  signed int *insn = (signed int *)addr + 4;
#endif
  // the bl may sit one word later
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
231
3968e69e 232static int verify_dirty(const u_int *ptr)
57871462 233{
665f33e1 234 #ifndef HAVE_ARMV7
16c8be17 235 u_int offset;
57871462 236 // get from literal pool
15776b68 237 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 238 offset=*ptr&0xfff;
239 u_int source=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 assert((*ptr&0xFFFF0000)==0xe59f0000);
242 offset=*ptr&0xfff;
243 u_int copy=*(u_int*)((void *)ptr+offset+8);
244 ptr++;
245 assert((*ptr&0xFFFF0000)==0xe59f0000);
246 offset=*ptr&0xfff;
247 u_int len=*(u_int*)((void *)ptr+offset+8);
248 ptr++;
249 ptr++;
57871462 250 #else
251 // ARMv7 movw/movt
252 assert((*ptr&0xFFF00000)==0xe3000000);
253 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
254 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
255 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
256 ptr+=6;
257 #endif
258 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
259 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 260 //printf("verify_dirty: %x %x %x\n",source,copy,len);
261 return !memcmp((void *)source,(void *)copy,len);
262}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
df4dc2b1 266static int isclean(void *addr)
57871462 267{
665f33e1 268 #ifndef HAVE_ARMV7
581335b0 269 u_int *ptr=((u_int *)addr)+4;
57871462 270 #else
581335b0 271 u_int *ptr=((u_int *)addr)+6;
57871462 272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
4a35de07 280// get source that block at addr was compiled from (host pointers)
01d26796 281static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 282{
643aeae3 283 u_int *ptr = addr;
665f33e1 284 #ifndef HAVE_ARMV7
16c8be17 285 u_int offset;
57871462 286 // get from literal pool
15776b68 287 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
57871462 300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
57871462 312}
313
57871462 314// Allocate a specific ARM register.
e2b5e7aa 315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 316{
317 int n;
f776eb14 318 int dirty=0;
9f51b4b9 319
57871462 320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
f776eb14 323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
57871462 327 }
9f51b4b9 328
57871462 329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
f776eb14 331 cur->dirty|=dirty<<hr;
57871462 332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
e2b5e7aa 336static void alloc_cc(struct regstat *cur,int i)
57871462 337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
57871462 341/* Assembler */
342
e2b5e7aa 343static unused char regname[16][4] = {
57871462 344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
e2b5e7aa 361static void output_w32(u_int word)
57871462 362{
363 *((u_int *)out)=word;
364 out+=4;
365}
e2b5e7aa 366
// Build the register fields of an instruction word: rn at bit 16, rd at
// bit 12, rm at bit 0.  Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 374
// Build register fields plus a rotated 8-bit immediate: rn at bit 16, rd
// at bit 12, imm in the low 8 bits, and ((32-shift)&30) placed at bit 7.
// shift must be even, imm below 256, registers below 16.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);

  const u_int rotation = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotation | imm;
}
e2b5e7aa 383
// Try to express imm as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit operand field in *encoded;
// on failure returns 0 and leaves *encoded at 0.  imm == 0 encodes as 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  // rotate right two bits at a time until the value fits in 8 bits
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 399
400static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
581335b0 404 (void)ret;
cfbd3c6e 405}
e2b5e7aa 406
407static u_int genjmp(u_int addr)
57871462 408{
7c3a5182 409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
e80343e2 414 return 0;
415 }
57871462 416 return ((u_int)offset>>2)&0xffffff;
417}
418
d1e4ebd9 419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
e2b5e7aa 426static void emit_mov(int rs,int rt)
57871462 427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_movs(int rs,int rt)
57871462 433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
e2b5e7aa 438static void emit_add(int rs1,int rs2,int rt)
57871462 439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
39b71d9a 444static void emit_adds(int rs1,int rs2,int rt)
445{
446 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
448}
449#define emit_adds_ptr emit_adds
450
e2b5e7aa 451static void emit_adcs(int rs1,int rs2,int rt)
57871462 452{
453 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
454 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
455}
456
e2b5e7aa 457static void emit_neg(int rs, int rt)
57871462 458{
459 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
460 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
461}
462
e2b5e7aa 463static void emit_sub(int rs1,int rs2,int rt)
57871462 464{
465 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
466 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
467}
468
e2b5e7aa 469static void emit_zeroreg(int rt)
57871462 470{
471 assem_debug("mov %s,#0\n",regname[rt]);
472 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
473}
474
e2b5e7aa 475static void emit_loadlp(u_int imm,u_int rt)
790ee18e 476{
477 add_literal((int)out,imm);
478 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
479 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
480}
e2b5e7aa 481
482static void emit_movw(u_int imm,u_int rt)
790ee18e 483{
484 assert(imm<65536);
485 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
486 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
487}
e2b5e7aa 488
489static void emit_movt(u_int imm,u_int rt)
790ee18e 490{
491 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
492 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
493}
e2b5e7aa 494
495static void emit_movimm(u_int imm,u_int rt)
790ee18e 496{
497 u_int armval;
498 if(genimm(imm,&armval)) {
499 assem_debug("mov %s,#%d\n",regname[rt],imm);
500 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
501 }else if(genimm(~imm,&armval)) {
502 assem_debug("mvn %s,#%d\n",regname[rt],imm);
503 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
504 }else if(imm<65536) {
665f33e1 505 #ifndef HAVE_ARMV7
790ee18e 506 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
507 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
508 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
509 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
510 #else
511 emit_movw(imm,rt);
512 #endif
513 }else{
665f33e1 514 #ifndef HAVE_ARMV7
790ee18e 515 emit_loadlp(imm,rt);
516 #else
517 emit_movw(imm&0x0000FFFF,rt);
518 emit_movt(imm&0xFFFF0000,rt);
519 #endif
520 }
521}
e2b5e7aa 522
523static void emit_pcreladdr(u_int rt)
790ee18e 524{
525 assem_debug("add %s,pc,#?\n",regname[rt]);
526 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
527}
528
e2b5e7aa 529static void emit_loadreg(int r, int hr)
57871462 530{
3d624f89 531 if(r&64) {
c43b5311 532 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 533 assert(0);
534 return;
3d624f89 535 }
57871462 536 if((r&63)==0)
537 emit_zeroreg(hr);
538 else {
7c3a5182 539 int addr = (int)&psxRegs.GPR.r[r];
540 switch (r) {
541 //case HIREG: addr = &hi; break;
542 //case LOREG: addr = &lo; break;
543 case CCREG: addr = (int)&cycle_count; break;
544 case CSREG: addr = (int)&Status; break;
545 case INVCP: addr = (int)&invc_ptr; break;
546 default: assert(r < 34); break;
547 }
57871462 548 u_int offset = addr-(u_int)&dynarec_local;
549 assert(offset<4096);
550 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
551 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
552 }
553}
e2b5e7aa 554
555static void emit_storereg(int r, int hr)
57871462 556{
3d624f89 557 if(r&64) {
c43b5311 558 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 559 assert(0);
560 return;
3d624f89 561 }
7c3a5182 562 int addr = (int)&psxRegs.GPR.r[r];
563 switch (r) {
564 //case HIREG: addr = &hi; break;
565 //case LOREG: addr = &lo; break;
566 case CCREG: addr = (int)&cycle_count; break;
567 default: assert(r < 34); break;
568 }
57871462 569 u_int offset = addr-(u_int)&dynarec_local;
570 assert(offset<4096);
571 assem_debug("str %s,fp+%d\n",regname[hr],offset);
572 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
573}
574
e2b5e7aa 575static void emit_test(int rs, int rt)
57871462 576{
577 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
578 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
579}
580
e2b5e7aa 581static void emit_testimm(int rs,int imm)
57871462 582{
583 u_int armval;
5a05d80c 584 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 585 genimm_checked(imm,&armval);
57871462 586 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
587}
588
e2b5e7aa 589static void emit_testeqimm(int rs,int imm)
b9b61529 590{
591 u_int armval;
592 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 593 genimm_checked(imm,&armval);
b9b61529 594 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
595}
596
e2b5e7aa 597static void emit_not(int rs,int rt)
57871462 598{
599 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
600 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
601}
602
e2b5e7aa 603static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 604{
605 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
606 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
607}
608
e2b5e7aa 609static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 610{
611 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
612 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
613}
e2b5e7aa 614
e2b5e7aa 615static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 616{
617 assert(rs<16);
618 assert(rt<16);
619 assert(imm<32);
620 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
621 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
622}
623
e2b5e7aa 624static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 625{
626 assert(rs<16);
627 assert(rt<16);
628 assert(imm<32);
629 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
630 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
631}
632
e2b5e7aa 633static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 634{
635 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
636 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
637}
638
3968e69e 639static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
640{
641 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
642 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
643}
644
e2b5e7aa 645static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 646{
647 assert(rs<16);
648 assert(rt<16);
649 if(imm!=0) {
57871462 650 u_int armval;
651 if(genimm(imm,&armval)) {
652 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
653 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
654 }else if(genimm(-imm,&armval)) {
8a0a8423 655 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 656 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 657 #ifdef HAVE_ARMV7
658 }else if(rt!=rs&&(u_int)imm<65536) {
659 emit_movw(imm&0x0000ffff,rt);
660 emit_add(rs,rt,rt);
661 }else if(rt!=rs&&(u_int)-imm<65536) {
662 emit_movw(-imm&0x0000ffff,rt);
663 emit_sub(rs,rt,rt);
664 #endif
665 }else if((u_int)-imm<65536) {
57871462 666 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
668 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 670 }else {
671 do {
672 int shift = (ffs(imm) - 1) & ~1;
673 int imm8 = imm & (0xff << shift);
674 genimm_checked(imm8,&armval);
675 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
677 rs = rt;
678 imm &= ~imm8;
679 }
680 while (imm != 0);
57871462 681 }
682 }
683 else if(rs!=rt) emit_mov(rs,rt);
684}
685
e2b5e7aa 686static void emit_addimm_and_set_flags(int imm,int rt)
57871462 687{
688 assert(imm>-65536&&imm<65536);
689 u_int armval;
690 if(genimm(imm,&armval)) {
691 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
692 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
693 }else if(genimm(-imm,&armval)) {
694 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
695 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
696 }else if(imm<0) {
697 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
698 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
699 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
700 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
701 }else{
702 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
703 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
704 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
705 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
706 }
707}
e2b5e7aa 708
709static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 710{
711 emit_addimm(rt,imm,rt);
712}
713
e2b5e7aa 714static void emit_addnop(u_int r)
57871462 715{
716 assert(r<16);
717 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
718 output_w32(0xe2800000|rd_rn_rm(r,r,0));
719}
720
e2b5e7aa 721static void emit_andimm(int rs,int imm,int rt)
57871462 722{
723 u_int armval;
790ee18e 724 if(imm==0) {
725 emit_zeroreg(rt);
726 }else if(genimm(imm,&armval)) {
57871462 727 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
728 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
729 }else if(genimm(~imm,&armval)) {
730 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
732 }else if(imm==65535) {
332a4533 733 #ifndef HAVE_ARMV6
57871462 734 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
735 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
736 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
737 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
738 #else
739 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
740 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
741 #endif
742 }else{
743 assert(imm>0&&imm<65535);
665f33e1 744 #ifndef HAVE_ARMV7
57871462 745 assem_debug("mov r14,#%d\n",imm&0xFF00);
746 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
747 assem_debug("add r14,r14,#%d\n",imm&0xFF);
748 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
749 #else
750 emit_movw(imm,HOST_TEMPREG);
751 #endif
752 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
753 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
754 }
755}
756
e2b5e7aa 757static void emit_orimm(int rs,int imm,int rt)
57871462 758{
759 u_int armval;
790ee18e 760 if(imm==0) {
761 if(rs!=rt) emit_mov(rs,rt);
762 }else if(genimm(imm,&armval)) {
57871462 763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
764 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
765 }else{
766 assert(imm>0&&imm<65536);
767 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
768 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
769 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
770 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
771 }
772}
773
e2b5e7aa 774static void emit_xorimm(int rs,int imm,int rt)
57871462 775{
57871462 776 u_int armval;
790ee18e 777 if(imm==0) {
778 if(rs!=rt) emit_mov(rs,rt);
779 }else if(genimm(imm,&armval)) {
57871462 780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
781 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
782 }else{
514ed0d9 783 assert(imm>0&&imm<65536);
57871462 784 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
785 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
786 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
787 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
788 }
789}
790
e2b5e7aa 791static void emit_shlimm(int rs,u_int imm,int rt)
57871462 792{
793 assert(imm>0);
794 assert(imm<32);
795 //if(imm==1) ...
796 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
797 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
798}
799
e2b5e7aa 800static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 801{
802 assert(imm>0);
803 assert(imm<32);
804 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
805 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
806}
807
e2b5e7aa 808static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 809{
810 assert(imm>0);
811 assert(imm<32);
812 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
813 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
814}
815
e2b5e7aa 816static void emit_shrimm(int rs,u_int imm,int rt)
57871462 817{
818 assert(imm>0);
819 assert(imm<32);
820 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
821 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
822}
823
e2b5e7aa 824static void emit_sarimm(int rs,u_int imm,int rt)
57871462 825{
826 assert(imm>0);
827 assert(imm<32);
828 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
829 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
830}
831
e2b5e7aa 832static void emit_rorimm(int rs,u_int imm,int rt)
57871462 833{
834 assert(imm>0);
835 assert(imm<32);
836 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
837 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
838}
839
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6+, otherwise
// a shift left by 16 followed by an arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
850
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6+, otherwise
// a shift left by 24 followed by an arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
861
e2b5e7aa 862static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 863{
864 assert(rs<16);
865 assert(rt<16);
866 assert(shift<16);
867 //if(imm==1) ...
868 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
869 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
870}
e2b5e7aa 871
872static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 873{
874 assert(rs<16);
875 assert(rt<16);
876 assert(shift<16);
877 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
878 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
879}
e2b5e7aa 880
881static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 882{
883 assert(rs<16);
884 assert(rt<16);
885 assert(shift<16);
886 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
887 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
888}
57871462 889
3968e69e 890static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 891{
892 assert(rs<16);
893 assert(rt<16);
894 assert(shift<16);
895 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
896 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
897}
e2b5e7aa 898
3968e69e 899static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 900{
901 assert(rs<16);
902 assert(rt<16);
903 assert(shift<16);
904 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
905 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
906}
907
e2b5e7aa 908static void emit_cmpimm(int rs,int imm)
57871462 909{
910 u_int armval;
911 if(genimm(imm,&armval)) {
5a05d80c 912 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 913 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
914 }else if(genimm(-imm,&armval)) {
5a05d80c 915 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 916 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
917 }else if(imm>0) {
918 assert(imm<65536);
57871462 919 emit_movimm(imm,HOST_TEMPREG);
57871462 920 assem_debug("cmp %s,r14\n",regname[rs]);
921 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
922 }else{
923 assert(imm>-65536);
57871462 924 emit_movimm(-imm,HOST_TEMPREG);
57871462 925 assem_debug("cmn %s,r14\n",regname[rs]);
926 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
927 }
928}
929
e2b5e7aa 930static void emit_cmovne_imm(int imm,int rt)
57871462 931{
932 assem_debug("movne %s,#%d\n",regname[rt],imm);
933 u_int armval;
cfbd3c6e 934 genimm_checked(imm,&armval);
57871462 935 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
936}
e2b5e7aa 937
938static void emit_cmovl_imm(int imm,int rt)
57871462 939{
940 assem_debug("movlt %s,#%d\n",regname[rt],imm);
941 u_int armval;
cfbd3c6e 942 genimm_checked(imm,&armval);
57871462 943 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
944}
e2b5e7aa 945
946static void emit_cmovb_imm(int imm,int rt)
57871462 947{
948 assem_debug("movcc %s,#%d\n",regname[rt],imm);
949 u_int armval;
cfbd3c6e 950 genimm_checked(imm,&armval);
57871462 951 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
952}
e2b5e7aa 953
3968e69e 954static void emit_cmovae_imm(int imm,int rt)
955{
956 assem_debug("movcs %s,#%d\n",regname[rt],imm);
957 u_int armval;
958 genimm_checked(imm,&armval);
959 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
960}
961
e2b5e7aa 962static void emit_cmovne_reg(int rs,int rt)
57871462 963{
964 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
965 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
966}
e2b5e7aa 967
968static void emit_cmovl_reg(int rs,int rt)
57871462 969{
970 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
971 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
972}
e2b5e7aa 973
e3c6bdb5 974static void emit_cmovb_reg(int rs,int rt)
975{
976 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
977 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
978}
979
e2b5e7aa 980static void emit_cmovs_reg(int rs,int rt)
57871462 981{
982 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
983 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
984}
985
// Emit rt = (rs < imm) using the signed "lt" condition.
// When rt aliases rs, rt is cleared only after the comparison.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 993
// Emit rt = (rs < imm) using the unsigned "cc" condition.
// When rt aliases rs, rt is cleared only after the comparison.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1001
e2b5e7aa 1002static void emit_cmp(int rs,int rt)
57871462 1003{
1004 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1005 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1006}
e2b5e7aa 1007
// Emits: compare rs with 1, load 1 into rt, then conditionally ("movlt")
// replace it with 0. The compare comes first so rs may alias rt.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1015
// Emits a flag-setting step on rs (emit_test when rs and rt are the same
// register, emit_movs into rt otherwise) followed by a conditional
// "not equal" load of 1 into rt via emit_cmovne_imm.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1023
// Set rt to 1 when the compare of rs1 against rs2 satisfies the "movlt"
// condition, otherwise to 0.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  // rt may only be zeroed before the compare when it is not one of the operands
  if(!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1032
// Set rt to 1 when the compare of rs1 against rs2 satisfies the "movcc"
// condition, otherwise to 0.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  // rt may only be zeroed before the compare when it is not one of the operands
  if(!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1041
2a014d73 1042static int can_jump_or_call(const void *a)
1043{
1044 intptr_t offset = (u_char *)a - out - 8;
1045 return (-33554432 <= offset && offset < 33554432);
1046}
1047
643aeae3 1048static void emit_call(const void *a_)
57871462 1049{
643aeae3 1050 int a = (int)a_;
d1e4ebd9 1051 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1052 u_int offset=genjmp(a);
1053 output_w32(0xeb000000|offset);
1054}
e2b5e7aa 1055
b14b6a8f 1056static void emit_jmp(const void *a_)
57871462 1057{
b14b6a8f 1058 int a = (int)a_;
d1e4ebd9 1059 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1060 u_int offset=genjmp(a);
1061 output_w32(0xea000000|offset);
1062}
e2b5e7aa 1063
// Emit the conditional branch logged as "bne" to a_.
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
e2b5e7aa 1071
// Emit the conditional branch logged as "beq" to a_.
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
e2b5e7aa 1079
// Emit the conditional branch logged as "bmi" to a_.
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
e2b5e7aa 1087
// Emit the conditional branch logged as "bpl" to a_.
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
e2b5e7aa 1095
// Emit the conditional branch logged as "blt" to a_.
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
e2b5e7aa 1103
// Emit the conditional branch logged as "bge" to a_.
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
e2b5e7aa 1111
// Emit the conditional branch logged as "bvc" to a_.
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
e2b5e7aa 1119
// Emit the conditional branch logged as "bcs" to a_.
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
e2b5e7aa 1127
// Emit the conditional branch logged as "bcc" to a_.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000|genjmp(target));
}
1135
3968e69e 1136static unused void emit_callreg(u_int r)
57871462 1137{
c6c3b1b3 1138 assert(r<15);
1139 assem_debug("blx %s\n",regname[r]);
1140 output_w32(0xe12fff30|r);
57871462 1141}
e2b5e7aa 1142
1143static void emit_jmpreg(u_int r)
57871462 1144{
1145 assem_debug("mov pc,%s\n",regname[r]);
1146 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1147}
1148
be516ebe 1149static void emit_ret(void)
1150{
1151 emit_jmpreg(14);
1152}
1153
e2b5e7aa 1154static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1155{
1156 assert(offset>-4096&&offset<4096);
1157 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1158 if(offset>=0) {
1159 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1160 }else{
1161 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1162 }
1163}
e2b5e7aa 1164
1165static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1166{
1167 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1169}
39b71d9a 1170#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1171
1172static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1173{
1174 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1176}
e2b5e7aa 1177
1178static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1179{
1180 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1182}
e2b5e7aa 1183
1184static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1185{
1186 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1187 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1188}
e2b5e7aa 1189
1190static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1191{
1192 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1193 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1194}
e2b5e7aa 1195
1196static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1197{
1198 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1199 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1200}
e2b5e7aa 1201
1202static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1203{
1204 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1205 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1206}
e2b5e7aa 1207
e2b5e7aa 1208static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1209{
1210 assert(offset>-256&&offset<256);
1211 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1212 if(offset>=0) {
1213 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1214 }else{
1215 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1216 }
1217}
e2b5e7aa 1218
e2b5e7aa 1219static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1220{
1221 assert(offset>-256&&offset<256);
1222 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1223 if(offset>=0) {
1224 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1225 }else{
1226 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1227 }
1228}
e2b5e7aa 1229
1230static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1231{
1232 assert(offset>-4096&&offset<4096);
1233 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1234 if(offset>=0) {
1235 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1236 }else{
1237 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1238 }
1239}
e2b5e7aa 1240
e2b5e7aa 1241static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1242{
1243 assert(offset>-256&&offset<256);
1244 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1245 if(offset>=0) {
1246 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1247 }else{
1248 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1249 }
1250}
e2b5e7aa 1251
054175e9 1252static void emit_ldrd(int offset, int rs, int rt)
1253{
1254 assert(offset>-256&&offset<256);
1255 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1256 if(offset>=0) {
1257 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1258 }else{
1259 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1260 }
1261}
e2b5e7aa 1262
643aeae3 1263static void emit_readword(void *addr, int rt)
57871462 1264{
643aeae3 1265 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1266 assert(offset<4096);
1267 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1268 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1269}
39b71d9a 1270#define emit_readptr emit_readword
e2b5e7aa 1271
e2b5e7aa 1272static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1273{
1274 assert(offset>-4096&&offset<4096);
1275 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1276 if(offset>=0) {
1277 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1278 }else{
1279 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1280 }
1281}
e2b5e7aa 1282
e2b5e7aa 1283static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1284{
1285 assert(offset>-256&&offset<256);
1286 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1287 if(offset>=0) {
1288 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1289 }else{
1290 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1291 }
1292}
e2b5e7aa 1293
1294static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1295{
1296 assert(offset>-4096&&offset<4096);
1297 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1298 if(offset>=0) {
1299 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1300 }else{
1301 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1302 }
1303}
e2b5e7aa 1304
e2b5e7aa 1305static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1306{
1307 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1308 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1309}
e2b5e7aa 1310
1311static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1312{
1313 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1314 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1315}
e2b5e7aa 1316
1317static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1318{
1319 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1320 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1321}
e2b5e7aa 1322
643aeae3 1323static void emit_writeword(int rt, void *addr)
57871462 1324{
643aeae3 1325 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1326 assert(offset<4096);
1327 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1328 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1329}
e2b5e7aa 1330
e2b5e7aa 1331static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1332{
1333 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1334 assert(rs1<16);
1335 assert(rs2<16);
1336 assert(hi<16);
1337 assert(lo<16);
1338 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1339}
e2b5e7aa 1340
1341static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1342{
1343 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1344 assert(rs1<16);
1345 assert(rs2<16);
1346 assert(hi<16);
1347 assert(lo<16);
1348 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1349}
1350
e2b5e7aa 1351static void emit_clz(int rs,int rt)
57871462 1352{
1353 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1354 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1355}
1356
e2b5e7aa 1357static void emit_subcs(int rs1,int rs2,int rt)
57871462 1358{
1359 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1360 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1361}
1362
e2b5e7aa 1363static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1364{
1365 assert(imm>0);
1366 assert(imm<32);
1367 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1368 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1369}
1370
e2b5e7aa 1371static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1372{
1373 assert(imm>0);
1374 assert(imm<32);
1375 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1376 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1377}
1378
e2b5e7aa 1379static void emit_negmi(int rs, int rt)
57871462 1380{
1381 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1382 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1383}
1384
e2b5e7aa 1385static void emit_negsmi(int rs, int rt)
57871462 1386{
1387 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1388 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1389}
1390
e2b5e7aa 1391static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1392{
1393 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1394 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1395}
1396
e2b5e7aa 1397static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1398{
1399 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1400 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1401}
1402
e2b5e7aa 1403static void emit_teq(int rs, int rt)
57871462 1404{
1405 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1406 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1407}
1408
3968e69e 1409static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1410{
1411 u_int armval;
cfbd3c6e 1412 genimm_checked(imm,&armval);
57871462 1413 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1415}
1416
57871462 1417// Conditionally select one of two immediates, optimizing for small code size
1418// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1419static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1420{
1421 u_int armval;
1422 if(genimm(imm2-imm1,&armval)) {
1423 emit_movimm(imm1,rt);
1424 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1425 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1426 }else if(genimm(imm1-imm2,&armval)) {
1427 emit_movimm(imm1,rt);
1428 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1429 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1430 }
1431 else {
665f33e1 1432 #ifndef HAVE_ARMV7
57871462 1433 emit_movimm(imm1,rt);
1434 add_literal((int)out,imm2);
1435 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1436 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1437 #else
1438 emit_movw(imm1&0x0000FFFF,rt);
1439 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1440 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1441 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1442 }
1443 emit_movt(imm1&0xFFFF0000,rt);
1444 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1445 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1446 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1447 }
1448 #endif
1449 }
1450}
1451
57871462 1452// special case for checking invalid_code
e2b5e7aa 1453static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1454{
1455 assert(imm<128&&imm>=0);
1456 assert(r>=0&&r<16);
1457 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1458 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1459 emit_cmpimm(HOST_TEMPREG,imm);
1460}
1461
// Emit the conditional call logged as "blne" to address a.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
1468
57871462 1469// Used to preload hash table entries
e2b5e7aa 1470static unused void emit_prefetchreg(int r)
57871462 1471{
1472 assem_debug("pld %s\n",regname[r]);
1473 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1474}
1475
1476// Special case for mini_ht
e2b5e7aa 1477static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1478{
1479 assert(offset<4096);
1480 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1481 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1482}
1483
e2b5e7aa 1484static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1485{
1486 u_int armval;
cfbd3c6e 1487 genimm_checked(imm,&armval);
b9b61529 1488 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1489 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1490}
1491
e2b5e7aa 1492static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1493{
1494 u_int armval;
cfbd3c6e 1495 genimm_checked(imm,&armval);
b9b61529 1496 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1497 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1498}
1499
e2b5e7aa 1500static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1501{
1502 u_int armval;
1503 genimm_checked(imm,&armval);
1504 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1505 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1506}
1507
e2b5e7aa 1508static void emit_jno_unlikely(int a)
57871462 1509{
1510 //emit_jno(a);
1511 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1512 output_w32(0x72800000|rd_rn_rm(15,15,0));
1513}
1514
054175e9 1515static void save_regs_all(u_int reglist)
57871462 1516{
054175e9 1517 int i;
57871462 1518 if(!reglist) return;
1519 assem_debug("stmia fp,{");
054175e9 1520 for(i=0;i<16;i++)
1521 if(reglist&(1<<i))
1522 assem_debug("r%d,",i);
57871462 1523 assem_debug("}\n");
1524 output_w32(0xe88b0000|reglist);
1525}
e2b5e7aa 1526
054175e9 1527static void restore_regs_all(u_int reglist)
57871462 1528{
054175e9 1529 int i;
57871462 1530 if(!reglist) return;
1531 assem_debug("ldmia fp,{");
054175e9 1532 for(i=0;i<16;i++)
1533 if(reglist&(1<<i))
1534 assem_debug("r%d,",i);
57871462 1535 assem_debug("}\n");
1536 output_w32(0xe89b0000|reglist);
1537}
e2b5e7aa 1538
054175e9 1539// Save registers before function call
1540static void save_regs(u_int reglist)
1541{
4d646738 1542 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1543 save_regs_all(reglist);
1544}
e2b5e7aa 1545
054175e9 1546// Restore registers after function call
1547static void restore_regs(u_int reglist)
1548{
4d646738 1549 reglist&=CALLER_SAVE_REGS;
054175e9 1550 restore_regs_all(reglist);
1551}
57871462 1552
57871462 1553/* Stubs/epilogue */
1554
e2b5e7aa 1555static void literal_pool(int n)
57871462 1556{
1557 if(!literalcount) return;
1558 if(n) {
1559 if((int)out-literals[0][0]<4096-n) return;
1560 }
1561 u_int *ptr;
1562 int i;
1563 for(i=0;i<literalcount;i++)
1564 {
77750690 1565 u_int l_addr=(u_int)out;
1566 int j;
1567 for(j=0;j<i;j++) {
1568 if(literals[j][1]==literals[i][1]) {
1569 //printf("dup %08x\n",literals[i][1]);
1570 l_addr=literals[j][0];
1571 break;
1572 }
1573 }
57871462 1574 ptr=(u_int *)literals[i][0];
77750690 1575 u_int offset=l_addr-(u_int)ptr-8;
57871462 1576 assert(offset<4096);
1577 assert(!(offset&3));
1578 *ptr|=offset;
77750690 1579 if(l_addr==(u_int)out) {
1580 literals[i][0]=l_addr; // remember for dupes
1581 output_w32(literals[i][1]);
1582 }
57871462 1583 }
1584 literalcount=0;
1585}
1586
e2b5e7aa 1587static void literal_pool_jumpover(int n)
57871462 1588{
1589 if(!literalcount) return;
1590 if(n) {
1591 if((int)out-literals[0][0]<4096-n) return;
1592 }
df4dc2b1 1593 void *jaddr = out;
57871462 1594 emit_jmp(0);
1595 literal_pool(0);
df4dc2b1 1596 set_jump_target(jaddr, out);
57871462 1597}
1598
7c3a5182 1599// parsed by get_pointer, find_extjump_insn
1600static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1601{
1602 u_char *ptr=(u_char *)addr;
1603 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1604 (void)ptr;
1605
57871462 1606 emit_loadlp(target,0);
643aeae3 1607 emit_loadlp((u_int)addr,1);
d62c125a 1608 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1609 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1610//DEBUG >
1611#ifdef DEBUG_CYCLE_COUNT
643aeae3 1612 emit_readword(&last_count,ECX);
57871462 1613 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1614 emit_readword(&next_interupt,ECX);
1615 emit_writeword(HOST_CCREG,&Count);
57871462 1616 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1617 emit_writeword(ECX,&last_count);
57871462 1618#endif
1619//DEBUG <
2a014d73 1620 emit_far_jump(linker);
57871462 1621}
1622
d1e4ebd9 1623static void check_extjump2(void *src)
1624{
1625 u_int *ptr = src;
1626 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1627 (void)ptr;
1628}
1629
13e35c04 1630// put rt_val into rt, potentially making use of rs with value rs_val
1631static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1632{
8575a877 1633 u_int armval;
1634 int diff;
1635 if(genimm(rt_val,&armval)) {
1636 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1637 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1638 return;
1639 }
1640 if(genimm(~rt_val,&armval)) {
1641 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1642 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1643 return;
1644 }
1645 diff=rt_val-rs_val;
1646 if(genimm(diff,&armval)) {
1647 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1648 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1649 return;
1650 }else if(genimm(-diff,&armval)) {
1651 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1652 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1653 return;
1654 }
1655 emit_movimm(rt_val,rt);
1656}
1657
// return 1 if emit_movimm_from() can do its job cheaply, i.e. when v2 equals
// v1 or when v2-v1 or v1-v2, after dropping trailing pairs of zero bits, fits
// in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  // Unsigned arithmetic throughout: negating the difference as a signed int
  // would overflow (undefined behaviour) when it equals 0x80000000.
  const u_int diff=v2-v1;
  u_int xs;
  if(v1==v2) return 1;
  for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  for(xs=0u-diff;xs!=0&&(xs&3)==0;xs>>=2)
    ;
  if(xs<0x100) return 1;
  return 0;
}
cbbab9cd 1673
b14b6a8f 1674static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1675{
1676 switch(type) {
1677 case LOADB_STUB: emit_signextend8(rs,rt); break;
1678 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1679 case LOADH_STUB: emit_signextend16(rs,rt); break;
1680 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1681 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1682 default: assert(0);
1683 }
1684}
1685
b1be1eee 1686#include "pcsxmem.h"
1687#include "pcsxmem_inline.c"
b1be1eee 1688
// Emit the out-of-line stub for the load recorded in stubs[n].
// The emitted code looks the address up through mem_rtab and retries the load
// with conditional ("cc") load instructions; when the carry-clear branch is
// taken it returns to stubs[n].retaddr (directly, or via the register restore
// when registers were spilled). Otherwise it calls one of the
// jump_handler_read8/16/32 routines, moves the result from r0 into the
// destination register and returns to stubs[n].retaddr.
e2b5e7aa 1689static void do_readstub(int n)
57871462 1690{
b14b6a8f 1691 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1692 literal_pool(256);
// bind the jump recorded at stubs[n].addr to the stub that starts here
b14b6a8f 1693 set_jump_target(stubs[n].addr, out);
 1694 enum stub_type type=stubs[n].type;
 1695 int i=stubs[n].a;
 1696 int rs=stubs[n].b;
81dbbf4c 1697 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1698 u_int reglist=stubs[n].e;
81dbbf4c 1699 const signed char *i_regmap=i_regs->regmap;
581335b0 1700 int rt;
// C1LS/C2LS/LOADLR instructions load into the register mapped to FTEMP,
// everything else into the register mapped to the instruction's rt1
cf95b4f0 1701 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1702 rt=get_reg(i_regmap,FTEMP);
 1703 }else{
cf95b4f0 1704 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1705 }
 1706 assert(rs>=0);
df4dc2b1 1707 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
 1708 void *restore_jump = NULL;
// the address register must not be picked as a temporary
c6c3b1b3 1709 reglist|=(1<<rs);
// pick the first of r0-r9/r12 (mask 0x13ff) that is not in reglist as temp
 1710 for(r=0;r<=12;r++) {
 1711 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
 1712 temp=r; break;
 1713 }
 1714 }
// the destination register is dropped from the save/restore set
// (presumably because the load overwrites it anyway)
cf95b4f0 1715 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1716 reglist&=~(1<<rt);
// no free register: spill first, then use r0 (or r2 when rs is r0)
 1717 if(temp==-1) {
 1718 save_regs(reglist);
 1719 regs_saved=1;
 1720 temp=(rs==0)?2:0;
 1721 }
// use r1 instead of HOST_TEMPREG as second temporary when r1 is already
// saved or not in reglist, and is neither temp nor rs
 1722 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
 1723 temp2=1;
// temp2 = mem_rtab[rs>>12]<<1, with the flag-setting shift (lsls) providing
// the condition for the conditional loads and the bcc below
643aeae3 1724 emit_readword(&mem_rtab,temp);
c6c3b1b3 1725 emit_shrimm(rs,12,temp2);
 1726 emit_readword_dualindexedx4(temp,temp2,temp2);
 1727 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1728 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1729 switch(type) {
 1730 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
 1731 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
 1732 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
 1733 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
 1734 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1735 default: assert(0);
c6c3b1b3 1736 }
 1737 }
 1738 if(regs_saved) {
df4dc2b1 1739 restore_jump=out;
c6c3b1b3 1740 emit_jcc(0); // jump to reg restore
 1741 }
 1742 else
b14b6a8f 1743 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1744
// handler path: registers are spilled here unless that already happened above
 1745 if(!regs_saved)
 1746 save_regs(reglist);
643aeae3 1747 void *handler=NULL;
c6c3b1b3 1748 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1749 handler=jump_handler_read8;
c6c3b1b3 1750 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1751 handler=jump_handler_read16;
c6c3b1b3 1752 if(type==LOADW_STUB)
643aeae3 1753 handler=jump_handler_read32;
 1754 assert(handler);
b96d3df7 1755 pass_args(rs,temp2);
// r2 receives the cycle count (from its host register, or reloaded when
// CCREG is unmapped) plus the adjustment derived from stubs[n].d
c6c3b1b3 1756 int cc=get_reg(i_regmap,CCREG);
 1757 if(cc<0)
 1758 emit_loadreg(CCREG,2);
bb4f300c 1759 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1760 emit_far_call(handler);
// the handler's result is taken from r0 and narrowed according to the load type
cf95b4f0 1761 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1762 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1763 }
 1764 if(restore_jump)
df4dc2b1 1765 set_jump_target(restore_jump, out);
c6c3b1b3 1766 restore_regs(reglist);
b14b6a8f 1767 emit_jmp(stubs[n].retaddr); // return address
57871462 1768}
1769
// Emit inline code for a load of the given type from the compile-time known
// address addr.
// Three strategies are tried in order:
//   1. pcsx_direct_read() - when it returns non-zero nothing more is emitted;
//   2. a plain host load when get_direct_memhandler() reports no handler
//      (rs is first rewritten from addr to host_addr if they differ);
//   3. a call to a memory handler, with caller-saved registers in reglist
//      preserved around it and the result taken from r0.
81dbbf4c 1770static void inline_readstub(enum stub_type type, int i, u_int addr,
 1771 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1772{
// rs and rt both start as the host register mapped to target; rs falls back
// to the register mapped to -1 when target has none
 1773 int rs=get_reg(regmap,target);
57871462 1774 int rt=get_reg(regmap,target);
535d208a 1775 if(rs<0) rs=get_reg(regmap,-1);
57871462 1776 assert(rs>=0);
2a014d73 1777 u_int is_dynamic;
687b4580 1778 uintptr_t host_addr = 0;
643aeae3 1779 void *handler;
b1be1eee 1780 int cc=get_reg(regmap,CCREG);
bb4f300c 1781 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
b1be1eee 1782 return;
643aeae3 1783 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
 1784 if (handler == NULL) {
// no handler: read straight from host memory; nothing to do when the result
// is not wanted (no destination register, or guest register 0)
cf95b4f0 1785 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1786 return;
13e35c04 1787 if(addr!=host_addr)
 1788 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1789 switch(type) {
 1790 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
 1791 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
 1792 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
 1793 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
 1794 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
 1795 default: assert(0);
 1796 }
 1797 return;
 1798 }
// for "dynamic" handlers the generic jump_handler_read* trampolines are used
// instead of the handler returned above
b1be1eee 1799 is_dynamic=pcsxmem_is_handler_dynamic(addr);
 1800 if(is_dynamic) {
 1801 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1802 handler=jump_handler_read8;
b1be1eee 1803 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1804 handler=jump_handler_read16;
b1be1eee 1805 if(type==LOADW_STUB)
643aeae3 1806 handler=jump_handler_read32;
b1be1eee 1807 }
c6c3b1b3 1808
 1809 // call a memhandler
cf95b4f0 1810 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1811 reglist&=~(1<<rt);
 1812 save_regs(reglist);
// r0 = address (materialised as an immediate when target is 0)
 1813 if(target==0)
 1814 emit_movimm(addr,0);
 1815 else if(rs!=0)
 1816 emit_mov(rs,0);
b1be1eee 1817 if(cc<0)
 1818 emit_loadreg(CCREG,2);
// dynamic: r1 = mem_rtab entry for addr shifted left by one, r2 = adjusted
// cycle count; otherwise the adjusted count plus last_count is stored to Count
 1819 if(is_dynamic) {
 1820 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
bb4f300c 1821 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
c6c3b1b3 1822 }
b1be1eee 1823 else {
643aeae3 1824 emit_readword(&last_count,3);
bb4f300c 1825 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
b1be1eee 1826 emit_add(2,3,2);
643aeae3 1827 emit_writeword(2,&Count);
b1be1eee 1828 }
 1829
2a014d73 1830 emit_far_call(handler);
b1be1eee 1831
// move the result from r0 into rt, narrowed according to the load type
cf95b4f0 1832 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1833 switch(type) {
 1834 case LOADB_STUB: emit_signextend8(0,rt); break;
 1835 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
 1836 case LOADH_STUB: emit_signextend16(0,rt); break;
 1837 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
 1838 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
 1839 default: assert(0);
 1840 }
 1841 }
 1842 restore_regs(reglist);
57871462 1843}
1844
// Emit the out-of-line stub for the store recorded in stubs[n].
// The emitted code looks the address up through mem_wtab and retries the
// store with conditional ("cc") store instructions; when the carry-clear
// branch is taken it returns to stubs[n].retaddr (directly, or via the
// register restore when registers were spilled). Otherwise it calls one of
// the jump_handler_write8/16/32 routines, which return the new cycle count,
// and then returns to stubs[n].retaddr.
e2b5e7aa 1845static void do_writestub(int n)
57871462 1846{
b14b6a8f 1847 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1848 literal_pool(256);
// bind the jump recorded at stubs[n].addr to the stub that starts here
b14b6a8f 1849 set_jump_target(stubs[n].addr, out);
 1850 enum stub_type type=stubs[n].type;
 1851 int i=stubs[n].a;
 1852 int rs=stubs[n].b;
81dbbf4c 1853 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1854 u_int reglist=stubs[n].e;
81dbbf4c 1855 const signed char *i_regmap=i_regs->regmap;
581335b0 1856 int rt,r;
// the value to store lives in the register mapped to FTEMP for C1LS/C2LS,
// otherwise in the one mapped to the instruction's rs2
cf95b4f0 1857 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1858 rt=get_reg(i_regmap,r=FTEMP);
 1859 }else{
cf95b4f0 1860 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1861 }
 1862 assert(rs>=0);
 1863 assert(rt>=0);
b14b6a8f 1864 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1865 void *restore_jump = NULL;
// neither the address nor the data register may be picked as a temporary
b96d3df7 1866 int reglist2=reglist|(1<<rs)|(1<<rt);
// pick the first of r0-r9/r12 (mask 0x13ff) that is not in reglist2 as temp
 1867 for(rtmp=0;rtmp<=12;rtmp++) {
 1868 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
 1869 temp=rtmp; break;
 1870 }
 1871 }
// no free register: spill first, then use the first of r0-r3 that is
// neither rs nor rt
 1872 if(temp==-1) {
 1873 save_regs(reglist);
 1874 regs_saved=1;
 1875 for(rtmp=0;rtmp<=3;rtmp++)
 1876 if(rtmp!=rs&&rtmp!=rt)
 1877 {temp=rtmp;break;}
 1878 }
// use r3 instead of HOST_TEMPREG as second temporary when r3 is already
// saved or not in reglist2, and is none of temp, rs, rt
 1879 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
 1880 temp2=3;
// temp2 = mem_wtab[rs>>12]<<1, with the flag-setting shift (lsls) providing
// the condition for the conditional stores and the bcc below
643aeae3 1881 emit_readword(&mem_wtab,temp);
b96d3df7 1882 emit_shrimm(rs,12,temp2);
 1883 emit_readword_dualindexedx4(temp,temp2,temp2);
 1884 emit_lsls_imm(temp2,1,temp2);
 1885 switch(type) {
 1886 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
 1887 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
 1888 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
 1889 default: assert(0);
 1890 }
 1891 if(regs_saved) {
df4dc2b1 1892 restore_jump=out;
b96d3df7 1893 emit_jcc(0); // jump to reg restore
 1894 }
 1895 else
b14b6a8f 1896 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1897
// handler path: registers are spilled here unless that already happened above
 1898 if(!regs_saved)
 1899 save_regs(reglist);
643aeae3 1900 void *handler=NULL;
b96d3df7 1901 switch(type) {
643aeae3 1902 case STOREB_STUB: handler=jump_handler_write8; break;
 1903 case STOREH_STUB: handler=jump_handler_write16; break;
 1904 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1905 default: assert(0);
b96d3df7 1906 }
643aeae3 1907 assert(handler);
b96d3df7 1908 pass_args(rs,rt);
// the shifted table entry is handed over in r3, the adjusted cycle count in r2
 1909 if(temp2!=3)
 1910 emit_mov(temp2,3);
 1911 int cc=get_reg(i_regmap,CCREG);
 1912 if(cc<0)
 1913 emit_loadreg(CCREG,2);
bb4f300c 1914 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
b96d3df7 1915 // returns new cycle_count
2a014d73 1916 emit_far_call(handler);
// undo the adjustment on the count returned in r0 and put it back into the
// cycle-count register (or store it when CCREG is unmapped)
bb4f300c 1917 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
b96d3df7 1918 if(cc<0)
 1919 emit_storereg(CCREG,2);
 1920 if(restore_jump)
df4dc2b1 1921 set_jump_target(restore_jump, out);
b96d3df7 1922 restore_regs(reglist);
b14b6a8f 1923 emit_jmp(stubs[n].retaddr);
57871462 1924}
1925
// Emit inline code for a store of the given type to the compile-time known
// address addr.
// When get_direct_memhandler() reports no handler, a plain host store is
// emitted (rs is first rewritten from addr to host_addr if they differ).
// Otherwise the handler address is passed in r3 to jump_handler_write_h, with
// caller-saved registers in reglist preserved around the call and the cycle
// count updated from the value returned in r0.
81dbbf4c 1926static void inline_writestub(enum stub_type type, int i, u_int addr,
 1927 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1928{
// rs: register mapped to -1 (used as the address register below);
// rt: register holding the value to store
 1929 int rs=get_reg(regmap,-1);
57871462 1930 int rt=get_reg(regmap,target);
 1931 assert(rs>=0);
 1932 assert(rt>=0);
687b4580 1933 uintptr_t host_addr = 0;
643aeae3 1934 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
 1935 if (handler == NULL) {
13e35c04 1936 if(addr!=host_addr)
 1937 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1938 switch(type) {
 1939 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
 1940 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
 1941 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
 1942 default: assert(0);
 1943 }
 1944 return;
 1945 }
 1946
 1947 // call a memhandler
 1948 save_regs(reglist);
13e35c04 1949 pass_args(rs,rt);
// r2 = cycle count (reloaded when CCREG is unmapped) plus the adjustment
b96d3df7 1950 int cc=get_reg(regmap,CCREG);
 1951 if(cc<0)
 1952 emit_loadreg(CCREG,2);
bb4f300c 1953 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
643aeae3 1954 emit_movimm((u_int)handler,3);
b96d3df7 1955 // returns new cycle_count
2a014d73 1956 emit_far_call(jump_handler_write_h);
// undo the adjustment on the count returned in r0
bb4f300c 1957 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
b96d3df7 1958 if(cc<0)
 1959 emit_storereg(CCREG,2);
 1960 restore_regs(reglist);
57871462 1961}
1962
d1e4ebd9 1963// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1964static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 1965{
665f33e1 1966 #ifndef HAVE_ARMV7
7c3a5182 1967 emit_loadlp((int)source, 1);
1968 emit_loadlp((int)copy, 2);
3d680478 1969 emit_loadlp(source_len, 3);
57871462 1970 #else
7c3a5182 1971 emit_movw(((u_int)source)&0x0000FFFF, 1);
1972 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1973 emit_movt(((u_int)source)&0xFFFF0000, 1);
1974 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 1975 emit_movw(source_len, 3);
57871462 1976 #endif
7c3a5182 1977 emit_movimm(arg0, 0);
1978}
1979
3d680478 1980static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 1981{
1982 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 1983 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 1984 emit_far_call(verify_code);
df4dc2b1 1985 void *entry = out;
57871462 1986 load_regs_entry(i);
df4dc2b1 1987 if (entry == out)
1988 entry = instr_addr[i];
57871462 1989 emit_jmp(instr_addr[i]);
1990 return entry;
1991}
1992
3d680478 1993static void do_dirty_stub_ds(u_int source_len)
57871462 1994{
3d680478 1995 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 1996 emit_far_call(verify_code_ds);
57871462 1997}
1998
57871462 1999/* Special assem */
2000
81dbbf4c 2001static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2002{
2003 save_regs_all(reglist);
32631e6a 2004 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2005#ifdef PCNT
81dbbf4c 2006 emit_movimm(op, 0);
2a014d73 2007 emit_far_call(pcnt_gte_start);
82ed88eb 2008#endif
81dbbf4c 2009 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2010}
2011
2012static void c2op_epilogue(u_int op,u_int reglist)
2013{
82ed88eb 2014#ifdef PCNT
2015 emit_movimm(op,0);
2a014d73 2016 emit_far_call(pcnt_gte_end);
82ed88eb 2017#endif
054175e9 2018 restore_regs_all(reglist);
2019}
2020
6c0eefaf 2021static void c2op_call_MACtoIR(int lm,int need_flags)
2022{
2023 if(need_flags)
2a014d73 2024 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2025 else
2a014d73 2026 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2027}
2028
2029static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2030{
2a014d73 2031 emit_far_call(func);
6c0eefaf 2032 // func is C code and trashes r0
2033 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2034 if(need_flags||need_ir)
2035 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2036 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2037}
2038
81dbbf4c 2039static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2040{
81dbbf4c 2041 u_int c2op = source[i] & 0x3f;
2042 u_int reglist_full = get_host_reglist(i_regs->regmap);
2043 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2044 int need_flags, need_ir;
b9b61529 2045
2046 if (gte_handlers[c2op]!=NULL) {
bedfea38 2047 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2048 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2049 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2050 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2051 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2052 need_flags=0;
6c0eefaf 2053 int shift = (source[i] >> 19) & 1;
2054 int lm = (source[i] >> 10) & 1;
054175e9 2055 switch(c2op) {
19776aef 2056#ifndef DRC_DBG
054175e9 2057 case GTE_MVMVA: {
82336ba3 2058#ifdef HAVE_ARMV5
054175e9 2059 int v = (source[i] >> 15) & 3;
2060 int cv = (source[i] >> 13) & 3;
2061 int mx = (source[i] >> 17) & 3;
4d646738 2062 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2063 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2064 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2065 if(v<3)
2066 emit_ldrd(v*8,0,4);
2067 else {
2068 emit_movzwl_indexed(9*4,0,4); // gteIR
2069 emit_movzwl_indexed(10*4,0,6);
2070 emit_movzwl_indexed(11*4,0,5);
2071 emit_orrshl_imm(6,16,4);
2072 }
2073 if(mx<3)
2074 emit_addimm(0,32*4+mx*8*4,6);
2075 else
643aeae3 2076 emit_readword(&zeromem_ptr,6);
054175e9 2077 if(cv<3)
2078 emit_addimm(0,32*4+(cv*8+5)*4,7);
2079 else
643aeae3 2080 emit_readword(&zeromem_ptr,7);
054175e9 2081#ifdef __ARM_NEON__
2082 emit_movimm(source[i],1); // opcode
2a014d73 2083 emit_far_call(gteMVMVA_part_neon);
054175e9 2084 if(need_flags) {
2085 emit_movimm(lm,1);
2a014d73 2086 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2087 }
2088#else
2089 if(cv==3&&shift)
2a014d73 2090 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2091 else {
2092 emit_movimm(shift,1);
2a014d73 2093 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2094 }
6c0eefaf 2095 if(need_flags||need_ir)
2096 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2097#endif
2098#else /* if not HAVE_ARMV5 */
81dbbf4c 2099 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2100 emit_movimm(source[i],1); // opcode
643aeae3 2101 emit_writeword(1,&psxRegs.code);
2a014d73 2102 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2103#endif
2104 break;
2105 }
6c0eefaf 2106 case GTE_OP:
81dbbf4c 2107 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2108 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2109 if(need_flags||need_ir) {
2110 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2111 c2op_call_MACtoIR(lm,need_flags);
2112 }
2113 break;
2114 case GTE_DPCS:
81dbbf4c 2115 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2116 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2117 break;
2118 case GTE_INTPL:
81dbbf4c 2119 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2120 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2121 break;
2122 case GTE_SQR:
81dbbf4c 2123 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2124 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2125 if(need_flags||need_ir) {
2126 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2127 c2op_call_MACtoIR(lm,need_flags);
2128 }
2129 break;
2130 case GTE_DCPL:
81dbbf4c 2131 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2132 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2133 break;
2134 case GTE_GPF:
81dbbf4c 2135 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2136 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2137 break;
2138 case GTE_GPL:
81dbbf4c 2139 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2140 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2141 break;
19776aef 2142#endif
054175e9 2143 default:
81dbbf4c 2144 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2145#ifdef DRC_DBG
2146 emit_movimm(source[i],1); // opcode
643aeae3 2147 emit_writeword(1,&psxRegs.code);
19776aef 2148#endif
2a014d73 2149 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2150 break;
2151 }
2152 c2op_epilogue(c2op,reglist);
2153 }
b9b61529 2154}
2155
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 * Equivalent C, with the result left in `temp`:
 *
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * The 0x7f87e000 mask is tested in three pieces (0x7f000000, 0x00870000,
 * 0x0000e000); each later test is only performed while the previous ones
 * found no bit set.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // clear the low 12 bits by shifting right and back left
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // some masked bit was set: raise bit 31
  emit_orrne_imm(temp, 0x80000000, temp);
}
2167
2168static void do_mfc2_31_one(u_int copr,signed char temp)
2169{
2170 emit_readword(&reg_cop2d[copr],temp);
2171 emit_testimm(temp,0x8000); // do we need this?
2172 emit_andne_imm(temp,0,temp);
2173 emit_cmpimm(temp,0xf80);
2174 emit_andimm(temp,0xf80,temp);
2175 emit_cmovae_imm(0xf80,temp);
2176}
2177
2178static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2179{
2180 if (temp < 0) {
2181 host_tempreg_acquire();
2182 temp = HOST_TEMPREG;
2183 }
2184 do_mfc2_31_one(9,temp);
2185 emit_shrimm(temp,7,tl);
2186 do_mfc2_31_one(10,temp);
2187 emit_orrshr_imm(temp,2,tl);
2188 do_mfc2_31_one(11,temp);
2189 emit_orrshl_imm(temp,3,tl);
2190 emit_writeword(tl,&reg_cop2d[29]);
2191 if (temp == HOST_TEMPREG)
2192 host_tempreg_release();
2193}
2194
e2b5e7aa 2195static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2196{
2197 // case 0x18: MULT
2198 // case 0x19: MULTU
2199 // case 0x1A: DIV
2200 // case 0x1B: DIVU
2201 // case 0x1C: DMULT
2202 // case 0x1D: DMULTU
2203 // case 0x1E: DDIV
2204 // case 0x1F: DDIVU
cf95b4f0 2205 if(dops[i].rs1&&dops[i].rs2)
57871462 2206 {
cf95b4f0 2207 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2208 {
cf95b4f0 2209 if(dops[i].opcode2==0x18) // MULT
57871462 2210 {
cf95b4f0 2211 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2212 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2213 signed char hi=get_reg(i_regs->regmap,HIREG);
2214 signed char lo=get_reg(i_regs->regmap,LOREG);
2215 assert(m1>=0);
2216 assert(m2>=0);
2217 assert(hi>=0);
2218 assert(lo>=0);
2219 emit_smull(m1,m2,hi,lo);
2220 }
cf95b4f0 2221 if(dops[i].opcode2==0x19) // MULTU
57871462 2222 {
cf95b4f0 2223 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2224 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2225 signed char hi=get_reg(i_regs->regmap,HIREG);
2226 signed char lo=get_reg(i_regs->regmap,LOREG);
2227 assert(m1>=0);
2228 assert(m2>=0);
2229 assert(hi>=0);
2230 assert(lo>=0);
2231 emit_umull(m1,m2,hi,lo);
2232 }
cf95b4f0 2233 if(dops[i].opcode2==0x1A) // DIV
57871462 2234 {
cf95b4f0 2235 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2236 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2237 assert(d1>=0);
2238 assert(d2>=0);
2239 signed char quotient=get_reg(i_regs->regmap,LOREG);
2240 signed char remainder=get_reg(i_regs->regmap,HIREG);
2241 assert(quotient>=0);
2242 assert(remainder>=0);
2243 emit_movs(d1,remainder);
44a80f6a 2244 emit_movimm(0xffffffff,quotient);
2245 emit_negmi(quotient,quotient); // .. quotient and ..
2246 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2247 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2248 emit_jeq(out+52); // Division by zero
82336ba3 2249 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2250#ifdef HAVE_ARMV5
57871462 2251 emit_clz(HOST_TEMPREG,quotient);
2252 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2253#else
2254 emit_movimm(0,quotient);
2255 emit_addpl_imm(quotient,1,quotient);
2256 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2257 emit_jns(out-2*4);
665f33e1 2258#endif
57871462 2259 emit_orimm(quotient,1<<31,quotient);
2260 emit_shr(quotient,quotient,quotient);
2261 emit_cmp(remainder,HOST_TEMPREG);
2262 emit_subcs(remainder,HOST_TEMPREG,remainder);
2263 emit_adcs(quotient,quotient,quotient);
2264 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2265 emit_jcc(out-16); // -4
57871462 2266 emit_teq(d1,d2);
2267 emit_negmi(quotient,quotient);
2268 emit_test(d1,d1);
2269 emit_negmi(remainder,remainder);
2270 }
cf95b4f0 2271 if(dops[i].opcode2==0x1B) // DIVU
57871462 2272 {
cf95b4f0 2273 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2274 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2275 assert(d1>=0);
2276 assert(d2>=0);
2277 signed char quotient=get_reg(i_regs->regmap,LOREG);
2278 signed char remainder=get_reg(i_regs->regmap,HIREG);
2279 assert(quotient>=0);
2280 assert(remainder>=0);
44a80f6a 2281 emit_mov(d1,remainder);
2282 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2283 emit_test(d2,d2);
7c3a5182 2284 emit_jeq(out+40); // Division by zero
665f33e1 2285#ifdef HAVE_ARMV5
57871462 2286 emit_clz(d2,HOST_TEMPREG);
2287 emit_movimm(1<<31,quotient);
2288 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2289#else
2290 emit_movimm(0,HOST_TEMPREG);
82336ba3 2291 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2292 emit_lslpls_imm(d2,1,d2);
7c3a5182 2293 emit_jns(out-2*4);
665f33e1 2294 emit_movimm(1<<31,quotient);
2295#endif
57871462 2296 emit_shr(quotient,HOST_TEMPREG,quotient);
2297 emit_cmp(remainder,d2);
2298 emit_subcs(remainder,d2,remainder);
2299 emit_adcs(quotient,quotient,quotient);
2300 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2301 emit_jcc(out-16); // -4
57871462 2302 }
2303 }
2304 else // 64-bit
71e490c5 2305 assert(0);
57871462 2306 }
2307 else
2308 {
2309 // Multiply by zero is zero.
2310 // MIPS does not have a divide by zero exception.
2311 // The result is undefined, we return zero.
2312 signed char hr=get_reg(i_regs->regmap,HIREG);
2313 signed char lr=get_reg(i_regs->regmap,LOREG);
2314 if(hr>=0) emit_zeroreg(hr);
2315 if(lr>=0) emit_zeroreg(lr);
2316 }
2317}
2318#define multdiv_assemble multdiv_assemble_arm
2319
d1e4ebd9 2320static void do_jump_vaddr(int rs)
2321{
2a014d73 2322 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2323}
2324
/*
 * Intentionally emits nothing.
 * Don't need this for ARM.  On x86, this puts the value 0xf8 into the
 * register.  On ARM the hash can be done with a single instruction
 * (see do_rhash).
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2329
e2b5e7aa 2330static void do_preload_rhtbl(int ht) {
57871462 2331 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2332}
2333
/* Emit the mini hash: rh = rs & 0xf8. */
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2337
e2b5e7aa 2338static void do_miniht_load(int ht,int rh) {
57871462 2339 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2340 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2341}
2342
/*
 * Emit the mini hash table dispatch: compare the loaded entry `rh` with
 * the target address `rs`; on a match load the program counter (register
 * 15) from offset 4 of `ht`, otherwise fall through to the generic
 * virtual-address jump.
 */
static void do_miniht_jump(int rs, int rh, int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // always dispatch through register 7 when the hack is enabled
  if (vaddr_reg != 7)
    emit_mov(vaddr_reg, 7);
  vaddr_reg = 7;
#endif
  do_jump_vaddr(vaddr_reg);
}
2353
e2b5e7aa 2354static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2355 #ifndef HAVE_ARMV7
57871462 2356 emit_movimm(return_address,rt); // PC into link register
643aeae3 2357 add_to_linker(out,return_address,1);
57871462 2358 emit_pcreladdr(temp);
643aeae3 2359 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2360 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2361 #else
2362 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2363 add_to_linker(out,return_address,1);
57871462 2364 emit_pcreladdr(temp);
643aeae3 2365 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2366 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2367 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2368 #endif
2369}
2370
57871462 2371// CPU-architecture-specific initialization
2a014d73 2372static void arch_init(void)
2373{
2374 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2375 struct tramp_insns *ops = ndrc->tramp.ops;
2376 size_t i;
2377 assert(!(diff & 3));
2378 assert(diff < 0x1000);
2379 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2380 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2381 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2382 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2383}
b9b61529 2384
2385// vim:shiftwidth=2:expandtab