drc: get rid of RAM_FIXED, revive ROREG
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus/PCSX - assem_arm.c                                        *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *   Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the          *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
4d646738 30#ifndef __MACH__
31#define CALLER_SAVE_REGS 0x100f
32#else
33#define CALLER_SAVE_REGS 0x120f
34#endif
35
e2b5e7aa 36#define unused __attribute__((unused))
37
dd114d7d 38#ifdef DRC_DBG
39#pragma GCC diagnostic ignored "-Wunused-function"
40#pragma GCC diagnostic ignored "-Wunused-variable"
41#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
42#endif
43
// Entry points that are defined outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN entry point indexed by host register number N.
// Slots 11 and 13-15 (named fp, sp, lr, pc in regname[]) have no entry.
void * const jump_vaddr_reg[16] = {
  jump_vaddr_r0,
  jump_vaddr_r1,
  jump_vaddr_r2,
  jump_vaddr_r3,
  jump_vaddr_r4,
  jump_vaddr_r5,
  jump_vaddr_r6,
  jump_vaddr_r7,
  jump_vaddr_r8,
  jump_vaddr_r9,
  jump_vaddr_r10,
  NULL,
  jump_vaddr_r12,
  NULL,
  NULL,
  NULL
};
57871462 78
0bbd1454 79void invalidate_addr_r0();
80void invalidate_addr_r1();
81void invalidate_addr_r2();
82void invalidate_addr_r3();
83void invalidate_addr_r4();
84void invalidate_addr_r5();
85void invalidate_addr_r6();
86void invalidate_addr_r7();
87void invalidate_addr_r8();
88void invalidate_addr_r9();
89void invalidate_addr_r10();
90void invalidate_addr_r12();
91
92const u_int invalidate_addr_reg[16] = {
93 (int)invalidate_addr_r0,
94 (int)invalidate_addr_r1,
95 (int)invalidate_addr_r2,
96 (int)invalidate_addr_r3,
97 (int)invalidate_addr_r4,
98 (int)invalidate_addr_r5,
99 (int)invalidate_addr_r6,
100 (int)invalidate_addr_r7,
101 (int)invalidate_addr_r8,
102 (int)invalidate_addr_r9,
103 (int)invalidate_addr_r10,
104 0,
105 (int)invalidate_addr_r12,
106 0,
107 0,
108 0};
109
d148d265 110static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 111
57871462 112/* Linker */
113
df4dc2b1 114static void set_jump_target(void *addr, void *target_)
57871462 115{
df4dc2b1 116 u_int target = (u_int)target_;
117 u_char *ptr = addr;
57871462 118 u_int *ptr2=(u_int *)ptr;
119 if(ptr[3]==0xe2) {
120 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 124 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 125 }
126 else if(ptr[3]==0x72) {
127 // generated by emit_jno_unlikely
128 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 129 assert(((uintptr_t)addr&3)==0);
57871462 130 assert((target&3)==0);
131 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
132 }
133 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 134 assert(((uintptr_t)addr&3)==0);
57871462 135 assert((target&3)==0);
136 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
137 }
138 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
139 }
140 else {
141 assert((ptr[3]&0x0e)==0xa);
142 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
143 }
144}
145
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant.
// Compiled out (#if 0); kept for reference only.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  // copying requires the word before the branch to be 0xe28dd000
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
  }
  else {
    assert((ptr[3]&0x0e)==0xa);
    u_int target_insn=*(u_int *)target;
    if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
      copy=0;
    }
    if((target_insn&0x0c100000)==0x04100000) { // Load
      copy=0;
    }
    if(target_insn&0x08000000) {
      copy=0;
    }
    if(copy) {
      // move the target instruction into the slot and branch past it
      ptr2[-1]=target_insn;
      target+=4;
    }
    *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
  }
}
#endif
57871462 180
181/* Literal pool */
e2b5e7aa 182static void add_literal(int addr,int val)
57871462 183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
9f51b4b9 187 literalcount++;
188}
57871462 189
// from a pointer to external jump stub (which was produced by emit_extjump2)
// find where the jumping insn is:
// the second word of the stub must be "ldr rx, [pc, #ofs]" (asserted); the
// pointer stored in the literal slot it addresses (insn address + ofs + 8)
// is returned.
static void *find_extjump_insn(void *stub)
{
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int offset=*ptr&0xfff;
  // byte arithmetic done on char * rather than on void * (a GNU extension)
  void **l_ptr=(void **)((char *)ptr+offset+8);
  return *l_ptr;
}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
643aeae3 205static void *get_pointer(void *stub)
57871462 206{
207 //printf("get_pointer(%x)\n",(int)stub);
d148d265 208 int *i_ptr=find_extjump_insn(stub);
3d680478 209 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 210 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 211}
212
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// The bl is expected 4 words (6 with HAVE_ARMV7) into the block, or one
// word later. If the instruction after the bl is an unconditional branch,
// its target is returned, otherwise the address right after the bl.
static void *get_clean_addr(void *addr)
{
  signed int *ptr = addr;
  #ifndef HAVE_ARMV7
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    // shift left as unsigned (no shift of a negative value), then
    // arithmetic shift right to sign-extend the branch offset
    return (char *)ptr+((int)((u_int)*ptr<<8)>>6)+8; // follow jump
  }
  return ptr;
}
231
// Compare the source a block was compiled from with its saved copy.
// ptr points at the block's header, from which three values are decoded:
// source address, copy address and length. Without HAVE_ARMV7 they come
// from three pc-relative literal loads, otherwise from movw/movt pairs
// (length from a single movw). A bl is expected right after the header,
// or one word later (asserted).
// Returns non-zero when the len bytes at source and copy are identical.
static int verify_dirty(const u_int *ptr)
{
  #ifndef HAVE_ARMV7
  u_int offset;
  // get from literal pool
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  offset=*ptr&0xfff;
  u_int source=*(const u_int *)((const char *)ptr+offset+8);
  ptr++;
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  offset=*ptr&0xfff;
  u_int copy=*(const u_int *)((const char *)ptr+offset+8);
  ptr++;
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  offset=*ptr&0xfff;
  u_int len=*(const u_int *)((const char *)ptr+offset+8);
  ptr++;
  ptr++;
  #else
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return !memcmp((void *)source,(void *)copy,len);
}
263
264// This doesn't necessarily find all clean entry points, just
265// guarantees that it's not dirty
df4dc2b1 266static int isclean(void *addr)
57871462 267{
665f33e1 268 #ifndef HAVE_ARMV7
581335b0 269 u_int *ptr=((u_int *)addr)+4;
57871462 270 #else
581335b0 271 u_int *ptr=((u_int *)addr)+6;
57871462 272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
275 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 276 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
277 return 1;
278}
279
4a35de07 280// get source that block at addr was compiled from (host pointers)
01d26796 281static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 282{
643aeae3 283 u_int *ptr = addr;
665f33e1 284 #ifndef HAVE_ARMV7
16c8be17 285 u_int offset;
57871462 286 // get from literal pool
15776b68 287 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 288 offset=*ptr&0xfff;
289 u_int source=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 //assert((*ptr&0xFFFF0000)==0xe59f0000);
292 //offset=*ptr&0xfff;
293 //u_int copy=*(u_int*)((void *)ptr+offset+8);
294 ptr++;
295 assert((*ptr&0xFFFF0000)==0xe59f0000);
296 offset=*ptr&0xfff;
297 u_int len=*(u_int*)((void *)ptr+offset+8);
298 ptr++;
299 ptr++;
57871462 300 #else
301 // ARMv7 movw/movt
302 assert((*ptr&0xFFF00000)==0xe3000000);
303 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
304 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
305 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
306 ptr+=6;
307 #endif
308 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
309 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 310 *start=(u_char *)source;
311 *end=(u_char *)source+len;
57871462 312}
313
57871462 314// Allocate a specific ARM register.
e2b5e7aa 315static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 316{
317 int n;
f776eb14 318 int dirty=0;
9f51b4b9 319
57871462 320 // see if it's already allocated (and dealloc it)
321 for(n=0;n<HOST_REGS;n++)
322 {
f776eb14 323 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
324 dirty=(cur->dirty>>n)&1;
325 cur->regmap[n]=-1;
326 }
57871462 327 }
9f51b4b9 328
57871462 329 cur->regmap[hr]=reg;
330 cur->dirty&=~(1<<hr);
f776eb14 331 cur->dirty|=dirty<<hr;
57871462 332 cur->isconst&=~(1<<hr);
333}
334
335// Alloc cycle count into dedicated register
e2b5e7aa 336static void alloc_cc(struct regstat *cur,int i)
57871462 337{
338 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
339}
340
57871462 341/* Assembler */
342
e2b5e7aa 343static unused char regname[16][4] = {
57871462 344 "r0",
345 "r1",
346 "r2",
347 "r3",
348 "r4",
349 "r5",
350 "r6",
351 "r7",
352 "r8",
353 "r9",
354 "r10",
355 "fp",
356 "r12",
357 "sp",
358 "lr",
359 "pc"};
360
e2b5e7aa 361static void output_w32(u_int word)
57871462 362{
363 *((u_int *)out)=word;
364 out+=4;
365}
e2b5e7aa 366
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each must be below 16 (asserted).
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  return((rn<<16)|(rd<<12)|rm);
}
e2b5e7aa 374
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// bits. shift must be even (asserted); it is stored as (32-shift)&30,
// placed at bit 7 upwards. rn goes to bits 16-19, rd to bits 12-15 and imm
// (below 256, asserted) to bits 0-7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
}
e2b5e7aa 383
// Try to express imm as an 8-bit value combined with an even rotation.
// On success returns 1 and stores the 12-bit operand field (rotation at
// bit 7 upwards, value in bits 0-7) in *encoded. Returns 0 when no even
// rotation brings imm below 256; *encoded is 0 in that case.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  int i=32;
  while(i>0)
  {
    if(imm<256) {
      *encoded=((i&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);i-=2;
  }
  return 0;
}
e2b5e7aa 399
400static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 401{
402 u_int ret=genimm(imm,encoded);
403 assert(ret);
581335b0 404 (void)ret;
cfbd3c6e 405}
e2b5e7aa 406
407static u_int genjmp(u_int addr)
57871462 408{
7c3a5182 409 if (addr < 3) return 0; // a branch that will be patched later
410 int offset = addr-(int)out-8;
411 if (offset < -33554432 || offset >= 33554432) {
412 SysPrintf("genjmp: out of range: %08x\n", offset);
413 abort();
e80343e2 414 return 0;
415 }
57871462 416 return ((u_int)offset>>2)&0xffffff;
417}
418
d1e4ebd9 419static unused void emit_breakpoint(void)
420{
421 assem_debug("bkpt #0\n");
422 //output_w32(0xe1200070);
423 output_w32(0xe7f001f0);
424}
425
e2b5e7aa 426static void emit_mov(int rs,int rt)
57871462 427{
428 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
429 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
430}
431
e2b5e7aa 432static void emit_movs(int rs,int rt)
57871462 433{
434 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
435 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
436}
437
e2b5e7aa 438static void emit_add(int rs1,int rs2,int rt)
57871462 439{
440 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
441 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
442}
443
39b71d9a 444static void emit_adds(int rs1,int rs2,int rt)
445{
446 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
447 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
448}
449#define emit_adds_ptr emit_adds
450
e2b5e7aa 451static void emit_adcs(int rs1,int rs2,int rt)
57871462 452{
453 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
454 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
455}
456
e2b5e7aa 457static void emit_neg(int rs, int rt)
57871462 458{
459 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
460 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
461}
462
e2b5e7aa 463static void emit_sub(int rs1,int rs2,int rt)
57871462 464{
465 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
466 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
467}
468
e2b5e7aa 469static void emit_zeroreg(int rt)
57871462 470{
471 assem_debug("mov %s,#0\n",regname[rt]);
472 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
473}
474
e2b5e7aa 475static void emit_loadlp(u_int imm,u_int rt)
790ee18e 476{
477 add_literal((int)out,imm);
478 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
479 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
480}
e2b5e7aa 481
482static void emit_movw(u_int imm,u_int rt)
790ee18e 483{
484 assert(imm<65536);
485 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
486 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
487}
e2b5e7aa 488
489static void emit_movt(u_int imm,u_int rt)
790ee18e 490{
491 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
492 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
493}
e2b5e7aa 494
495static void emit_movimm(u_int imm,u_int rt)
790ee18e 496{
497 u_int armval;
498 if(genimm(imm,&armval)) {
499 assem_debug("mov %s,#%d\n",regname[rt],imm);
500 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
501 }else if(genimm(~imm,&armval)) {
502 assem_debug("mvn %s,#%d\n",regname[rt],imm);
503 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
504 }else if(imm<65536) {
665f33e1 505 #ifndef HAVE_ARMV7
790ee18e 506 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
507 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
508 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
509 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
510 #else
511 emit_movw(imm,rt);
512 #endif
513 }else{
665f33e1 514 #ifndef HAVE_ARMV7
790ee18e 515 emit_loadlp(imm,rt);
516 #else
517 emit_movw(imm&0x0000FFFF,rt);
518 emit_movt(imm&0xFFFF0000,rt);
519 #endif
520 }
521}
e2b5e7aa 522
523static void emit_pcreladdr(u_int rt)
790ee18e 524{
525 assem_debug("add %s,pc,#?\n",regname[rt]);
526 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
527}
528
e2b5e7aa 529static void emit_loadreg(int r, int hr)
57871462 530{
3d624f89 531 if(r&64) {
c43b5311 532 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 533 assert(0);
534 return;
3d624f89 535 }
57871462 536 if((r&63)==0)
537 emit_zeroreg(hr);
538 else {
7c3a5182 539 int addr = (int)&psxRegs.GPR.r[r];
540 switch (r) {
541 //case HIREG: addr = &hi; break;
542 //case LOREG: addr = &lo; break;
543 case CCREG: addr = (int)&cycle_count; break;
544 case CSREG: addr = (int)&Status; break;
545 case INVCP: addr = (int)&invc_ptr; break;
37387d8b 546 case ROREG: addr = (int)&ram_offset; break;
7c3a5182 547 default: assert(r < 34); break;
548 }
57871462 549 u_int offset = addr-(u_int)&dynarec_local;
550 assert(offset<4096);
551 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
552 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
553 }
554}
e2b5e7aa 555
556static void emit_storereg(int r, int hr)
57871462 557{
3d624f89 558 if(r&64) {
c43b5311 559 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 560 assert(0);
561 return;
3d624f89 562 }
7c3a5182 563 int addr = (int)&psxRegs.GPR.r[r];
564 switch (r) {
565 //case HIREG: addr = &hi; break;
566 //case LOREG: addr = &lo; break;
567 case CCREG: addr = (int)&cycle_count; break;
568 default: assert(r < 34); break;
569 }
57871462 570 u_int offset = addr-(u_int)&dynarec_local;
571 assert(offset<4096);
572 assem_debug("str %s,fp+%d\n",regname[hr],offset);
573 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
574}
575
e2b5e7aa 576static void emit_test(int rs, int rt)
57871462 577{
578 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
579 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
580}
581
e2b5e7aa 582static void emit_testimm(int rs,int imm)
57871462 583{
584 u_int armval;
5a05d80c 585 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 586 genimm_checked(imm,&armval);
57871462 587 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
588}
589
e2b5e7aa 590static void emit_testeqimm(int rs,int imm)
b9b61529 591{
592 u_int armval;
593 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 594 genimm_checked(imm,&armval);
b9b61529 595 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
596}
597
e2b5e7aa 598static void emit_not(int rs,int rt)
57871462 599{
600 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
601 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
602}
603
e2b5e7aa 604static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 605{
606 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
608}
609
e2b5e7aa 610static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 611{
612 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
613 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
614}
e2b5e7aa 615
e2b5e7aa 616static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 617{
618 assert(rs<16);
619 assert(rt<16);
620 assert(imm<32);
621 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
622 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
623}
624
e2b5e7aa 625static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 626{
627 assert(rs<16);
628 assert(rt<16);
629 assert(imm<32);
630 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
631 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
632}
633
e2b5e7aa 634static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 635{
636 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
637 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
638}
639
3968e69e 640static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
641{
642 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
643 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
644}
645
e2b5e7aa 646static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 647{
648 assert(rs<16);
649 assert(rt<16);
650 if(imm!=0) {
57871462 651 u_int armval;
652 if(genimm(imm,&armval)) {
653 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(genimm(-imm,&armval)) {
8a0a8423 656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 657 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 658 #ifdef HAVE_ARMV7
659 }else if(rt!=rs&&(u_int)imm<65536) {
660 emit_movw(imm&0x0000ffff,rt);
661 emit_add(rs,rt,rt);
662 }else if(rt!=rs&&(u_int)-imm<65536) {
663 emit_movw(-imm&0x0000ffff,rt);
664 emit_sub(rs,rt,rt);
665 #endif
666 }else if((u_int)-imm<65536) {
57871462 667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
668 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
670 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 671 }else {
672 do {
673 int shift = (ffs(imm) - 1) & ~1;
674 int imm8 = imm & (0xff << shift);
675 genimm_checked(imm8,&armval);
676 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
677 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
678 rs = rt;
679 imm &= ~imm8;
680 }
681 while (imm != 0);
57871462 682 }
683 }
684 else if(rs!=rt) emit_mov(rs,rt);
685}
686
e2b5e7aa 687static void emit_addimm_and_set_flags(int imm,int rt)
57871462 688{
689 assert(imm>-65536&&imm<65536);
690 u_int armval;
691 if(genimm(imm,&armval)) {
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(genimm(-imm,&armval)) {
695 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(imm<0) {
698 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
700 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
701 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
702 }else{
703 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
704 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
705 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
706 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
707 }
708}
e2b5e7aa 709
e2b5e7aa 710static void emit_addnop(u_int r)
57871462 711{
712 assert(r<16);
713 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
714 output_w32(0xe2800000|rd_rn_rm(r,r,0));
715}
716
e2b5e7aa 717static void emit_andimm(int rs,int imm,int rt)
57871462 718{
719 u_int armval;
790ee18e 720 if(imm==0) {
721 emit_zeroreg(rt);
722 }else if(genimm(imm,&armval)) {
57871462 723 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(genimm(~imm,&armval)) {
726 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
727 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
728 }else if(imm==65535) {
332a4533 729 #ifndef HAVE_ARMV6
57871462 730 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
731 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
732 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
733 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
734 #else
735 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
736 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
737 #endif
738 }else{
739 assert(imm>0&&imm<65535);
665f33e1 740 #ifndef HAVE_ARMV7
57871462 741 assem_debug("mov r14,#%d\n",imm&0xFF00);
742 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
743 assem_debug("add r14,r14,#%d\n",imm&0xFF);
744 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
745 #else
746 emit_movw(imm,HOST_TEMPREG);
747 #endif
748 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
749 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
750 }
751}
752
e2b5e7aa 753static void emit_orimm(int rs,int imm,int rt)
57871462 754{
755 u_int armval;
790ee18e 756 if(imm==0) {
757 if(rs!=rt) emit_mov(rs,rt);
758 }else if(genimm(imm,&armval)) {
57871462 759 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
760 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
761 }else{
762 assert(imm>0&&imm<65536);
763 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
764 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
765 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
766 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
767 }
768}
769
e2b5e7aa 770static void emit_xorimm(int rs,int imm,int rt)
57871462 771{
57871462 772 u_int armval;
790ee18e 773 if(imm==0) {
774 if(rs!=rt) emit_mov(rs,rt);
775 }else if(genimm(imm,&armval)) {
57871462 776 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
777 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
778 }else{
514ed0d9 779 assert(imm>0&&imm<65536);
57871462 780 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
781 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
782 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
783 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
784 }
785}
786
e2b5e7aa 787static void emit_shlimm(int rs,u_int imm,int rt)
57871462 788{
789 assert(imm>0);
790 assert(imm<32);
791 //if(imm==1) ...
792 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
793 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
794}
795
e2b5e7aa 796static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 797{
798 assert(imm>0);
799 assert(imm<32);
800 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
801 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
802}
803
e2b5e7aa 804static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 805{
806 assert(imm>0);
807 assert(imm<32);
808 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
809 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
810}
811
e2b5e7aa 812static void emit_shrimm(int rs,u_int imm,int rt)
57871462 813{
814 assert(imm>0);
815 assert(imm<32);
816 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
817 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
818}
819
e2b5e7aa 820static void emit_sarimm(int rs,u_int imm,int rt)
57871462 821{
822 assert(imm>0);
823 assert(imm<32);
824 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
825 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
826}
827
e2b5e7aa 828static void emit_rorimm(int rs,u_int imm,int rt)
57871462 829{
830 assert(imm>0);
831 assert(imm<32);
832 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
833 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
834}
835
// Sign-extend the low 16 bits of rs into rt: lsl #16 + asr #16 without
// HAVE_ARMV6, a single sxth otherwise.
static void emit_signextend16(int rs,int rt)
{
  #ifndef HAVE_ARMV6
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #else
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #endif
}
846
// Sign-extend the low 8 bits of rs into rt: lsl #24 + asr #24 without
// HAVE_ARMV6, a single sxtb otherwise.
static void emit_signextend8(int rs,int rt)
{
  #ifndef HAVE_ARMV6
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #else
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #endif
}
857
e2b5e7aa 858static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 859{
860 assert(rs<16);
861 assert(rt<16);
862 assert(shift<16);
863 //if(imm==1) ...
864 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
865 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
866}
e2b5e7aa 867
868static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 869{
870 assert(rs<16);
871 assert(rt<16);
872 assert(shift<16);
873 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
874 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
875}
e2b5e7aa 876
877static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 878{
879 assert(rs<16);
880 assert(rt<16);
881 assert(shift<16);
882 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
883 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
884}
57871462 885
3968e69e 886static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 887{
888 assert(rs<16);
889 assert(rt<16);
890 assert(shift<16);
891 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
892 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
893}
e2b5e7aa 894
3968e69e 895static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 896{
897 assert(rs<16);
898 assert(rt<16);
899 assert(shift<16);
900 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
901 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
902}
903
e2b5e7aa 904static void emit_cmpimm(int rs,int imm)
57871462 905{
906 u_int armval;
907 if(genimm(imm,&armval)) {
5a05d80c 908 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 909 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
910 }else if(genimm(-imm,&armval)) {
5a05d80c 911 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 912 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
913 }else if(imm>0) {
914 assert(imm<65536);
57871462 915 emit_movimm(imm,HOST_TEMPREG);
57871462 916 assem_debug("cmp %s,r14\n",regname[rs]);
917 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
918 }else{
919 assert(imm>-65536);
57871462 920 emit_movimm(-imm,HOST_TEMPREG);
57871462 921 assem_debug("cmn %s,r14\n",regname[rs]);
922 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
923 }
924}
925
e2b5e7aa 926static void emit_cmovne_imm(int imm,int rt)
57871462 927{
928 assem_debug("movne %s,#%d\n",regname[rt],imm);
929 u_int armval;
cfbd3c6e 930 genimm_checked(imm,&armval);
57871462 931 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
932}
e2b5e7aa 933
934static void emit_cmovl_imm(int imm,int rt)
57871462 935{
936 assem_debug("movlt %s,#%d\n",regname[rt],imm);
937 u_int armval;
cfbd3c6e 938 genimm_checked(imm,&armval);
57871462 939 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
940}
e2b5e7aa 941
942static void emit_cmovb_imm(int imm,int rt)
57871462 943{
944 assem_debug("movcc %s,#%d\n",regname[rt],imm);
945 u_int armval;
cfbd3c6e 946 genimm_checked(imm,&armval);
57871462 947 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
948}
e2b5e7aa 949
3968e69e 950static void emit_cmovae_imm(int imm,int rt)
951{
952 assem_debug("movcs %s,#%d\n",regname[rt],imm);
953 u_int armval;
954 genimm_checked(imm,&armval);
955 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
956}
957
e2b5e7aa 958static void emit_cmovne_reg(int rs,int rt)
57871462 959{
960 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
961 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
962}
e2b5e7aa 963
964static void emit_cmovl_reg(int rs,int rt)
57871462 965{
966 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
967 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
968}
e2b5e7aa 969
e3c6bdb5 970static void emit_cmovb_reg(int rs,int rt)
971{
972 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
973 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
974}
975
e2b5e7aa 976static void emit_cmovs_reg(int rs,int rt)
57871462 977{
978 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
979 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
980}
981
// Emit rt = (rs < imm) ? 1 : 0 using the LT condition.
// When rs and rt are the same register, rt is zeroed only after the
// compare so the compared value is not lost.
static void emit_slti32(int rs,int imm,int rt)
{
  if(rs!=rt) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(rs==rt) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 989
// Emit rt = (rs < imm) ? 1 : 0 using the CC condition.
// When rs and rt are the same register, rt is zeroed only after the
// compare so the compared value is not lost.
static void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs!=rt) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(rs==rt) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 997
e2b5e7aa 998static void emit_cmp(int rs,int rt)
57871462 999{
1000 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1001 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1002}
e2b5e7aa 1003
// Emit code setting rt to 1 if rs > 0 (signed), otherwise 0:
// compare rs against 1, load 1, then replace it with 0 when rs < 1.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);   // flags from rs - 1
  emit_movimm(1,rt);   // assume "greater than zero"
  emit_cmovl_imm(0,rt); // movlt: rs < 1, so result is 0
}
e2b5e7aa 1011
// Emit code setting rt to 1 if rs is non-zero, otherwise leaving/making it 0.
// With distinct registers rs is copied to rt by emit_movs(); with rs==rt the
// value is only tested. In both cases a conditional "ne" move of 1 follows.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1019
// Emit code setting rt to 1 if rs1 < rs2 (signed compare), otherwise 0.
// If rt aliases either operand, it is zeroed only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1028
// Emit code setting rt to 1 if rs1 is below rs2 (unsigned compare, via the
// carry-clear conditional move), otherwise 0.
// If rt aliases either operand, it is zeroed only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  if(!rt_is_operand)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1037
2a014d73 1038static int can_jump_or_call(const void *a)
1039{
1040 intptr_t offset = (u_char *)a - out - 8;
1041 return (-33554432 <= offset && offset < 33554432);
1042}
1043
643aeae3 1044static void emit_call(const void *a_)
57871462 1045{
643aeae3 1046 int a = (int)a_;
d1e4ebd9 1047 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1048 u_int offset=genjmp(a);
1049 output_w32(0xeb000000|offset);
1050}
e2b5e7aa 1051
b14b6a8f 1052static void emit_jmp(const void *a_)
57871462 1053{
b14b6a8f 1054 int a = (int)a_;
d1e4ebd9 1055 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1056 u_int offset=genjmp(a);
1057 output_w32(0xea000000|offset);
1058}
e2b5e7aa 1059
// Emit "bne <a_>": branch to a_ when the NE condition holds.
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
e2b5e7aa 1067
// Emit "beq <a_>": branch to a_ when the EQ condition holds.
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
e2b5e7aa 1075
// Emit "bmi <a_>": branch to a_ when the MI (negative) condition holds.
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
e2b5e7aa 1083
// Emit "bpl <a_>": branch to a_ when the PL (non-negative) condition holds.
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
e2b5e7aa 1091
// Emit "blt <a_>": branch to a_ when the LT (signed less-than) condition
// holds.
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
e2b5e7aa 1099
// Emit "bge <a_>": branch to a_ when the GE (signed greater-or-equal)
// condition holds.
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
e2b5e7aa 1107
// Emit "bvc <a_>": branch to a_ when the overflow flag is clear.
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
e2b5e7aa 1115
// Emit "bcs <a_>": branch to a_ when the carry flag is set.
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
e2b5e7aa 1123
// Emit "bcc <a_>": branch to a_ when the carry flag is clear.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000|genjmp(target));
}
1131
3968e69e 1132static unused void emit_callreg(u_int r)
57871462 1133{
c6c3b1b3 1134 assert(r<15);
1135 assem_debug("blx %s\n",regname[r]);
1136 output_w32(0xe12fff30|r);
57871462 1137}
e2b5e7aa 1138
1139static void emit_jmpreg(u_int r)
57871462 1140{
1141 assem_debug("mov pc,%s\n",regname[r]);
1142 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1143}
1144
// Emit a function return: jump to the address in r14 (the link register).
static void emit_ret(void)
{
  enum { LINK_REG = 14 };
  emit_jmpreg(LINK_REG);
}
1149
e2b5e7aa 1150static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1151{
1152 assert(offset>-4096&&offset<4096);
1153 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1154 if(offset>=0) {
1155 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1156 }else{
1157 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1158 }
1159}
e2b5e7aa 1160
1161static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1162{
1163 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1164 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1165}
39b71d9a 1166#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1167
1168static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1169{
1170 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1171 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1172}
e2b5e7aa 1173
1174static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1175{
1176 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1177 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1178}
e2b5e7aa 1179
37387d8b 1180static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1181{
1182 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1183 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1184}
1185
e2b5e7aa 1186static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1187{
1188 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1189 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1190}
e2b5e7aa 1191
37387d8b 1192static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1193{
1194 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1195 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1196}
1197
e2b5e7aa 1198static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1199{
1200 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1201 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1202}
e2b5e7aa 1203
37387d8b 1204static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1205{
1206 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1207 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1208}
1209
e2b5e7aa 1210static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1211{
1212 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1213 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1214}
e2b5e7aa 1215
37387d8b 1216static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1217{
1218 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1219 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1220}
1221
e2b5e7aa 1222static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1223{
1224 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1225 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1226}
1227
1228static void emit_str_dualindexed(int rs1, int rs2, int rt)
1229{
1230 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1231 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1232}
1233
1234static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1235{
1236 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1237 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1238}
1239
1240static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1241{
1242 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1243 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1244}
e2b5e7aa 1245
e2b5e7aa 1246static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1247{
1248 assert(offset>-256&&offset<256);
1249 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1250 if(offset>=0) {
1251 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1252 }else{
1253 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1254 }
1255}
e2b5e7aa 1256
e2b5e7aa 1257static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1258{
1259 assert(offset>-256&&offset<256);
1260 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1261 if(offset>=0) {
1262 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1263 }else{
1264 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1265 }
1266}
e2b5e7aa 1267
1268static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1269{
1270 assert(offset>-4096&&offset<4096);
1271 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1272 if(offset>=0) {
1273 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1274 }else{
1275 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1276 }
1277}
e2b5e7aa 1278
e2b5e7aa 1279static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1280{
1281 assert(offset>-256&&offset<256);
1282 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1283 if(offset>=0) {
1284 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1285 }else{
1286 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1287 }
1288}
e2b5e7aa 1289
054175e9 1290static void emit_ldrd(int offset, int rs, int rt)
1291{
1292 assert(offset>-256&&offset<256);
1293 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1294 if(offset>=0) {
1295 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1296 }else{
1297 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1298 }
1299}
e2b5e7aa 1300
643aeae3 1301static void emit_readword(void *addr, int rt)
57871462 1302{
643aeae3 1303 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1304 assert(offset<4096);
1305 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1306 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1307}
39b71d9a 1308#define emit_readptr emit_readword
e2b5e7aa 1309
e2b5e7aa 1310static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1311{
1312 assert(offset>-4096&&offset<4096);
1313 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1314 if(offset>=0) {
1315 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1316 }else{
1317 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1318 }
1319}
e2b5e7aa 1320
e2b5e7aa 1321static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1322{
1323 assert(offset>-256&&offset<256);
1324 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1325 if(offset>=0) {
1326 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1327 }else{
1328 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1329 }
1330}
e2b5e7aa 1331
1332static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1333{
1334 assert(offset>-4096&&offset<4096);
1335 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1336 if(offset>=0) {
1337 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1338 }else{
1339 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1340 }
1341}
e2b5e7aa 1342
e2b5e7aa 1343static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1344{
1345 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1346 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1347}
e2b5e7aa 1348
1349static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1350{
1351 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1352 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1353}
e2b5e7aa 1354
1355static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1356{
1357 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1358 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1359}
e2b5e7aa 1360
643aeae3 1361static void emit_writeword(int rt, void *addr)
57871462 1362{
643aeae3 1363 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1364 assert(offset<4096);
1365 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1366 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1367}
e2b5e7aa 1368
e2b5e7aa 1369static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1370{
1371 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1372 assert(rs1<16);
1373 assert(rs2<16);
1374 assert(hi<16);
1375 assert(lo<16);
1376 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1377}
e2b5e7aa 1378
1379static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1380{
1381 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1382 assert(rs1<16);
1383 assert(rs2<16);
1384 assert(hi<16);
1385 assert(lo<16);
1386 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1387}
1388
e2b5e7aa 1389static void emit_clz(int rs,int rt)
57871462 1390{
1391 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1392 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1393}
1394
e2b5e7aa 1395static void emit_subcs(int rs1,int rs2,int rt)
57871462 1396{
1397 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1398 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1399}
1400
e2b5e7aa 1401static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1402{
1403 assert(imm>0);
1404 assert(imm<32);
1405 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1406 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1407}
1408
e2b5e7aa 1409static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1410{
1411 assert(imm>0);
1412 assert(imm<32);
1413 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1414 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1415}
1416
e2b5e7aa 1417static void emit_negmi(int rs, int rt)
57871462 1418{
1419 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1420 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1421}
1422
e2b5e7aa 1423static void emit_negsmi(int rs, int rt)
57871462 1424{
1425 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1426 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1427}
1428
e2b5e7aa 1429static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1430{
1431 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1432 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1433}
1434
e2b5e7aa 1435static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1436{
1437 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1438 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1439}
1440
e2b5e7aa 1441static void emit_teq(int rs, int rt)
57871462 1442{
1443 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1444 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1445}
1446
3968e69e 1447static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1448{
1449 u_int armval;
cfbd3c6e 1450 genimm_checked(imm,&armval);
57871462 1451 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1453}
1454
57871462 1455// Conditionally select one of two immediates, optimizing for small code size
1456// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1457static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1458{
1459 u_int armval;
1460 if(genimm(imm2-imm1,&armval)) {
1461 emit_movimm(imm1,rt);
1462 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1463 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1464 }else if(genimm(imm1-imm2,&armval)) {
1465 emit_movimm(imm1,rt);
1466 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1467 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1468 }
1469 else {
665f33e1 1470 #ifndef HAVE_ARMV7
57871462 1471 emit_movimm(imm1,rt);
1472 add_literal((int)out,imm2);
1473 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1474 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1475 #else
1476 emit_movw(imm1&0x0000FFFF,rt);
1477 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1478 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1479 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1480 }
1481 emit_movt(imm1&0xFFFF0000,rt);
1482 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1483 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1484 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1485 }
1486 #endif
1487 }
1488}
1489
57871462 1490// special case for checking invalid_code
e2b5e7aa 1491static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1492{
1493 assert(imm<128&&imm>=0);
1494 assert(r>=0&&r<16);
1495 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1496 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1497 emit_cmpimm(HOST_TEMPREG,imm);
1498}
1499
e2b5e7aa 1500static void emit_callne(int a)
0bbd1454 1501{
1502 assem_debug("blne %x\n",a);
1503 u_int offset=genjmp(a);
1504 output_w32(0x1b000000|offset);
1505}
1506
57871462 1507// Used to preload hash table entries
e2b5e7aa 1508static unused void emit_prefetchreg(int r)
57871462 1509{
1510 assem_debug("pld %s\n",regname[r]);
1511 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1512}
1513
1514// Special case for mini_ht
e2b5e7aa 1515static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1516{
1517 assert(offset<4096);
1518 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1519 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1520}
1521
e2b5e7aa 1522static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1523{
1524 u_int armval;
cfbd3c6e 1525 genimm_checked(imm,&armval);
b9b61529 1526 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1527 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1528}
1529
e2b5e7aa 1530static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1531{
1532 u_int armval;
cfbd3c6e 1533 genimm_checked(imm,&armval);
b9b61529 1534 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1535 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1536}
1537
e2b5e7aa 1538static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1539{
1540 u_int armval;
1541 genimm_checked(imm,&armval);
1542 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1543 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1544}
1545
e2b5e7aa 1546static void emit_jno_unlikely(int a)
57871462 1547{
1548 //emit_jno(a);
1549 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1550 output_w32(0x72800000|rd_rn_rm(15,15,0));
1551}
1552
054175e9 1553static void save_regs_all(u_int reglist)
57871462 1554{
054175e9 1555 int i;
57871462 1556 if(!reglist) return;
1557 assem_debug("stmia fp,{");
054175e9 1558 for(i=0;i<16;i++)
1559 if(reglist&(1<<i))
1560 assem_debug("r%d,",i);
57871462 1561 assem_debug("}\n");
1562 output_w32(0xe88b0000|reglist);
1563}
e2b5e7aa 1564
054175e9 1565static void restore_regs_all(u_int reglist)
57871462 1566{
054175e9 1567 int i;
57871462 1568 if(!reglist) return;
1569 assem_debug("ldmia fp,{");
054175e9 1570 for(i=0;i<16;i++)
1571 if(reglist&(1<<i))
1572 assem_debug("r%d,",i);
57871462 1573 assem_debug("}\n");
1574 output_w32(0xe89b0000|reglist);
1575}
e2b5e7aa 1576
054175e9 1577// Save registers before function call
1578static void save_regs(u_int reglist)
1579{
4d646738 1580 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1581 save_regs_all(reglist);
1582}
e2b5e7aa 1583
054175e9 1584// Restore registers after function call
1585static void restore_regs(u_int reglist)
1586{
4d646738 1587 reglist&=CALLER_SAVE_REGS;
054175e9 1588 restore_regs_all(reglist);
1589}
57871462 1590
57871462 1591/* Stubs/epilogue */
1592
e2b5e7aa 1593static void literal_pool(int n)
57871462 1594{
1595 if(!literalcount) return;
1596 if(n) {
1597 if((int)out-literals[0][0]<4096-n) return;
1598 }
1599 u_int *ptr;
1600 int i;
1601 for(i=0;i<literalcount;i++)
1602 {
77750690 1603 u_int l_addr=(u_int)out;
1604 int j;
1605 for(j=0;j<i;j++) {
1606 if(literals[j][1]==literals[i][1]) {
1607 //printf("dup %08x\n",literals[i][1]);
1608 l_addr=literals[j][0];
1609 break;
1610 }
1611 }
57871462 1612 ptr=(u_int *)literals[i][0];
77750690 1613 u_int offset=l_addr-(u_int)ptr-8;
57871462 1614 assert(offset<4096);
1615 assert(!(offset&3));
1616 *ptr|=offset;
77750690 1617 if(l_addr==(u_int)out) {
1618 literals[i][0]=l_addr; // remember for dupes
1619 output_w32(literals[i][1]);
1620 }
57871462 1621 }
1622 literalcount=0;
1623}
1624
e2b5e7aa 1625static void literal_pool_jumpover(int n)
57871462 1626{
1627 if(!literalcount) return;
1628 if(n) {
1629 if((int)out-literals[0][0]<4096-n) return;
1630 }
df4dc2b1 1631 void *jaddr = out;
57871462 1632 emit_jmp(0);
1633 literal_pool(0);
df4dc2b1 1634 set_jump_target(jaddr, out);
57871462 1635}
1636
7c3a5182 1637// parsed by get_pointer, find_extjump_insn
1638static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1639{
1640 u_char *ptr=(u_char *)addr;
1641 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1642 (void)ptr;
1643
57871462 1644 emit_loadlp(target,0);
643aeae3 1645 emit_loadlp((u_int)addr,1);
d62c125a 1646 assert(addr>=ndrc->translation_cache&&addr<(ndrc->translation_cache+(1<<TARGET_SIZE_2)));
57871462 1647 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1648//DEBUG >
1649#ifdef DEBUG_CYCLE_COUNT
643aeae3 1650 emit_readword(&last_count,ECX);
57871462 1651 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1652 emit_readword(&next_interupt,ECX);
1653 emit_writeword(HOST_CCREG,&Count);
57871462 1654 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1655 emit_writeword(ECX,&last_count);
57871462 1656#endif
1657//DEBUG <
2a014d73 1658 emit_far_jump(linker);
57871462 1659}
1660
d1e4ebd9 1661static void check_extjump2(void *src)
1662{
1663 u_int *ptr = src;
1664 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1665 (void)ptr;
1666}
1667
13e35c04 1668// put rt_val into rt, potentially making use of rs with value rs_val
1669static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1670{
8575a877 1671 u_int armval;
1672 int diff;
1673 if(genimm(rt_val,&armval)) {
1674 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1675 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1676 return;
1677 }
1678 if(genimm(~rt_val,&armval)) {
1679 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1680 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1681 return;
1682 }
1683 diff=rt_val-rs_val;
1684 if(genimm(diff,&armval)) {
1685 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1686 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1687 return;
1688 }else if(genimm(-diff,&armval)) {
1689 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1690 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1691 return;
1692 }
1693 emit_movimm(rt_val,rt);
1694}
1695
// Return 1 if emit_movimm_from() can do its job cheaply for the pair
// (v1 held in a register, v2 wanted), otherwise 0.
// That is the case when the values are equal, or when v2-v1 or v1-v2, after
// dropping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2)
    return 1;

  const int diff=v2-v1;
  u_int xs;

  // Positive direction: candidate for an "add" immediate.
  xs=diff;
  while(xs!=0&&(xs&3)==0)
    xs>>=2;
  if(xs<0x100)
    return 1;

  // Negative direction: candidate for a "sub" immediate.
  xs=-diff;
  while(xs!=0&&(xs&3)==0)
    xs>>=2;
  return xs<0x100;
}
cbbab9cd 1711
b14b6a8f 1712static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1713{
1714 switch(type) {
1715 case LOADB_STUB: emit_signextend8(rs,rt); break;
1716 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1717 case LOADH_STUB: emit_signextend16(rs,rt); break;
1718 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1719 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1720 default: assert(0);
1721 }
1722}
1723
b1be1eee 1724#include "pcsxmem.h"
1725#include "pcsxmem_inline.c"
b1be1eee 1726
// Emit the out-of-line slow path for load stub n.
//
// The generated code looks up the mem_rtab entry for the page of the address
// held in rs (index rs>>12), shifts that entry left by one with flags, and
// then:
//  - on carry clear, performs the load directly with a carry-clear
//    conditional load from (shifted entry + rs) and branches back to
//    stubs[n].retaddr (via the register-restore code if registers were saved);
//  - otherwise calls jump_handler_read8/16/32 with the cycle count in r2 and
//    moves the result from r0 into the destination register.
//
// Stub fields used: .addr (branch to patch to point here), .type, .a
// (instruction index), .b (address register), .c (struct regstat *),
// .d (cycle adjustment), .e (live register list), .retaddr.
e2b5e7aa 1727static void do_readstub(int n)
57871462 1728{
b14b6a8f 1729 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1730 literal_pool(256);
b14b6a8f 1731 set_jump_target(stubs[n].addr, out);
1732 enum stub_type type=stubs[n].type;
1733 int i=stubs[n].a;
1734 int rs=stubs[n].b;
81dbbf4c 1735 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1736 u_int reglist=stubs[n].e;
81dbbf4c 1737 const signed char *i_regmap=i_regs->regmap;
581335b0 1738 int rt;
 // Coprocessor loads and LOADLR go through FTEMP; everything else loads
 // straight into the host register mapped to rt1.
cf95b4f0 1739 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1740 rt=get_reg(i_regmap,FTEMP);
1741 }else{
cf95b4f0 1742 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1743 }
1744 assert(rs>=0);
df4dc2b1 1745 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1746 void *restore_jump = NULL;
 // The address register must not be picked as a scratch register.
c6c3b1b3 1747 reglist|=(1<<rs);
 // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
1748 for(r=0;r<=12;r++) {
1749 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1750 temp=r; break;
1751 }
1752 }
 // The destination is about to be overwritten, so it is dropped from the
 // save/restore list.
cf95b4f0 1753 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1754 reglist&=~(1<<rt);
 // No free register: save the live ones now and use r0 (r2 if rs is r0).
1755 if(temp==-1) {
1756 save_regs(reglist);
1757 regs_saved=1;
1758 temp=(rs==0)?2:0;
1759 }
 // Prefer r1 over HOST_TEMPREG as the second scratch register when r1 is
 // either already saved or not live, and is neither temp nor rs.
1760 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1761 temp2=1;
 // temp2 = ((u_int *)mem_rtab)[rs>>12], then shifted left by one.
 // NOTE(review): emit_lsls_imm presumably sets carry from the entry's top
 // bit, which the "cc" load and branch below rely on - confirm.
643aeae3 1762 emit_readword(&mem_rtab,temp);
c6c3b1b3 1763 emit_shrimm(rs,12,temp2);
1764 emit_readword_dualindexedx4(temp,temp2,temp2);
1765 emit_lsls_imm(temp2,1,temp2);
 // Direct access: conditional (carry clear) load from temp2 + rs.
cf95b4f0 1766 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1767 switch(type) {
1768 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1769 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1770 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1771 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1772 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1773 default: assert(0);
c6c3b1b3 1774 }
1775 }
 // On carry clear the access is done: leave the stub, going through the
 // register restore first if registers were saved above.
1776 if(regs_saved) {
df4dc2b1 1777 restore_jump=out;
c6c3b1b3 1778 emit_jcc(0); // jump to reg restore
1779 }
1780 else
b14b6a8f 1781 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1782
 // Handler path: registers must be saved before the call.
1783 if(!regs_saved)
1784 save_regs(reglist);
643aeae3 1785 void *handler=NULL;
c6c3b1b3 1786 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1787 handler=jump_handler_read8;
c6c3b1b3 1788 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1789 handler=jump_handler_read16;
c6c3b1b3 1790 if(type==LOADW_STUB)
643aeae3 1791 handler=jump_handler_read32;
1792 assert(handler);
b96d3df7 1793 pass_args(rs,temp2);
 // r2 = cycle count (loaded from CCREG if it has no host register) plus the
 // adjusted cycle offset of this instruction.
c6c3b1b3 1794 int cc=get_reg(i_regmap,CCREG);
1795 if(cc<0)
1796 emit_loadreg(CCREG,2);
bb4f300c 1797 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
2a014d73 1798 emit_far_call(handler);
 // Move the handler's result from r0 into rt with the proper extension.
cf95b4f0 1799 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1800 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1801 }
1802 if(restore_jump)
df4dc2b1 1803 set_jump_target(restore_jump, out);
c6c3b1b3 1804 restore_regs(reglist);
b14b6a8f 1805 emit_jmp(stubs[n].retaddr); // return address
57871462 1806}
1807
// Emit an inline read for a load whose address (addr) is known at compile
// time.
//
// Three outcomes, in order:
//  1. pcsx_direct_read() returns non-zero: nothing more is emitted here.
//     NOTE(review): presumably it has emitted the access itself - confirm.
//  2. get_direct_memhandler() returns NULL: the address maps to plain memory
//     at host_addr, so a direct load from host_addr is emitted (nothing at
//     all if the destination is unmapped or is register 0).
//  3. Otherwise a call to the returned handler is emitted, or to the generic
//     jump_handler_read8/16/32 when pcsxmem_is_handler_dynamic(addr) is
//     non-zero, and the result is moved from r0 into rt.
//
// type:    load stub type (size/signedness)
// i:       instruction index into dops[]
// regmap:  guest->host register map
// target:  guest register used for both the address and the destination
//          lookup; 0 means the address is materialized with emit_movimm
// adj:     cycle adjustment, passed through CLOCK_ADJUST()
// reglist: live host registers to preserve around a handler call
81dbbf4c 1808static void inline_readstub(enum stub_type type, int i, u_int addr,
1809 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1810{
1811 int rs=get_reg(regmap,target);
57871462 1812 int rt=get_reg(regmap,target);
 // Fall back to the register mapped to -1 for the address when target has
 // no host register.
535d208a 1813 if(rs<0) rs=get_reg(regmap,-1);
57871462 1814 assert(rs>=0);
2a014d73 1815 u_int is_dynamic;
687b4580 1816 uintptr_t host_addr = 0;
643aeae3 1817 void *handler;
b1be1eee 1818 int cc=get_reg(regmap,CCREG);
bb4f300c 1819 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj),cc,target?rs:-1,rt))
b1be1eee 1820 return;
643aeae3 1821 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1822 if (handler == NULL) {
 // Plain memory: no load needed if the result is discarded.
cf95b4f0 1823 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1824 return;
 // rs holds addr; turn it into host_addr if they differ.
13e35c04 1825 if(addr!=host_addr)
1826 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1827 switch(type) {
1828 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1829 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1830 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1831 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1832 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1833 default: assert(0);
1834 }
1835 return;
1836 }
 // Dynamic handlers are reached through the generic read trampolines.
b1be1eee 1837 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1838 if(is_dynamic) {
1839 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1840 handler=jump_handler_read8;
b1be1eee 1841 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1842 handler=jump_handler_read16;
b1be1eee 1843 if(type==LOADW_STUB)
643aeae3 1844 handler=jump_handler_read32;
b1be1eee 1845 }
c6c3b1b3 1846
1847 // call a memhandler
 // The destination will be overwritten, so it need not be preserved.
cf95b4f0 1848 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1849 reglist&=~(1<<rt);
1850 save_regs(reglist);
 // r0 = address
1851 if(target==0)
1852 emit_movimm(addr,0);
1853 else if(rs!=0)
1854 emit_mov(rs,0);
b1be1eee 1855 if(cc<0)
1856 emit_loadreg(CCREG,2);
1857 if(is_dynamic) {
 // r1 = mem_rtab entry for this page shifted left by one, r2 = cycle count
 // plus adjustment.
1858 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
bb4f300c 1859 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
c6c3b1b3 1860 }
b1be1eee 1861 else {
 // Static handler: store (cycle count + adjustment + last_count) to Count
 // before the call.
643aeae3 1862 emit_readword(&last_count,3);
bb4f300c 1863 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
b1be1eee 1864 emit_add(2,3,2);
643aeae3 1865 emit_writeword(2,&Count);
b1be1eee 1866 }
1867
2a014d73 1868 emit_far_call(handler);
b1be1eee 1869
 // Move the handler's result from r0 into rt with the proper extension.
cf95b4f0 1870 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1871 switch(type) {
1872 case LOADB_STUB: emit_signextend8(0,rt); break;
1873 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1874 case LOADH_STUB: emit_signextend16(0,rt); break;
1875 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1876 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1877 default: assert(0);
1878 }
1879 }
1880 restore_regs(reglist);
57871462 1881}
1882
// Emit the out-of-line slow path for store stub n.
//
// The generated code looks up the mem_wtab entry for the page of the address
// held in rs (index rs>>12), shifts that entry left by one with flags, and
// then:
//  - on carry clear, performs the store directly with a carry-clear
//    conditional store to (shifted entry + rs) and branches back to
//    stubs[n].retaddr (via the register-restore code if registers were saved);
//  - otherwise calls jump_handler_write8/16/32 with the shifted table entry
//    in r3 and the cycle count in r2, and takes the updated cycle count back
//    from r0.
//
// Stub fields used: .addr (branch to patch to point here), .type, .a
// (instruction index), .b (address register), .c (struct regstat *),
// .d (cycle adjustment), .e (live register list), .retaddr.
e2b5e7aa 1883static void do_writestub(int n)
57871462 1884{
b14b6a8f 1885 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1886 literal_pool(256);
b14b6a8f 1887 set_jump_target(stubs[n].addr, out);
1888 enum stub_type type=stubs[n].type;
1889 int i=stubs[n].a;
1890 int rs=stubs[n].b;
81dbbf4c 1891 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1892 u_int reglist=stubs[n].e;
81dbbf4c 1893 const signed char *i_regmap=i_regs->regmap;
581335b0 1894 int rt,r;
 // Coprocessor stores take their data from FTEMP, others from rs2.
cf95b4f0 1895 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1896 rt=get_reg(i_regmap,r=FTEMP);
1897 }else{
cf95b4f0 1898 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1899 }
1900 assert(rs>=0);
1901 assert(rt>=0);
b14b6a8f 1902 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1903 void *restore_jump = NULL;
 // Address and data registers must not be picked as scratch registers.
b96d3df7 1904 int reglist2=reglist|(1<<rs)|(1<<rt);
 // Look for a free scratch register among r0-r9 and r12 (mask 0x13ff).
1905 for(rtmp=0;rtmp<=12;rtmp++) {
1906 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1907 temp=rtmp; break;
1908 }
1909 }
 // No free register: save the live ones now and take the first of r0-r3
 // that is neither rs nor rt.
1910 if(temp==-1) {
1911 save_regs(reglist);
1912 regs_saved=1;
1913 for(rtmp=0;rtmp<=3;rtmp++)
1914 if(rtmp!=rs&&rtmp!=rt)
1915 {temp=rtmp;break;}
1916 }
 // Prefer r3 over HOST_TEMPREG as the second scratch register when r3 is
 // either already saved or not live, and is none of temp, rs, rt. r3 is also
 // where the handler call below expects this value.
1917 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1918 temp2=3;
 // temp2 = ((u_int *)mem_wtab)[rs>>12], then shifted left by one.
 // NOTE(review): emit_lsls_imm presumably sets carry from the entry's top
 // bit, which the "cc" store and branch below rely on - confirm.
643aeae3 1919 emit_readword(&mem_wtab,temp);
b96d3df7 1920 emit_shrimm(rs,12,temp2);
1921 emit_readword_dualindexedx4(temp,temp2,temp2);
1922 emit_lsls_imm(temp2,1,temp2);
 // Direct access: conditional (carry clear) store to temp2 + rs.
1923 switch(type) {
1924 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1925 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1926 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1927 default: assert(0);
1928 }
 // On carry clear the access is done: leave the stub, going through the
 // register restore first if registers were saved above.
1929 if(regs_saved) {
df4dc2b1 1930 restore_jump=out;
b96d3df7 1931 emit_jcc(0); // jump to reg restore
1932 }
1933 else
b14b6a8f 1934 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1935
 // Handler path: registers must be saved before the call.
1936 if(!regs_saved)
1937 save_regs(reglist);
643aeae3 1938 void *handler=NULL;
b96d3df7 1939 switch(type) {
643aeae3 1940 case STOREB_STUB: handler=jump_handler_write8; break;
1941 case STOREH_STUB: handler=jump_handler_write16; break;
1942 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1943 default: assert(0);
b96d3df7 1944 }
643aeae3 1945 assert(handler);
b96d3df7 1946 pass_args(rs,rt);
 // r3 = shifted table entry
1947 if(temp2!=3)
1948 emit_mov(temp2,3);
 // r2 = cycle count (loaded from CCREG if it has no host register) plus the
 // adjusted cycle offset of this instruction.
1949 int cc=get_reg(i_regmap,CCREG);
1950 if(cc<0)
1951 emit_loadreg(CCREG,2);
bb4f300c 1952 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d),2);
b96d3df7 1953 // returns new cycle_count
2a014d73 1954 emit_far_call(handler);
 // Undo the adjustment on the returned count (r0) and put it back into the
 // cycle-count register, or into CCREG's memory slot via r2.
bb4f300c 1955 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d),cc<0?2:cc);
b96d3df7 1956 if(cc<0)
1957 emit_storereg(CCREG,2);
1958 if(restore_jump)
df4dc2b1 1959 set_jump_target(restore_jump, out);
b96d3df7 1960 restore_regs(reglist);
b14b6a8f 1961 emit_jmp(stubs[n].retaddr);
57871462 1962}
1963
81dbbf4c 1964static void inline_writestub(enum stub_type type, int i, u_int addr,
1965 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1966{
1967 int rs=get_reg(regmap,-1);
57871462 1968 int rt=get_reg(regmap,target);
1969 assert(rs>=0);
1970 assert(rt>=0);
687b4580 1971 uintptr_t host_addr = 0;
643aeae3 1972 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1973 if (handler == NULL) {
13e35c04 1974 if(addr!=host_addr)
1975 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1976 switch(type) {
1977 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1978 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1979 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1980 default: assert(0);
1981 }
1982 return;
1983 }
1984
1985 // call a memhandler
1986 save_regs(reglist);
13e35c04 1987 pass_args(rs,rt);
b96d3df7 1988 int cc=get_reg(regmap,CCREG);
1989 if(cc<0)
1990 emit_loadreg(CCREG,2);
bb4f300c 1991 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj),2);
643aeae3 1992 emit_movimm((u_int)handler,3);
b96d3df7 1993 // returns new cycle_count
2a014d73 1994 emit_far_call(jump_handler_write_h);
bb4f300c 1995 emit_addimm(0,-CLOCK_ADJUST(adj),cc<0?2:cc);
b96d3df7 1996 if(cc<0)
1997 emit_storereg(CCREG,2);
1998 restore_regs(reglist);
57871462 1999}
2000
d1e4ebd9 2001// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 2002static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 2003{
665f33e1 2004 #ifndef HAVE_ARMV7
7c3a5182 2005 emit_loadlp((int)source, 1);
2006 emit_loadlp((int)copy, 2);
3d680478 2007 emit_loadlp(source_len, 3);
57871462 2008 #else
7c3a5182 2009 emit_movw(((u_int)source)&0x0000FFFF, 1);
2010 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2011 emit_movt(((u_int)source)&0xFFFF0000, 1);
2012 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2013 emit_movw(source_len, 3);
57871462 2014 #endif
7c3a5182 2015 emit_movimm(arg0, 0);
2016}
2017
3d680478 2018static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2019{
2020 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2021 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2022 emit_far_call(verify_code);
df4dc2b1 2023 void *entry = out;
57871462 2024 load_regs_entry(i);
df4dc2b1 2025 if (entry == out)
2026 entry = instr_addr[i];
57871462 2027 emit_jmp(instr_addr[i]);
2028 return entry;
2029}
2030
3d680478 2031static void do_dirty_stub_ds(u_int source_len)
57871462 2032{
3d680478 2033 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2034 emit_far_call(verify_code_ds);
57871462 2035}
2036
57871462 2037/* Special assem */
2038
81dbbf4c 2039static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2040{
2041 save_regs_all(reglist);
32631e6a 2042 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2043#ifdef PCNT
81dbbf4c 2044 emit_movimm(op, 0);
2a014d73 2045 emit_far_call(pcnt_gte_start);
82ed88eb 2046#endif
81dbbf4c 2047 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2048}
2049
2050static void c2op_epilogue(u_int op,u_int reglist)
2051{
82ed88eb 2052#ifdef PCNT
2053 emit_movimm(op,0);
2a014d73 2054 emit_far_call(pcnt_gte_end);
82ed88eb 2055#endif
054175e9 2056 restore_regs_all(reglist);
2057}
2058
6c0eefaf 2059static void c2op_call_MACtoIR(int lm,int need_flags)
2060{
2061 if(need_flags)
2a014d73 2062 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2063 else
2a014d73 2064 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2065}
2066
2067static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2068{
2a014d73 2069 emit_far_call(func);
6c0eefaf 2070 // func is C code and trashes r0
2071 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2072 if(need_flags||need_ir)
2073 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2074 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2075}
2076
81dbbf4c 2077static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2078{
81dbbf4c 2079 u_int c2op = source[i] & 0x3f;
2080 u_int reglist_full = get_host_reglist(i_regs->regmap);
2081 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2082 int need_flags, need_ir;
b9b61529 2083
2084 if (gte_handlers[c2op]!=NULL) {
bedfea38 2085 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2086 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2087 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2088 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2089 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2090 need_flags=0;
6c0eefaf 2091 int shift = (source[i] >> 19) & 1;
2092 int lm = (source[i] >> 10) & 1;
054175e9 2093 switch(c2op) {
19776aef 2094#ifndef DRC_DBG
054175e9 2095 case GTE_MVMVA: {
82336ba3 2096#ifdef HAVE_ARMV5
054175e9 2097 int v = (source[i] >> 15) & 3;
2098 int cv = (source[i] >> 13) & 3;
2099 int mx = (source[i] >> 17) & 3;
4d646738 2100 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2101 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2102 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2103 if(v<3)
2104 emit_ldrd(v*8,0,4);
2105 else {
2106 emit_movzwl_indexed(9*4,0,4); // gteIR
2107 emit_movzwl_indexed(10*4,0,6);
2108 emit_movzwl_indexed(11*4,0,5);
2109 emit_orrshl_imm(6,16,4);
2110 }
2111 if(mx<3)
2112 emit_addimm(0,32*4+mx*8*4,6);
2113 else
643aeae3 2114 emit_readword(&zeromem_ptr,6);
054175e9 2115 if(cv<3)
2116 emit_addimm(0,32*4+(cv*8+5)*4,7);
2117 else
643aeae3 2118 emit_readword(&zeromem_ptr,7);
054175e9 2119#ifdef __ARM_NEON__
2120 emit_movimm(source[i],1); // opcode
2a014d73 2121 emit_far_call(gteMVMVA_part_neon);
054175e9 2122 if(need_flags) {
2123 emit_movimm(lm,1);
2a014d73 2124 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2125 }
2126#else
2127 if(cv==3&&shift)
2a014d73 2128 emit_far_call((int)gteMVMVA_part_cv3sh12_arm);
054175e9 2129 else {
2130 emit_movimm(shift,1);
2a014d73 2131 emit_far_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
054175e9 2132 }
6c0eefaf 2133 if(need_flags||need_ir)
2134 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2135#endif
2136#else /* if not HAVE_ARMV5 */
81dbbf4c 2137 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2138 emit_movimm(source[i],1); // opcode
643aeae3 2139 emit_writeword(1,&psxRegs.code);
2a014d73 2140 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2141#endif
2142 break;
2143 }
6c0eefaf 2144 case GTE_OP:
81dbbf4c 2145 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2146 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2147 if(need_flags||need_ir) {
2148 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2149 c2op_call_MACtoIR(lm,need_flags);
2150 }
2151 break;
2152 case GTE_DPCS:
81dbbf4c 2153 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2154 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2155 break;
2156 case GTE_INTPL:
81dbbf4c 2157 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2158 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2159 break;
2160 case GTE_SQR:
81dbbf4c 2161 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2162 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2163 if(need_flags||need_ir) {
2164 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2165 c2op_call_MACtoIR(lm,need_flags);
2166 }
2167 break;
2168 case GTE_DCPL:
81dbbf4c 2169 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2170 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2171 break;
2172 case GTE_GPF:
81dbbf4c 2173 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2174 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2175 break;
2176 case GTE_GPL:
81dbbf4c 2177 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2178 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2179 break;
19776aef 2180#endif
054175e9 2181 default:
81dbbf4c 2182 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2183#ifdef DRC_DBG
2184 emit_movimm(source[i],1); // opcode
643aeae3 2185 emit_writeword(1,&psxRegs.code);
19776aef 2186#endif
2a014d73 2187 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2188 break;
2189 }
2190 c2op_epilogue(c2op,reglist);
2191 }
b9b61529 2192}
2193
/*
 * Emit the value transformation applied when writing COP2 control
 * register 31. In C terms (sl = source, temp = result):
 *
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shift right then left by 12 to drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);
  // the 0x7f87e000 mask is tested in three pieces; each follow-up test only
  // runs while the previous result was "equal" (no bit found so far)
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);
  // any bit found -> set bit 31
  emit_orrne_imm(temp, 0x80000000, temp);
}
2205
2206static void do_mfc2_31_one(u_int copr,signed char temp)
2207{
2208 emit_readword(&reg_cop2d[copr],temp);
2209 emit_testimm(temp,0x8000); // do we need this?
2210 emit_andne_imm(temp,0,temp);
2211 emit_cmpimm(temp,0xf80);
2212 emit_andimm(temp,0xf80,temp);
2213 emit_cmovae_imm(0xf80,temp);
2214}
2215
2216static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2217{
2218 if (temp < 0) {
2219 host_tempreg_acquire();
2220 temp = HOST_TEMPREG;
2221 }
2222 do_mfc2_31_one(9,temp);
2223 emit_shrimm(temp,7,tl);
2224 do_mfc2_31_one(10,temp);
2225 emit_orrshr_imm(temp,2,tl);
2226 do_mfc2_31_one(11,temp);
2227 emit_orrshl_imm(temp,3,tl);
2228 emit_writeword(tl,&reg_cop2d[29]);
2229 if (temp == HOST_TEMPREG)
2230 host_tempreg_release();
2231}
2232
e2b5e7aa 2233static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2234{
2235 // case 0x18: MULT
2236 // case 0x19: MULTU
2237 // case 0x1A: DIV
2238 // case 0x1B: DIVU
2239 // case 0x1C: DMULT
2240 // case 0x1D: DMULTU
2241 // case 0x1E: DDIV
2242 // case 0x1F: DDIVU
cf95b4f0 2243 if(dops[i].rs1&&dops[i].rs2)
57871462 2244 {
cf95b4f0 2245 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2246 {
cf95b4f0 2247 if(dops[i].opcode2==0x18) // MULT
57871462 2248 {
cf95b4f0 2249 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2250 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2251 signed char hi=get_reg(i_regs->regmap,HIREG);
2252 signed char lo=get_reg(i_regs->regmap,LOREG);
2253 assert(m1>=0);
2254 assert(m2>=0);
2255 assert(hi>=0);
2256 assert(lo>=0);
2257 emit_smull(m1,m2,hi,lo);
2258 }
cf95b4f0 2259 if(dops[i].opcode2==0x19) // MULTU
57871462 2260 {
cf95b4f0 2261 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2262 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2263 signed char hi=get_reg(i_regs->regmap,HIREG);
2264 signed char lo=get_reg(i_regs->regmap,LOREG);
2265 assert(m1>=0);
2266 assert(m2>=0);
2267 assert(hi>=0);
2268 assert(lo>=0);
2269 emit_umull(m1,m2,hi,lo);
2270 }
cf95b4f0 2271 if(dops[i].opcode2==0x1A) // DIV
57871462 2272 {
cf95b4f0 2273 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2274 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2275 assert(d1>=0);
2276 assert(d2>=0);
2277 signed char quotient=get_reg(i_regs->regmap,LOREG);
2278 signed char remainder=get_reg(i_regs->regmap,HIREG);
2279 assert(quotient>=0);
2280 assert(remainder>=0);
2281 emit_movs(d1,remainder);
44a80f6a 2282 emit_movimm(0xffffffff,quotient);
2283 emit_negmi(quotient,quotient); // .. quotient and ..
2284 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2285 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2286 emit_jeq(out+52); // Division by zero
82336ba3 2287 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2288#ifdef HAVE_ARMV5
57871462 2289 emit_clz(HOST_TEMPREG,quotient);
2290 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2291#else
2292 emit_movimm(0,quotient);
2293 emit_addpl_imm(quotient,1,quotient);
2294 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2295 emit_jns(out-2*4);
665f33e1 2296#endif
57871462 2297 emit_orimm(quotient,1<<31,quotient);
2298 emit_shr(quotient,quotient,quotient);
2299 emit_cmp(remainder,HOST_TEMPREG);
2300 emit_subcs(remainder,HOST_TEMPREG,remainder);
2301 emit_adcs(quotient,quotient,quotient);
2302 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2303 emit_jcc(out-16); // -4
57871462 2304 emit_teq(d1,d2);
2305 emit_negmi(quotient,quotient);
2306 emit_test(d1,d1);
2307 emit_negmi(remainder,remainder);
2308 }
cf95b4f0 2309 if(dops[i].opcode2==0x1B) // DIVU
57871462 2310 {
cf95b4f0 2311 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2312 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2313 assert(d1>=0);
2314 assert(d2>=0);
2315 signed char quotient=get_reg(i_regs->regmap,LOREG);
2316 signed char remainder=get_reg(i_regs->regmap,HIREG);
2317 assert(quotient>=0);
2318 assert(remainder>=0);
44a80f6a 2319 emit_mov(d1,remainder);
2320 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2321 emit_test(d2,d2);
7c3a5182 2322 emit_jeq(out+40); // Division by zero
665f33e1 2323#ifdef HAVE_ARMV5
57871462 2324 emit_clz(d2,HOST_TEMPREG);
2325 emit_movimm(1<<31,quotient);
2326 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2327#else
2328 emit_movimm(0,HOST_TEMPREG);
82336ba3 2329 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2330 emit_lslpls_imm(d2,1,d2);
7c3a5182 2331 emit_jns(out-2*4);
665f33e1 2332 emit_movimm(1<<31,quotient);
2333#endif
57871462 2334 emit_shr(quotient,HOST_TEMPREG,quotient);
2335 emit_cmp(remainder,d2);
2336 emit_subcs(remainder,d2,remainder);
2337 emit_adcs(quotient,quotient,quotient);
2338 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2339 emit_jcc(out-16); // -4
57871462 2340 }
2341 }
2342 else // 64-bit
71e490c5 2343 assert(0);
57871462 2344 }
2345 else
2346 {
2347 // Multiply by zero is zero.
2348 // MIPS does not have a divide by zero exception.
2349 // The result is undefined, we return zero.
2350 signed char hr=get_reg(i_regs->regmap,HIREG);
2351 signed char lr=get_reg(i_regs->regmap,LOREG);
2352 if(hr>=0) emit_zeroreg(hr);
2353 if(lr>=0) emit_zeroreg(lr);
2354 }
2355}
2356#define multdiv_assemble multdiv_assemble_arm
2357
d1e4ebd9 2358static void do_jump_vaddr(int rs)
2359{
2a014d73 2360 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2361}
2362
/*
 * Intentionally emits nothing on ARM.
 */
static void do_preload_rhash(int r) {
  // Nothing to preload here. On x86 this step puts the value 0xf8 into the
  // register; on ARM the hash is computed with a single instruction instead
  // (see do_rhash).
}
2367
e2b5e7aa 2368static void do_preload_rhtbl(int ht) {
57871462 2369 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2370}
2371
/*
 * Emit the mini hash table hash: rh = rs & 0xf8.
 */
static void do_rhash(int rs, int rh) {
  emit_andimm(rs, 0xf8, rh);
}
2375
e2b5e7aa 2376static void do_miniht_load(int ht,int rh) {
57871462 2377 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2378 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2379}
2380
/*
 * Emit the mini hash table dispatch: compare rh with rs and, when equal,
 * load pc (r15) from [ht+4]; otherwise fall through to the generic
 * jump_vaddr path for rs.
 */
static void do_miniht_jump(int rs, int rh, int ht) {
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the target address is always passed in r7
  if (rs != 7) {
    emit_mov(rs, 7);
    rs = 7;
  }
#endif
  do_jump_vaddr(rs);
}
2391
e2b5e7aa 2392static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2393 #ifndef HAVE_ARMV7
57871462 2394 emit_movimm(return_address,rt); // PC into link register
643aeae3 2395 add_to_linker(out,return_address,1);
57871462 2396 emit_pcreladdr(temp);
643aeae3 2397 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2398 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2399 #else
2400 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2401 add_to_linker(out,return_address,1);
57871462 2402 emit_pcreladdr(temp);
643aeae3 2403 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2404 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2405 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2406 #endif
2407}
2408
57871462 2409// CPU-architecture-specific initialization
2a014d73 2410static void arch_init(void)
2411{
2412 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2413 struct tramp_insns *ops = ndrc->tramp.ops;
2414 size_t i;
2415 assert(!(diff & 3));
2416 assert(diff < 0x1000);
2417 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2418 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2419 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2420 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2421}
b9b61529 2422
2423// vim:shiftwidth=2:expandtab