drc: something works on arm64
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
643aeae3 33u_char *translation_cache;
1e212a25 34#else
643aeae3 35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
bdeade46 36#endif
37
/*
 * Mask of caller-saved registers. Presumably bit N stands for rN, which
 * would make this r0-r3 and r12, plus r9 on __MACH__ targets -- confirm
 * against the code that consumes the mask.
 */
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif
43
e2b5e7aa 44#define unused __attribute__((unused))
45
dd114d7d 46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
/* Routines declared here but not defined in the visible part of this file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr entry point per usable host register. */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/*
 * jump_vaddr_rN routine, indexed by host register number N.
 * Slots 11 and 13-15 have no routine and are left null.
 */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 86
0bbd1454 87void invalidate_addr_r0();
88void invalidate_addr_r1();
89void invalidate_addr_r2();
90void invalidate_addr_r3();
91void invalidate_addr_r4();
92void invalidate_addr_r5();
93void invalidate_addr_r6();
94void invalidate_addr_r7();
95void invalidate_addr_r8();
96void invalidate_addr_r9();
97void invalidate_addr_r10();
98void invalidate_addr_r12();
99
100const u_int invalidate_addr_reg[16] = {
101 (int)invalidate_addr_r0,
102 (int)invalidate_addr_r1,
103 (int)invalidate_addr_r2,
104 (int)invalidate_addr_r3,
105 (int)invalidate_addr_r4,
106 (int)invalidate_addr_r5,
107 (int)invalidate_addr_r6,
108 (int)invalidate_addr_r7,
109 (int)invalidate_addr_r8,
110 (int)invalidate_addr_r9,
111 (int)invalidate_addr_r10,
112 0,
113 (int)invalidate_addr_r12,
114 0,
115 0,
116 0};
117
d148d265 118static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 119
57871462 120/* Linker */
121
/*
 * Patch the instruction at addr so that it refers to target_.
 *
 * The kind of instruction is recognised from its top byte:
 *  - 0xe2: a data-processing immediate (e.g. "add rX,pc,#imm"); the
 *          pc-relative distance is stored as imm8 = distance/4 with
 *          rotate field 0xF.
 *  - 0x72: emitted by emit_jno_unlikely; patched as above when the distance
 *          allows (rotate field 0xF or 0xE), otherwise rewritten into a
 *          conditional branch with opcode byte 0x7A.
 *  - anything else must be a B/BL; its 24-bit word offset is replaced.
 *
 * Distances are relative to addr+8 (the ARM pc read-ahead).
 */
static void set_jump_target(void *addr, void *target_)
{
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  u_int target = (u_int)target_;
  u_int disp = target - (u_int)insn - 8;
  u_char top = bytes[3];

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
153
// Variant of set_jump_target that can additionally copy the instruction found
// at the branch target into the slot just before the branch. Works, but the
// difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes = (u_char *)addr;
  u_int *insn = (u_int *)bytes;

  assert(!copy || insn[-1] == 0xe28dd000);
  if (bytes[3] == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((bytes[3] & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0               // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000   // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 188
189/* Literal pool */
e2b5e7aa 190static void add_literal(int addr,int val)
57871462 191{
15776b68 192 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 193 literals[literalcount][0]=addr;
194 literals[literalcount][1]=val;
9f51b4b9 195 literalcount++;
196}
57871462 197
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction. The stub's second word must
// be "ldr rx, [pc, #ofs]"; the pointer it would load (located at that
// instruction's address + ofs + 8) is fetched and returned.
static void *find_extjump_insn(void *stub)
{
  int *ldr = (int *)((char *)stub + 4);
  assert((*ldr & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int ofs = *ldr & 0xfff;
  void **literal = (void **)((char *)ldr + ofs + 8);
  return *literal;
}
208
// Find where an external branch is linked to, using the address of its stub:
// fetch the address loaded by the stub's second instruction (dyna_linker
// arg1), treat it as a pointer to the branch instruction, and return the
// address that branch jumps to.
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  int insn = *branch;
  assert((insn & 0x0f000000) == 0x0a000000);
  // sign-extend the 24-bit word offset and scale it to bytes
  int disp = (insn << 8) >> 6;
  return (u_char *)branch + disp + 8;
}
220
// Find the "clean" entry point from a "dirty" entry point by skipping past
// the call to verify_code. The bl is expected 4 words in (6 with
// HAVE_ARMV7), or one word later. If the word after the bl is an
// unconditional branch, its destination is returned; otherwise the address
// of that word is returned.
static void *get_clean_addr(void *addr)
{
  signed int *insn = addr;
#ifdef HAVE_ARMV7
  insn += 6;
#else
  insn += 4;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;
  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
239
/*
 * Decode the (source, copy, len) arguments embedded in the prologue at ptr
 * and compare the two memory regions.
 *
 * Without HAVE_ARMV7 the three values come from pc-relative literal loads in
 * words 0-2; with HAVE_ARMV7 they are rebuilt from movw/movt immediates in
 * words 0-4. A bl must follow the prologue (optionally one word later).
 *
 * Returns non-zero when the len bytes at source and copy are identical.
 */
static int verify_dirty(const u_int *ptr)
{
#ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr += 6;
#else
  // get from literal pool
  assert((ptr[0] & 0xFFFF0000) == 0xe59f0000);
  u_int source = *(const u_int *)((const char *)&ptr[0] + (ptr[0] & 0xfff) + 8);
  assert((ptr[1] & 0xFFFF0000) == 0xe59f0000);
  u_int copy = *(const u_int *)((const char *)&ptr[1] + (ptr[1] & 0xfff) + 8);
  assert((ptr[2] & 0xFFFF0000) == 0xe59f0000);
  u_int len = *(const u_int *)((const char *)&ptr[2] + (ptr[2] & 0xfff) + 8);
  ptr += 4;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return memcmp((void *)source, (void *)copy, len) == 0;
}
271
272// This doesn't necessarily find all clean entry points, just
273// guarantees that it's not dirty
df4dc2b1 274static int isclean(void *addr)
57871462 275{
665f33e1 276 #ifndef HAVE_ARMV7
581335b0 277 u_int *ptr=((u_int *)addr)+4;
57871462 278 #else
581335b0 279 u_int *ptr=((u_int *)addr)+6;
57871462 280 #endif
281 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
282 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
283 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 284 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
285 return 1;
286}
287
// Get the source range (host pointers) that the block at addr was compiled
// from. The source address and length are decoded from the block prologue:
// pc-relative literal loads in words 0 and 2 without HAVE_ARMV7, movw/movt
// immediates with it. A bl must follow the prologue (optionally one word
// later). On return *start is the source address and *end is start+len.
static void get_bounds(void *addr, u_char **start, u_char **end)
{
  u_int *insn = addr;
#ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*insn&0xFFF00000)==0xe3000000);
  u_int source=(insn[0]&0xFFF)+((insn[0]>>4)&0xF000)+((insn[2]<<16)&0xFFF0000)+((insn[2]<<12)&0xF0000000);
  u_int len=(insn[4]&0xFFF)+((insn[4]>>4)&0xF000);
  insn += 6;
#else
  // get from literal pool (word 1 loads the copy address, unused here)
  assert((insn[0] & 0xFFFF0000) == 0xe59f0000);
  u_int source = *(u_int *)((char *)&insn[0] + (insn[0] & 0xfff) + 8);
  assert((insn[2] & 0xFFFF0000) == 0xe59f0000);
  u_int len = *(u_int *)((char *)&insn[2] + (insn[2] & 0xfff) + 8);
  insn += 4;
#endif
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  *start = (u_char *)source;
  *end = (u_char *)source + len;
}
321
57871462 322// Allocate a specific ARM register.
e2b5e7aa 323static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 324{
325 int n;
f776eb14 326 int dirty=0;
9f51b4b9 327
57871462 328 // see if it's already allocated (and dealloc it)
329 for(n=0;n<HOST_REGS;n++)
330 {
f776eb14 331 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
332 dirty=(cur->dirty>>n)&1;
333 cur->regmap[n]=-1;
334 }
57871462 335 }
9f51b4b9 336
57871462 337 cur->regmap[hr]=reg;
338 cur->dirty&=~(1<<hr);
f776eb14 339 cur->dirty|=dirty<<hr;
57871462 340 cur->isconst&=~(1<<hr);
341}
342
343// Alloc cycle count into dedicated register
e2b5e7aa 344static void alloc_cc(struct regstat *cur,int i)
57871462 345{
346 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
347}
348
57871462 349/* Assembler */
350
e2b5e7aa 351static unused char regname[16][4] = {
57871462 352 "r0",
353 "r1",
354 "r2",
355 "r3",
356 "r4",
357 "r5",
358 "r6",
359 "r7",
360 "r8",
361 "r9",
362 "r10",
363 "fp",
364 "r12",
365 "sp",
366 "lr",
367 "pc"};
368
e2b5e7aa 369static void output_w32(u_int word)
57871462 370{
371 *((u_int *)out)=word;
372 out+=4;
373}
e2b5e7aa 374
375static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 376{
377 assert(rd<16);
378 assert(rn<16);
379 assert(rm<16);
380 return((rn<<16)|(rd<<12)|rm);
381}
e2b5e7aa 382
383static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 384{
385 assert(rd<16);
386 assert(rn<16);
387 assert(imm<256);
388 assert((shift&1)==0);
389 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
390}
e2b5e7aa 391
392static u_int genimm(u_int imm,u_int *encoded)
57871462 393{
c2e3bd42 394 *encoded=0;
395 if(imm==0) return 1;
57871462 396 int i=32;
397 while(i>0)
398 {
399 if(imm<256) {
400 *encoded=((i&30)<<7)|imm;
401 return 1;
402 }
403 imm=(imm>>2)|(imm<<30);i-=2;
404 }
405 return 0;
406}
e2b5e7aa 407
408static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 409{
410 u_int ret=genimm(imm,encoded);
411 assert(ret);
581335b0 412 (void)ret;
cfbd3c6e 413}
e2b5e7aa 414
415static u_int genjmp(u_int addr)
57871462 416{
7c3a5182 417 if (addr < 3) return 0; // a branch that will be patched later
418 int offset = addr-(int)out-8;
419 if (offset < -33554432 || offset >= 33554432) {
420 SysPrintf("genjmp: out of range: %08x\n", offset);
421 abort();
e80343e2 422 return 0;
423 }
57871462 424 return ((u_int)offset>>2)&0xffffff;
425}
426
d1e4ebd9 427static unused void emit_breakpoint(void)
428{
429 assem_debug("bkpt #0\n");
430 //output_w32(0xe1200070);
431 output_w32(0xe7f001f0);
432}
433
e2b5e7aa 434static void emit_mov(int rs,int rt)
57871462 435{
436 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
437 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
438}
439
e2b5e7aa 440static void emit_movs(int rs,int rt)
57871462 441{
442 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
443 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
444}
445
e2b5e7aa 446static void emit_add(int rs1,int rs2,int rt)
57871462 447{
448 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
449 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
450}
451
e2b5e7aa 452static void emit_adcs(int rs1,int rs2,int rt)
57871462 453{
454 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
455 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
456}
457
e2b5e7aa 458static void emit_neg(int rs, int rt)
57871462 459{
460 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
461 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
462}
463
e2b5e7aa 464static void emit_sub(int rs1,int rs2,int rt)
57871462 465{
466 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
467 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
468}
469
e2b5e7aa 470static void emit_zeroreg(int rt)
57871462 471{
472 assem_debug("mov %s,#0\n",regname[rt]);
473 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
474}
475
e2b5e7aa 476static void emit_loadlp(u_int imm,u_int rt)
790ee18e 477{
478 add_literal((int)out,imm);
479 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
480 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
481}
e2b5e7aa 482
483static void emit_movw(u_int imm,u_int rt)
790ee18e 484{
485 assert(imm<65536);
486 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
487 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
488}
e2b5e7aa 489
490static void emit_movt(u_int imm,u_int rt)
790ee18e 491{
492 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
493 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
494}
e2b5e7aa 495
496static void emit_movimm(u_int imm,u_int rt)
790ee18e 497{
498 u_int armval;
499 if(genimm(imm,&armval)) {
500 assem_debug("mov %s,#%d\n",regname[rt],imm);
501 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
502 }else if(genimm(~imm,&armval)) {
503 assem_debug("mvn %s,#%d\n",regname[rt],imm);
504 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
505 }else if(imm<65536) {
665f33e1 506 #ifndef HAVE_ARMV7
790ee18e 507 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
508 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
509 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
510 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
511 #else
512 emit_movw(imm,rt);
513 #endif
514 }else{
665f33e1 515 #ifndef HAVE_ARMV7
790ee18e 516 emit_loadlp(imm,rt);
517 #else
518 emit_movw(imm&0x0000FFFF,rt);
519 emit_movt(imm&0xFFFF0000,rt);
520 #endif
521 }
522}
e2b5e7aa 523
524static void emit_pcreladdr(u_int rt)
790ee18e 525{
526 assem_debug("add %s,pc,#?\n",regname[rt]);
527 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
528}
529
e2b5e7aa 530static void emit_loadreg(int r, int hr)
57871462 531{
3d624f89 532 if(r&64) {
c43b5311 533 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 534 assert(0);
535 return;
3d624f89 536 }
57871462 537 if((r&63)==0)
538 emit_zeroreg(hr);
539 else {
7c3a5182 540 int addr = (int)&psxRegs.GPR.r[r];
541 switch (r) {
542 //case HIREG: addr = &hi; break;
543 //case LOREG: addr = &lo; break;
544 case CCREG: addr = (int)&cycle_count; break;
545 case CSREG: addr = (int)&Status; break;
546 case INVCP: addr = (int)&invc_ptr; break;
547 default: assert(r < 34); break;
548 }
57871462 549 u_int offset = addr-(u_int)&dynarec_local;
550 assert(offset<4096);
551 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
552 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
553 }
554}
e2b5e7aa 555
556static void emit_storereg(int r, int hr)
57871462 557{
3d624f89 558 if(r&64) {
c43b5311 559 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 560 assert(0);
561 return;
3d624f89 562 }
7c3a5182 563 int addr = (int)&psxRegs.GPR.r[r];
564 switch (r) {
565 //case HIREG: addr = &hi; break;
566 //case LOREG: addr = &lo; break;
567 case CCREG: addr = (int)&cycle_count; break;
568 default: assert(r < 34); break;
569 }
57871462 570 u_int offset = addr-(u_int)&dynarec_local;
571 assert(offset<4096);
572 assem_debug("str %s,fp+%d\n",regname[hr],offset);
573 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
574}
575
e2b5e7aa 576static void emit_test(int rs, int rt)
57871462 577{
578 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
579 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
580}
581
e2b5e7aa 582static void emit_testimm(int rs,int imm)
57871462 583{
584 u_int armval;
5a05d80c 585 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 586 genimm_checked(imm,&armval);
57871462 587 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
588}
589
e2b5e7aa 590static void emit_testeqimm(int rs,int imm)
b9b61529 591{
592 u_int armval;
593 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 594 genimm_checked(imm,&armval);
b9b61529 595 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
596}
597
e2b5e7aa 598static void emit_not(int rs,int rt)
57871462 599{
600 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
601 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
602}
603
e2b5e7aa 604static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 605{
606 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
607 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
608}
609
e2b5e7aa 610static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 611{
612 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
613 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
614}
e2b5e7aa 615
e2b5e7aa 616static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 617{
618 assert(rs<16);
619 assert(rt<16);
620 assert(imm<32);
621 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
622 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
623}
624
e2b5e7aa 625static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 626{
627 assert(rs<16);
628 assert(rt<16);
629 assert(imm<32);
630 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
631 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
632}
633
e2b5e7aa 634static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 635{
636 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
637 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
638}
639
3968e69e 640static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
641{
642 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
643 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
644}
645
e2b5e7aa 646static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 647{
648 assert(rs<16);
649 assert(rt<16);
650 if(imm!=0) {
57871462 651 u_int armval;
652 if(genimm(imm,&armval)) {
653 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
654 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
655 }else if(genimm(-imm,&armval)) {
8a0a8423 656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 657 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 658 #ifdef HAVE_ARMV7
659 }else if(rt!=rs&&(u_int)imm<65536) {
660 emit_movw(imm&0x0000ffff,rt);
661 emit_add(rs,rt,rt);
662 }else if(rt!=rs&&(u_int)-imm<65536) {
663 emit_movw(-imm&0x0000ffff,rt);
664 emit_sub(rs,rt,rt);
665 #endif
666 }else if((u_int)-imm<65536) {
57871462 667 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
668 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
669 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
670 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 671 }else {
672 do {
673 int shift = (ffs(imm) - 1) & ~1;
674 int imm8 = imm & (0xff << shift);
675 genimm_checked(imm8,&armval);
676 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
677 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
678 rs = rt;
679 imm &= ~imm8;
680 }
681 while (imm != 0);
57871462 682 }
683 }
684 else if(rs!=rt) emit_mov(rs,rt);
685}
686
e2b5e7aa 687static void emit_addimm_and_set_flags(int imm,int rt)
57871462 688{
689 assert(imm>-65536&&imm<65536);
690 u_int armval;
691 if(genimm(imm,&armval)) {
692 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(genimm(-imm,&armval)) {
695 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
696 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
697 }else if(imm<0) {
698 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
699 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
700 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
701 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
702 }else{
703 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
704 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
705 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
706 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
707 }
708}
e2b5e7aa 709
710static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 711{
712 emit_addimm(rt,imm,rt);
713}
714
e2b5e7aa 715static void emit_addnop(u_int r)
57871462 716{
717 assert(r<16);
718 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
719 output_w32(0xe2800000|rd_rn_rm(r,r,0));
720}
721
e2b5e7aa 722static void emit_andimm(int rs,int imm,int rt)
57871462 723{
724 u_int armval;
790ee18e 725 if(imm==0) {
726 emit_zeroreg(rt);
727 }else if(genimm(imm,&armval)) {
57871462 728 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
729 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
730 }else if(genimm(~imm,&armval)) {
731 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
732 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
733 }else if(imm==65535) {
332a4533 734 #ifndef HAVE_ARMV6
57871462 735 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
736 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
737 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
738 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
739 #else
740 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
741 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
742 #endif
743 }else{
744 assert(imm>0&&imm<65535);
665f33e1 745 #ifndef HAVE_ARMV7
57871462 746 assem_debug("mov r14,#%d\n",imm&0xFF00);
747 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
748 assem_debug("add r14,r14,#%d\n",imm&0xFF);
749 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
750 #else
751 emit_movw(imm,HOST_TEMPREG);
752 #endif
753 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
754 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
755 }
756}
757
e2b5e7aa 758static void emit_orimm(int rs,int imm,int rt)
57871462 759{
760 u_int armval;
790ee18e 761 if(imm==0) {
762 if(rs!=rt) emit_mov(rs,rt);
763 }else if(genimm(imm,&armval)) {
57871462 764 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
765 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
766 }else{
767 assert(imm>0&&imm<65536);
768 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
769 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
770 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
771 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
772 }
773}
774
e2b5e7aa 775static void emit_xorimm(int rs,int imm,int rt)
57871462 776{
57871462 777 u_int armval;
790ee18e 778 if(imm==0) {
779 if(rs!=rt) emit_mov(rs,rt);
780 }else if(genimm(imm,&armval)) {
57871462 781 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
783 }else{
514ed0d9 784 assert(imm>0&&imm<65536);
57871462 785 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
786 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
787 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
788 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
789 }
790}
791
e2b5e7aa 792static void emit_shlimm(int rs,u_int imm,int rt)
57871462 793{
794 assert(imm>0);
795 assert(imm<32);
796 //if(imm==1) ...
797 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
e2b5e7aa 801static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
e2b5e7aa 809static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
815}
816
e2b5e7aa 817static void emit_shrimm(int rs,u_int imm,int rt)
57871462 818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
823}
824
e2b5e7aa 825static void emit_sarimm(int rs,u_int imm,int rt)
57871462 826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
831}
832
e2b5e7aa 833static void emit_rorimm(int rs,u_int imm,int rt)
57871462 834{
835 assert(imm>0);
836 assert(imm<32);
837 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
838 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
839}
840
/*
 * Sign-extend the low 16 bits of rs into rt: sxth with HAVE_ARMV6,
 * otherwise a left shift by 16 followed by an arithmetic right shift by 16.
 */
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
851
/*
 * Sign-extend the low 8 bits of rs into rt: sxtb with HAVE_ARMV6,
 * otherwise a left shift by 24 followed by an arithmetic right shift by 24.
 */
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
862
e2b5e7aa 863static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 864{
865 assert(rs<16);
866 assert(rt<16);
867 assert(shift<16);
868 //if(imm==1) ...
869 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
870 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
871}
e2b5e7aa 872
873static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 874{
875 assert(rs<16);
876 assert(rt<16);
877 assert(shift<16);
878 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
879 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
880}
e2b5e7aa 881
882static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 883{
884 assert(rs<16);
885 assert(rt<16);
886 assert(shift<16);
887 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
888 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
889}
57871462 890
3968e69e 891static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 892{
893 assert(rs<16);
894 assert(rt<16);
895 assert(shift<16);
896 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
897 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
898}
e2b5e7aa 899
3968e69e 900static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 901{
902 assert(rs<16);
903 assert(rt<16);
904 assert(shift<16);
905 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
906 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
907}
908
e2b5e7aa 909static void emit_cmpimm(int rs,int imm)
57871462 910{
911 u_int armval;
912 if(genimm(imm,&armval)) {
5a05d80c 913 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 914 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
915 }else if(genimm(-imm,&armval)) {
5a05d80c 916 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 917 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
918 }else if(imm>0) {
919 assert(imm<65536);
57871462 920 emit_movimm(imm,HOST_TEMPREG);
57871462 921 assem_debug("cmp %s,r14\n",regname[rs]);
922 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
923 }else{
924 assert(imm>-65536);
57871462 925 emit_movimm(-imm,HOST_TEMPREG);
57871462 926 assem_debug("cmn %s,r14\n",regname[rs]);
927 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
928 }
929}
930
e2b5e7aa 931static void emit_cmovne_imm(int imm,int rt)
57871462 932{
933 assem_debug("movne %s,#%d\n",regname[rt],imm);
934 u_int armval;
cfbd3c6e 935 genimm_checked(imm,&armval);
57871462 936 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
937}
e2b5e7aa 938
939static void emit_cmovl_imm(int imm,int rt)
57871462 940{
941 assem_debug("movlt %s,#%d\n",regname[rt],imm);
942 u_int armval;
cfbd3c6e 943 genimm_checked(imm,&armval);
57871462 944 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
945}
e2b5e7aa 946
947static void emit_cmovb_imm(int imm,int rt)
57871462 948{
949 assem_debug("movcc %s,#%d\n",regname[rt],imm);
950 u_int armval;
cfbd3c6e 951 genimm_checked(imm,&armval);
57871462 952 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
953}
e2b5e7aa 954
3968e69e 955static void emit_cmovae_imm(int imm,int rt)
956{
957 assem_debug("movcs %s,#%d\n",regname[rt],imm);
958 u_int armval;
959 genimm_checked(imm,&armval);
960 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
961}
962
e2b5e7aa 963static void emit_cmovne_reg(int rs,int rt)
57871462 964{
965 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
966 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
967}
e2b5e7aa 968
969static void emit_cmovl_reg(int rs,int rt)
57871462 970{
971 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
972 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
973}
e2b5e7aa 974
975static void emit_cmovs_reg(int rs,int rt)
57871462 976{
977 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
978 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
979}
980
/*
 * rt = (rs < imm) ? 1 : 0, signed compare. When rt aliases rs the zeroing
 * of rt is deferred until after the compare, using emit_movimm().
 */
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 988
/*
 * rt = (rs < imm) ? 1 : 0, unsigned compare (movcc). When rt aliases rs the
 * zeroing of rt is deferred until after the compare, using emit_movimm().
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 996
e2b5e7aa 997static void emit_cmp(int rs,int rt)
57871462 998{
999 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1000 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1001}
e2b5e7aa 1002
/*
 * rt = (rs > 0) ? 1 : 0, signed: compares rs with 1, loads 1 into rt, then
 * overrides it with 0 when the compare was "less than".
 */
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1010
// Test rs (via emit_test when rs is also the destination, otherwise via
// emit_movs into rt) and then conditionally load 1 into rt with
// emit_cmovne_imm.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1018
// Compare rs1 with rs2 and set rt to 1 under the "movlt" condition, else 0.
// If rt aliases either source, rt is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1027
// Compare rs1 with rs2 and set rt to 1 under the "movcc" condition, else 0.
// If rt aliases either source, rt is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1036
643aeae3 1037static void emit_call(const void *a_)
57871462 1038{
643aeae3 1039 int a = (int)a_;
d1e4ebd9 1040 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1041 u_int offset=genjmp(a);
1042 output_w32(0xeb000000|offset);
1043}
e2b5e7aa 1044
b14b6a8f 1045static void emit_jmp(const void *a_)
57871462 1046{
b14b6a8f 1047 int a = (int)a_;
d1e4ebd9 1048 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1049 u_int offset=genjmp(a);
1050 output_w32(0xea000000|offset);
1051}
e2b5e7aa 1052
// Emit the conditional branch traced as "bne" to address a_.
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000 | genjmp(target));
}
e2b5e7aa 1060
// Emit the conditional branch traced as "beq" to address a_.
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000 | genjmp(target));
}
e2b5e7aa 1068
// Emit the conditional branch traced as "bmi" to address a_.
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
e2b5e7aa 1076
// Emit the conditional branch traced as "bpl" to address a_.
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
e2b5e7aa 1084
// Emit the conditional branch traced as "blt" to address a_.
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
e2b5e7aa 1092
// Emit the conditional branch traced as "bge" to address a_.
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
e2b5e7aa 1100
// Emit the conditional branch traced as "bvc" to address a_.
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
e2b5e7aa 1108
// Emit the conditional branch traced as "bcs" to address a_.
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000 | genjmp(target));
}
e2b5e7aa 1116
// Emit the conditional branch traced as "bcc" to address a_.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000 | genjmp(target));
}
1124
3968e69e 1125static unused void emit_callreg(u_int r)
57871462 1126{
c6c3b1b3 1127 assert(r<15);
1128 assem_debug("blx %s\n",regname[r]);
1129 output_w32(0xe12fff30|r);
57871462 1130}
e2b5e7aa 1131
1132static void emit_jmpreg(u_int r)
57871462 1133{
1134 assem_debug("mov pc,%s\n",regname[r]);
1135 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1136}
1137
be516ebe 1138static void emit_ret(void)
1139{
1140 emit_jmpreg(14);
1141}
1142
e2b5e7aa 1143static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1144{
1145 assert(offset>-4096&&offset<4096);
1146 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1147 if(offset>=0) {
1148 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1149 }else{
1150 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1151 }
1152}
e2b5e7aa 1153
1154static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1155{
1156 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1157 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1158}
e2b5e7aa 1159
1160static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1161{
1162 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1163 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1164}
e2b5e7aa 1165
1166static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1167{
1168 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1170}
e2b5e7aa 1171
1172static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1173{
1174 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1175 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1176}
e2b5e7aa 1177
1178static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1179{
1180 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1181 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1182}
e2b5e7aa 1183
1184static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1185{
1186 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1187 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1188}
e2b5e7aa 1189
e2b5e7aa 1190static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1191{
1192 assert(offset>-256&&offset<256);
1193 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1194 if(offset>=0) {
1195 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1196 }else{
1197 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1198 }
1199}
e2b5e7aa 1200
e2b5e7aa 1201static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1202{
1203 assert(offset>-256&&offset<256);
1204 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1205 if(offset>=0) {
1206 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1207 }else{
1208 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1209 }
1210}
e2b5e7aa 1211
1212static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1213{
1214 assert(offset>-4096&&offset<4096);
1215 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1216 if(offset>=0) {
1217 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1218 }else{
1219 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1220 }
1221}
e2b5e7aa 1222
e2b5e7aa 1223static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1224{
1225 assert(offset>-256&&offset<256);
1226 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1227 if(offset>=0) {
1228 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1229 }else{
1230 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1231 }
1232}
e2b5e7aa 1233
054175e9 1234static void emit_ldrd(int offset, int rs, int rt)
1235{
1236 assert(offset>-256&&offset<256);
1237 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1238 if(offset>=0) {
1239 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1240 }else{
1241 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1242 }
1243}
e2b5e7aa 1244
643aeae3 1245static void emit_readword(void *addr, int rt)
57871462 1246{
643aeae3 1247 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1248 assert(offset<4096);
1249 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1250 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1251}
e2b5e7aa 1252
e2b5e7aa 1253static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1254{
1255 assert(offset>-4096&&offset<4096);
1256 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1257 if(offset>=0) {
1258 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1259 }else{
1260 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1261 }
1262}
e2b5e7aa 1263
e2b5e7aa 1264static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1265{
1266 assert(offset>-256&&offset<256);
1267 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1268 if(offset>=0) {
1269 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1270 }else{
1271 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1272 }
1273}
e2b5e7aa 1274
1275static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1276{
1277 assert(offset>-4096&&offset<4096);
1278 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1279 if(offset>=0) {
1280 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1281 }else{
1282 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1283 }
1284}
e2b5e7aa 1285
e2b5e7aa 1286static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1287{
1288 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1289 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1290}
e2b5e7aa 1291
1292static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1293{
1294 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1295 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1296}
e2b5e7aa 1297
1298static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1299{
1300 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1301 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1302}
e2b5e7aa 1303
643aeae3 1304static void emit_writeword(int rt, void *addr)
57871462 1305{
643aeae3 1306 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1307 assert(offset<4096);
1308 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1309 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1310}
e2b5e7aa 1311
e2b5e7aa 1312static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1313{
1314 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1315 assert(rs1<16);
1316 assert(rs2<16);
1317 assert(hi<16);
1318 assert(lo<16);
1319 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1320}
e2b5e7aa 1321
1322static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1323{
1324 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1325 assert(rs1<16);
1326 assert(rs2<16);
1327 assert(hi<16);
1328 assert(lo<16);
1329 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1330}
1331
e2b5e7aa 1332static void emit_clz(int rs,int rt)
57871462 1333{
1334 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1335 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1336}
1337
e2b5e7aa 1338static void emit_subcs(int rs1,int rs2,int rt)
57871462 1339{
1340 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1341 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1342}
1343
e2b5e7aa 1344static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1345{
1346 assert(imm>0);
1347 assert(imm<32);
1348 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1350}
1351
e2b5e7aa 1352static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1353{
1354 assert(imm>0);
1355 assert(imm<32);
1356 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1357 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1358}
1359
e2b5e7aa 1360static void emit_negmi(int rs, int rt)
57871462 1361{
1362 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1363 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1364}
1365
e2b5e7aa 1366static void emit_negsmi(int rs, int rt)
57871462 1367{
1368 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1369 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1370}
1371
e2b5e7aa 1372static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1373{
1374 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1375 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1376}
1377
e2b5e7aa 1378static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1379{
1380 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1381 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1382}
1383
e2b5e7aa 1384static void emit_teq(int rs, int rt)
57871462 1385{
1386 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1387 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1388}
1389
3968e69e 1390static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1391{
1392 u_int armval;
cfbd3c6e 1393 genimm_checked(imm,&armval);
57871462 1394 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1395 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1396}
1397
57871462 1398// Conditionally select one of two immediates, optimizing for small code size
1399// This will only be called if HAVE_CMOV_IMM is defined
// Emits code that always loads imm1 into rt and then, under the "ne"
// condition, turns it into imm2. Three strategies are tried in order:
//   1. imm2-imm1 encodable by genimm(): mov imm1, then "addne" the difference.
//   2. imm1-imm2 encodable by genimm(): mov imm1, then "subne" the difference.
//   3. otherwise: without HAVE_ARMV7, mov imm1 followed by an "ldrne" whose
//      literal (imm2) is registered with add_literal(); with HAVE_ARMV7,
//      a movw/movt pair for imm1 with "movwne"/"movtne" emitted only for the
//      16-bit halves in which imm1 and imm2 differ.
e2b5e7aa 1400static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1401{
1402 u_int armval;
1403 if(genimm(imm2-imm1,&armval)) {
1404 emit_movimm(imm1,rt);
1405 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1406 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1407 }else if(genimm(imm1-imm2,&armval)) {
1408 emit_movimm(imm1,rt);
1409 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1410 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1411 }
1412 else {
665f33e1 1413 #ifndef HAVE_ARMV7
57871462 1414 emit_movimm(imm1,rt);
// The literal is registered at the current output position, i.e. at the
// ldrne emitted just below; its offset field is left zero here.
1415 add_literal((int)out,imm2);
1416 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1417 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1418 #else
1419 emit_movw(imm1&0x0000FFFF,rt);
// Low halves differ: conditionally overwrite with imm2's low 16 bits
// (split into a 12-bit field and a 4-bit field at bits 16-19).
1420 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1421 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1422 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1423 }
1424 emit_movt(imm1&0xFFFF0000,rt);
// High halves differ: conditionally overwrite with imm2's high 16 bits.
1425 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1426 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1427 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1428 }
1429 #endif
1430 }
1431}
1432
57871462 1433// special case for checking invalid_code
e2b5e7aa 1434static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1435{
1436 assert(imm<128&&imm>=0);
1437 assert(r>=0&&r<16);
1438 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1439 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1440 emit_cmpimm(HOST_TEMPREG,imm);
1441}
1442
// Emit the conditional call traced as "blne" to address a.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000 | genjmp(a));
}
1449
57871462 1450// Used to preload hash table entries
e2b5e7aa 1451static unused void emit_prefetchreg(int r)
57871462 1452{
1453 assem_debug("pld %s\n",regname[r]);
1454 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1455}
1456
1457// Special case for mini_ht
e2b5e7aa 1458static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1459{
1460 assert(offset<4096);
1461 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1462 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1463}
1464
e2b5e7aa 1465static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1466{
1467 u_int armval;
cfbd3c6e 1468 genimm_checked(imm,&armval);
b9b61529 1469 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1470 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1471}
1472
e2b5e7aa 1473static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1474{
1475 u_int armval;
cfbd3c6e 1476 genimm_checked(imm,&armval);
b9b61529 1477 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1478 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1479}
1480
e2b5e7aa 1481static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1482{
1483 u_int armval;
1484 genimm_checked(imm,&armval);
1485 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1486 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1487}
1488
e2b5e7aa 1489static void emit_jno_unlikely(int a)
57871462 1490{
1491 //emit_jno(a);
1492 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1493 output_w32(0x72800000|rd_rn_rm(15,15,0));
1494}
1495
054175e9 1496static void save_regs_all(u_int reglist)
57871462 1497{
054175e9 1498 int i;
57871462 1499 if(!reglist) return;
1500 assem_debug("stmia fp,{");
054175e9 1501 for(i=0;i<16;i++)
1502 if(reglist&(1<<i))
1503 assem_debug("r%d,",i);
57871462 1504 assem_debug("}\n");
1505 output_w32(0xe88b0000|reglist);
1506}
e2b5e7aa 1507
054175e9 1508static void restore_regs_all(u_int reglist)
57871462 1509{
054175e9 1510 int i;
57871462 1511 if(!reglist) return;
1512 assem_debug("ldmia fp,{");
054175e9 1513 for(i=0;i<16;i++)
1514 if(reglist&(1<<i))
1515 assem_debug("r%d,",i);
57871462 1516 assem_debug("}\n");
1517 output_w32(0xe89b0000|reglist);
1518}
e2b5e7aa 1519
054175e9 1520// Save registers before function call
1521static void save_regs(u_int reglist)
1522{
4d646738 1523 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1524 save_regs_all(reglist);
1525}
e2b5e7aa 1526
054175e9 1527// Restore registers after function call
1528static void restore_regs(u_int reglist)
1529{
4d646738 1530 reglist&=CALLER_SAVE_REGS;
054175e9 1531 restore_regs_all(reglist);
1532}
57871462 1533
57871462 1534/* Stubs/epilogue */
1535
// Flush the pending literal pool at the current output position.
// Does nothing when no literals are pending. When n is nonzero the flush is
// skipped while the distance from the first pending reference to the current
// output position is still below 4096-n.
// For every pending literal, the referencing instruction (address stored in
// literals[i][0]) gets the literal's offset OR-ed into its low bits; the
// value (literals[i][1]) is emitted unless an earlier entry already carries
// the same value, in which case that earlier slot is shared.
e2b5e7aa 1536static void literal_pool(int n)
57871462 1537{
1538 if(!literalcount) return;
1539 if(n) {
1540 if((int)out-literals[0][0]<4096-n) return;
1541 }
1542 u_int *ptr;
1543 int i;
1544 for(i=0;i<literalcount;i++)
1545 {
// Default: this literal gets a fresh slot at the current output position.
77750690 1546 u_int l_addr=(u_int)out;
1547 int j;
// Look for an earlier entry with the same value and reuse its address.
1548 for(j=0;j<i;j++) {
1549 if(literals[j][1]==literals[i][1]) {
1550 //printf("dup %08x\n",literals[i][1]);
1551 l_addr=literals[j][0];
1552 break;
1553 }
1554 }
57871462 1555 ptr=(u_int *)literals[i][0];
// Offset is relative to the referencing instruction's address plus 8.
77750690 1556 u_int offset=l_addr-(u_int)ptr-8;
57871462 1557 assert(offset<4096);
1558 assert(!(offset&3));
1559 *ptr|=offset;
// Only emit the value when it was not shared with an earlier entry.
77750690 1560 if(l_addr==(u_int)out) {
1561 literals[i][0]=l_addr; // remember for dupes
1562 output_w32(literals[i][1]);
1563 }
57871462 1564 }
1565 literalcount=0;
1566}
1567
e2b5e7aa 1568static void literal_pool_jumpover(int n)
57871462 1569{
1570 if(!literalcount) return;
1571 if(n) {
1572 if((int)out-literals[0][0]<4096-n) return;
1573 }
df4dc2b1 1574 void *jaddr = out;
57871462 1575 emit_jmp(0);
1576 literal_pool(0);
df4dc2b1 1577 set_jump_target(jaddr, out);
57871462 1578}
1579
7c3a5182 1580// parsed by get_pointer, find_extjump_insn
1581static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1582{
1583 u_char *ptr=(u_char *)addr;
1584 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1585 (void)ptr;
1586
57871462 1587 emit_loadlp(target,0);
643aeae3 1588 emit_loadlp((u_int)addr,1);
1589 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
57871462 1590 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1591//DEBUG >
1592#ifdef DEBUG_CYCLE_COUNT
643aeae3 1593 emit_readword(&last_count,ECX);
57871462 1594 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1595 emit_readword(&next_interupt,ECX);
1596 emit_writeword(HOST_CCREG,&Count);
57871462 1597 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1598 emit_writeword(ECX,&last_count);
57871462 1599#endif
1600//DEBUG <
1601 emit_jmp(linker);
1602}
1603
d1e4ebd9 1604static void check_extjump2(void *src)
1605{
1606 u_int *ptr = src;
1607 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1608 (void)ptr;
1609}
1610
13e35c04 1611// put rt_val into rt, potentially making use of rs with value rs_val
1612static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1613{
8575a877 1614 u_int armval;
1615 int diff;
1616 if(genimm(rt_val,&armval)) {
1617 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1618 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1619 return;
1620 }
1621 if(genimm(~rt_val,&armval)) {
1622 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1623 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1624 return;
1625 }
1626 diff=rt_val-rs_val;
1627 if(genimm(diff,&armval)) {
1628 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1629 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1630 return;
1631 }else if(genimm(-diff,&armval)) {
1632 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1633 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1634 return;
1635 }
1636 emit_movimm(rt_val,rt);
1637}
1638
// Return 1 if emit_movimm_from() can derive v2 from v1 cheaply: either the
// values are equal, or v2-v1 (or its negation) fits in 8 bits once trailing
// pairs of zero bits are stripped. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int delta = v2 - v1;
  u_int bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
cbbab9cd 1654
b14b6a8f 1655static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1656{
1657 switch(type) {
1658 case LOADB_STUB: emit_signextend8(rs,rt); break;
1659 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1660 case LOADH_STUB: emit_signextend16(rs,rt); break;
1661 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1662 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1663 default: assert(0);
1664 }
1665}
1666
b1be1eee 1667#include "pcsxmem.h"
1668#include "pcsxmem_inline.c"
b1be1eee 1669
// Emit the out-of-line stub for memory read number n from the stubs[] table.
// The stub first looks the address (in host register rs) up in mem_rtab and
// tries a conditional direct load; if that path is taken it branches back to
// stubs[n].retaddr. Otherwise it saves the caller-saved registers, calls the
// jump_handler_read8/16/32 routine matching the stub type, moves the result
// from r0 into the destination register and returns to retaddr.
e2b5e7aa 1670static void do_readstub(int n)
57871462 1671{
b14b6a8f 1672 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1673 literal_pool(256);
b14b6a8f 1674 set_jump_target(stubs[n].addr, out);
1675 enum stub_type type=stubs[n].type;
1676 int i=stubs[n].a;
1677 int rs=stubs[n].b;
1678 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1679 u_int reglist=stubs[n].e;
57871462 1680 signed char *i_regmap=i_regs->regmap;
581335b0 1681 int rt;
// Destination host register: FTEMP for C1LS/C2LS/LOADLR, else the one mapped to rt1[i].
b9b61529 1682 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 1683 rt=get_reg(i_regmap,FTEMP);
1684 }else{
57871462 1685 rt=get_reg(i_regmap,rt1[i]);
1686 }
1687 assert(rs>=0);
df4dc2b1 1688 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1689 void *restore_jump = NULL;
// Treat the address register as in use, then pick the first register from
// mask 0x13ff (r0-r9, r12) that is not in reglist as a scratch register.
c6c3b1b3 1690 reglist|=(1<<rs);
1691 for(r=0;r<=12;r++) {
1692 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1693 temp=r; break;
1694 }
1695 }
// The destination is about to be overwritten, so it is dropped from the save list.
db829eeb 1696 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1697 reglist&=~(1<<rt);
// No free scratch register: save registers now and use r0 (or r2 if rs is r0).
1698 if(temp==-1) {
1699 save_regs(reglist);
1700 regs_saved=1;
1701 temp=(rs==0)?2:0;
1702 }
// Prefer r1 over HOST_TEMPREG as the second scratch register when it is available.
1703 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1704 temp2=1;
// Table lookup: temp2 = mem_rtab[rs >> 12], then shift left by one with flags
// set; the "cc"-conditional load and branch below depend on those flags.
// NOTE(review): presumably the top bit of a table entry marks "needs handler" -- confirm in pcsxmem.
643aeae3 1705 emit_readword(&mem_rtab,temp);
c6c3b1b3 1706 emit_shrimm(rs,12,temp2);
1707 emit_readword_dualindexedx4(temp,temp2,temp2);
1708 emit_lsls_imm(temp2,1,temp2);
1709 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
1710 switch(type) {
1711 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1712 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1713 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1714 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1715 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1716 default: assert(0);
c6c3b1b3 1717 }
1718 }
// Direct path taken: leave the stub, going through the register restore
// below if registers were already saved.
1719 if(regs_saved) {
df4dc2b1 1720 restore_jump=out;
c6c3b1b3 1721 emit_jcc(0); // jump to reg restore
1722 }
1723 else
b14b6a8f 1724 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1725
// Handler path.
1726 if(!regs_saved)
1727 save_regs(reglist);
643aeae3 1728 void *handler=NULL;
c6c3b1b3 1729 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1730 handler=jump_handler_read8;
c6c3b1b3 1731 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1732 handler=jump_handler_read16;
c6c3b1b3 1733 if(type==LOADW_STUB)
643aeae3 1734 handler=jump_handler_read32;
1735 assert(handler);
b96d3df7 1736 pass_args(rs,temp2);
// r2 receives the cycle count (loaded from CCREG if it has no host register)
// plus the clock adjustment for this instruction.
c6c3b1b3 1737 int cc=get_reg(i_regmap,CCREG);
1738 if(cc<0)
1739 emit_loadreg(CCREG,2);
b14b6a8f 1740 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
c6c3b1b3 1741 emit_call(handler);
// Move the result from r0 into rt with the extension matching the load type.
1742 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 1743 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1744 }
1745 if(restore_jump)
df4dc2b1 1746 set_jump_target(restore_jump, out);
c6c3b1b3 1747 restore_regs(reglist);
b14b6a8f 1748 emit_jmp(stubs[n].retaddr); // return address
57871462 1749}
1750
// Emit inline code for a read from the known address addr.
// Tries, in order: pcsx_direct_read(); a plain load when
// get_direct_memhandler() reports no handler (returns NULL and fills in
// host_addr); otherwise a call to the memory handler with the caller-saved
// registers from reglist preserved around it.
b14b6a8f 1751static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 1752{
1753 int rs=get_reg(regmap,target);
57871462 1754 int rt=get_reg(regmap,target);
// Fall back to the host register mapped to -1 when target has no host register.
535d208a 1755 if(rs<0) rs=get_reg(regmap,-1);
57871462 1756 assert(rs>=0);
687b4580 1757 u_int is_dynamic,far_call=0;
1758 uintptr_t host_addr = 0;
643aeae3 1759 void *handler;
b1be1eee 1760 int cc=get_reg(regmap,CCREG);
1761 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
1762 return;
643aeae3 1763 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
// No handler: read straight from host memory (nothing to do if the result is unused).
1764 if (handler == NULL) {
db829eeb 1765 if(rt<0||rt1[i]==0)
c6c3b1b3 1766 return;
// Rewrite rs from the guest address to the host address when they differ.
13e35c04 1767 if(addr!=host_addr)
1768 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1769 switch(type) {
1770 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1771 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1772 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1773 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1774 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1775 default: assert(0);
1776 }
1777 return;
1778 }
// Dynamic handlers are dispatched through the generic jump_handler_read* routines.
b1be1eee 1779 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1780 if(is_dynamic) {
1781 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1782 handler=jump_handler_read8;
b1be1eee 1783 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1784 handler=jump_handler_read16;
b1be1eee 1785 if(type==LOADW_STUB)
643aeae3 1786 handler=jump_handler_read32;
b1be1eee 1787 }
c6c3b1b3 1788
1789 // call a memhandler
db829eeb 1790 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 1791 reglist&=~(1<<rt);
1792 save_regs(reglist);
// r0 = address argument.
1793 if(target==0)
1794 emit_movimm(addr,0);
1795 else if(rs!=0)
1796 emit_mov(rs,0);
// If the handler is outside the +/-32MB window checked here, load its
// address into r12 and call through the register instead.
643aeae3 1797 int offset=(u_char *)handler-out-8;
c6c3b1b3 1798 if(offset<-33554432||offset>=33554432) {
1799 // unreachable memhandler, a plugin func perhaps
643aeae3 1800 emit_movimm((u_int)handler,12);
b1be1eee 1801 far_call=1;
1802 }
1803 if(cc<0)
1804 emit_loadreg(CCREG,2);
// Dynamic: r1 = table entry shifted left by one, r2 = adjusted cycle count.
// Static: Count is updated to last_count plus the adjusted cycle count before the call.
1805 if(is_dynamic) {
1806 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
1807 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 1808 }
b1be1eee 1809 else {
643aeae3 1810 emit_readword(&last_count,3);
b1be1eee 1811 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
1812 emit_add(2,3,2);
643aeae3 1813 emit_writeword(2,&Count);
b1be1eee 1814 }
1815
1816 if(far_call)
1817 emit_callreg(12);
c6c3b1b3 1818 else
1819 emit_call(handler);
b1be1eee 1820
// Move the result from r0 into rt with the extension matching the load type.
db829eeb 1821 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 1822 switch(type) {
1823 case LOADB_STUB: emit_signextend8(0,rt); break;
1824 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1825 case LOADH_STUB: emit_signextend16(0,rt); break;
1826 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1827 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1828 default: assert(0);
1829 }
1830 }
1831 restore_regs(reglist);
57871462 1832}
1833
// Emit the out-of-line stub for memory write number n from the stubs[] table.
// The stub looks the address (in host register rs) up in mem_wtab and tries a
// conditional direct store of rt; if that path is taken it branches back to
// stubs[n].retaddr. Otherwise it saves the caller-saved registers, calls the
// jump_handler_write8/16/32 routine matching the stub type, writes the
// returned cycle count back and returns to retaddr.
e2b5e7aa 1834static void do_writestub(int n)
57871462 1835{
b14b6a8f 1836 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1837 literal_pool(256);
b14b6a8f 1838 set_jump_target(stubs[n].addr, out);
1839 enum stub_type type=stubs[n].type;
1840 int i=stubs[n].a;
1841 int rs=stubs[n].b;
1842 struct regstat *i_regs=(struct regstat *)stubs[n].c;
1843 u_int reglist=stubs[n].e;
57871462 1844 signed char *i_regmap=i_regs->regmap;
581335b0 1845 int rt,r;
// Source host register: FTEMP for C1LS/C2LS, else the one mapped to rs2[i].
b9b61529 1846 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 1847 rt=get_reg(i_regmap,r=FTEMP);
1848 }else{
57871462 1849 rt=get_reg(i_regmap,r=rs2[i]);
1850 }
1851 assert(rs>=0);
1852 assert(rt>=0);
b14b6a8f 1853 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1854 void *restore_jump = NULL;
// Pick the first register from mask 0x13ff (r0-r9, r12) that is neither in
// reglist nor one of rs/rt as a scratch register.
b96d3df7 1855 int reglist2=reglist|(1<<rs)|(1<<rt);
1856 for(rtmp=0;rtmp<=12;rtmp++) {
1857 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1858 temp=rtmp; break;
1859 }
1860 }
// No free scratch register: save registers now and take the first of r0-r3
// that is not rs or rt.
1861 if(temp==-1) {
1862 save_regs(reglist);
1863 regs_saved=1;
1864 for(rtmp=0;rtmp<=3;rtmp++)
1865 if(rtmp!=rs&&rtmp!=rt)
1866 {temp=rtmp;break;}
1867 }
// Prefer r3 over HOST_TEMPREG as the second scratch register when it is available.
1868 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1869 temp2=3;
// Table lookup: temp2 = mem_wtab[rs >> 12], then shift left by one with flags
// set; the "cc"-conditional store and branch below depend on those flags.
// NOTE(review): presumably the top bit of a table entry marks "needs handler" -- confirm in pcsxmem.
643aeae3 1870 emit_readword(&mem_wtab,temp);
b96d3df7 1871 emit_shrimm(rs,12,temp2);
1872 emit_readword_dualindexedx4(temp,temp2,temp2);
1873 emit_lsls_imm(temp2,1,temp2);
1874 switch(type) {
1875 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1876 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1877 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1878 default: assert(0);
1879 }
// Direct path taken: leave the stub, going through the register restore
// below if registers were already saved.
1880 if(regs_saved) {
df4dc2b1 1881 restore_jump=out;
b96d3df7 1882 emit_jcc(0); // jump to reg restore
1883 }
1884 else
b14b6a8f 1885 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1886
// Handler path.
1887 if(!regs_saved)
1888 save_regs(reglist);
643aeae3 1889 void *handler=NULL;
b96d3df7 1890 switch(type) {
643aeae3 1891 case STOREB_STUB: handler=jump_handler_write8; break;
1892 case STOREH_STUB: handler=jump_handler_write16; break;
1893 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1894 default: assert(0);
b96d3df7 1895 }
643aeae3 1896 assert(handler);
b96d3df7 1897 pass_args(rs,rt);
// r3 receives the shifted table entry, r2 the adjusted cycle count.
1898 if(temp2!=3)
1899 emit_mov(temp2,3);
1900 int cc=get_reg(i_regmap,CCREG);
1901 if(cc<0)
1902 emit_loadreg(CCREG,2);
b14b6a8f 1903 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
b96d3df7 1904 // returns new cycle_count
1905 emit_call(handler);
// Undo the clock adjustment on the returned count (in r0) and store it back.
b14b6a8f 1906 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 1907 if(cc<0)
1908 emit_storereg(CCREG,2);
1909 if(restore_jump)
df4dc2b1 1910 set_jump_target(restore_jump, out);
b96d3df7 1911 restore_regs(reglist);
b14b6a8f 1912 emit_jmp(stubs[n].retaddr);
57871462 1913}
1914
// Emit inline code for a write of register `target` to the known address
// addr. When get_direct_memhandler() reports no handler (NULL), a plain
// store to host_addr is emitted; otherwise the handler is invoked through
// jump_handler_write_h with the caller-saved registers from reglist
// preserved around the call.
b14b6a8f 1915static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 1916{
// rs: host register mapped to -1 (holds the address); rt: value to store.
1917 int rs=get_reg(regmap,-1);
57871462 1918 int rt=get_reg(regmap,target);
1919 assert(rs>=0);
1920 assert(rt>=0);
687b4580 1921 uintptr_t host_addr = 0;
643aeae3 1922 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1923 if (handler == NULL) {
// Rewrite rs from the guest address to the host address when they differ.
13e35c04 1924 if(addr!=host_addr)
1925 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1926 switch(type) {
1927 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1928 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1929 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1930 default: assert(0);
1931 }
1932 return;
1933 }
1934
1935 // call a memhandler
1936 save_regs(reglist);
13e35c04 1937 pass_args(rs,rt);
// r2 = adjusted cycle count, r3 = handler address.
b96d3df7 1938 int cc=get_reg(regmap,CCREG);
1939 if(cc<0)
1940 emit_loadreg(CCREG,2);
2573466a 1941 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
643aeae3 1942 emit_movimm((u_int)handler,3);
b96d3df7 1943 // returns new cycle_count
643aeae3 1944 emit_call(jump_handler_write_h);
// Undo the clock adjustment on the returned count (in r0) and store it back.
2573466a 1945 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 1946 if(cc<0)
1947 emit_storereg(CCREG,2);
1948 restore_regs(reglist);
57871462 1949}
1950
d1e4ebd9 1951// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
7c3a5182 1952static void do_dirty_stub_emit_args(u_int arg0)
57871462 1953{
665f33e1 1954 #ifndef HAVE_ARMV7
7c3a5182 1955 emit_loadlp((int)source, 1);
1956 emit_loadlp((int)copy, 2);
1957 emit_loadlp(slen*4, 3);
57871462 1958 #else
7c3a5182 1959 emit_movw(((u_int)source)&0x0000FFFF, 1);
1960 emit_movw(((u_int)copy)&0x0000FFFF, 2);
1961 emit_movt(((u_int)source)&0xFFFF0000, 1);
1962 emit_movt(((u_int)copy)&0xFFFF0000, 2);
1963 emit_movw(slen*4, 3);
57871462 1964 #endif
7c3a5182 1965 emit_movimm(arg0, 0);
1966}
1967
1968static void *do_dirty_stub(int i)
1969{
1970 assem_debug("do_dirty_stub %x\n",start+i*4);
1971 do_dirty_stub_emit_args(start + i*4);
1972 emit_call(verify_code);
df4dc2b1 1973 void *entry = out;
57871462 1974 load_regs_entry(i);
df4dc2b1 1975 if (entry == out)
1976 entry = instr_addr[i];
57871462 1977 emit_jmp(instr_addr[i]);
1978 return entry;
1979}
1980
e2b5e7aa 1981static void do_dirty_stub_ds()
57871462 1982{
7c3a5182 1983 do_dirty_stub_emit_args(start + 1);
1984 emit_call(verify_code_ds);
57871462 1985}
1986
57871462 1987/* Special assem */
1988
054175e9 1989static void c2op_prologue(u_int op,u_int reglist)
1990{
1991 save_regs_all(reglist);
82ed88eb 1992#ifdef PCNT
1993 emit_movimm(op,0);
3968e69e 1994 emit_call(pcnt_gte_start);
82ed88eb 1995#endif
054175e9 1996 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
1997}
1998
1999static void c2op_epilogue(u_int op,u_int reglist)
2000{
82ed88eb 2001#ifdef PCNT
2002 emit_movimm(op,0);
3968e69e 2003 emit_call(pcnt_gte_end);
82ed88eb 2004#endif
054175e9 2005 restore_regs_all(reglist);
2006}
2007
6c0eefaf 2008static void c2op_call_MACtoIR(int lm,int need_flags)
2009{
2010 if(need_flags)
643aeae3 2011 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2012 else
643aeae3 2013 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2014}
2015
2016static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2017{
643aeae3 2018 emit_call(func);
6c0eefaf 2019 // func is C code and trashes r0
2020 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2021 if(need_flags||need_ir)
2022 c2op_call_MACtoIR(lm,need_flags);
643aeae3 2023 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2024}
2025
054175e9 2026static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 2027{
b9b61529 2028 u_int c2op=source[i]&0x3f;
6c0eefaf 2029 u_int hr,reglist_full=0,reglist;
054175e9 2030 int need_flags,need_ir;
b9b61529 2031 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 2032 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 2033 }
4d646738 2034 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 2035
2036 if (gte_handlers[c2op]!=NULL) {
bedfea38 2037 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2038 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2039 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2040 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 2041 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
2042 need_flags=0;
6c0eefaf 2043 int shift = (source[i] >> 19) & 1;
2044 int lm = (source[i] >> 10) & 1;
054175e9 2045 switch(c2op) {
19776aef 2046#ifndef DRC_DBG
054175e9 2047 case GTE_MVMVA: {
82336ba3 2048#ifdef HAVE_ARMV5
054175e9 2049 int v = (source[i] >> 15) & 3;
2050 int cv = (source[i] >> 13) & 3;
2051 int mx = (source[i] >> 17) & 3;
4d646738 2052 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 2053 c2op_prologue(c2op,reglist);
2054 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2055 if(v<3)
2056 emit_ldrd(v*8,0,4);
2057 else {
2058 emit_movzwl_indexed(9*4,0,4); // gteIR
2059 emit_movzwl_indexed(10*4,0,6);
2060 emit_movzwl_indexed(11*4,0,5);
2061 emit_orrshl_imm(6,16,4);
2062 }
2063 if(mx<3)
2064 emit_addimm(0,32*4+mx*8*4,6);
2065 else
643aeae3 2066 emit_readword(&zeromem_ptr,6);
054175e9 2067 if(cv<3)
2068 emit_addimm(0,32*4+(cv*8+5)*4,7);
2069 else
643aeae3 2070 emit_readword(&zeromem_ptr,7);
054175e9 2071#ifdef __ARM_NEON__
2072 emit_movimm(source[i],1); // opcode
643aeae3 2073 emit_call(gteMVMVA_part_neon);
054175e9 2074 if(need_flags) {
2075 emit_movimm(lm,1);
643aeae3 2076 emit_call(gteMACtoIR_flags_neon);
054175e9 2077 }
2078#else
2079 if(cv==3&&shift)
2080 emit_call((int)gteMVMVA_part_cv3sh12_arm);
2081 else {
2082 emit_movimm(shift,1);
2083 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
2084 }
6c0eefaf 2085 if(need_flags||need_ir)
2086 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2087#endif
2088#else /* if not HAVE_ARMV5 */
2089 c2op_prologue(c2op,reglist);
2090 emit_movimm(source[i],1); // opcode
643aeae3 2091 emit_writeword(1,&psxRegs.code);
82336ba3 2092 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 2093#endif
2094 break;
2095 }
6c0eefaf 2096 case GTE_OP:
2097 c2op_prologue(c2op,reglist);
643aeae3 2098 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2099 if(need_flags||need_ir) {
2100 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2101 c2op_call_MACtoIR(lm,need_flags);
2102 }
2103 break;
2104 case GTE_DPCS:
2105 c2op_prologue(c2op,reglist);
2106 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2107 break;
2108 case GTE_INTPL:
2109 c2op_prologue(c2op,reglist);
2110 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2111 break;
2112 case GTE_SQR:
2113 c2op_prologue(c2op,reglist);
643aeae3 2114 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2115 if(need_flags||need_ir) {
2116 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2117 c2op_call_MACtoIR(lm,need_flags);
2118 }
2119 break;
2120 case GTE_DCPL:
2121 c2op_prologue(c2op,reglist);
2122 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2123 break;
2124 case GTE_GPF:
2125 c2op_prologue(c2op,reglist);
2126 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2127 break;
2128 case GTE_GPL:
2129 c2op_prologue(c2op,reglist);
2130 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2131 break;
19776aef 2132#endif
054175e9 2133 default:
054175e9 2134 c2op_prologue(c2op,reglist);
19776aef 2135#ifdef DRC_DBG
2136 emit_movimm(source[i],1); // opcode
643aeae3 2137 emit_writeword(1,&psxRegs.code);
19776aef 2138#endif
643aeae3 2139 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2140 break;
2141 }
2142 c2op_epilogue(c2op,reglist);
2143 }
b9b61529 2144}
2145
// Emit code computing, from the value in host register sl into temp:
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
// The 0x7f87e000 mask is tested in three pieces; each later test is
// conditional on the previous one having found no set bit.
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // some tested bit was set
  emit_orrne_imm(temp, 0x80000000, temp);
}
2157
2158static void do_mfc2_31_one(u_int copr,signed char temp)
2159{
2160 emit_readword(&reg_cop2d[copr],temp);
2161 emit_testimm(temp,0x8000); // do we need this?
2162 emit_andne_imm(temp,0,temp);
2163 emit_cmpimm(temp,0xf80);
2164 emit_andimm(temp,0xf80,temp);
2165 emit_cmovae_imm(0xf80,temp);
2166}
2167
2168static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2169{
2170 if (temp < 0) {
2171 host_tempreg_acquire();
2172 temp = HOST_TEMPREG;
2173 }
2174 do_mfc2_31_one(9,temp);
2175 emit_shrimm(temp,7,tl);
2176 do_mfc2_31_one(10,temp);
2177 emit_orrshr_imm(temp,2,tl);
2178 do_mfc2_31_one(11,temp);
2179 emit_orrshl_imm(temp,3,tl);
2180 emit_writeword(tl,&reg_cop2d[29]);
2181 if (temp == HOST_TEMPREG)
2182 host_tempreg_release();
2183}
2184
e2b5e7aa 2185static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 2186{
2187 // case 0x18: MULT
2188 // case 0x19: MULTU
2189 // case 0x1A: DIV
2190 // case 0x1B: DIVU
2191 // case 0x1C: DMULT
2192 // case 0x1D: DMULTU
2193 // case 0x1E: DDIV
2194 // case 0x1F: DDIVU
2195 if(rs1[i]&&rs2[i])
2196 {
2197 if((opcode2[i]&4)==0) // 32-bit
2198 {
2199 if(opcode2[i]==0x18) // MULT
2200 {
2201 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2202 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2203 signed char hi=get_reg(i_regs->regmap,HIREG);
2204 signed char lo=get_reg(i_regs->regmap,LOREG);
2205 assert(m1>=0);
2206 assert(m2>=0);
2207 assert(hi>=0);
2208 assert(lo>=0);
2209 emit_smull(m1,m2,hi,lo);
2210 }
2211 if(opcode2[i]==0x19) // MULTU
2212 {
2213 signed char m1=get_reg(i_regs->regmap,rs1[i]);
2214 signed char m2=get_reg(i_regs->regmap,rs2[i]);
2215 signed char hi=get_reg(i_regs->regmap,HIREG);
2216 signed char lo=get_reg(i_regs->regmap,LOREG);
2217 assert(m1>=0);
2218 assert(m2>=0);
2219 assert(hi>=0);
2220 assert(lo>=0);
2221 emit_umull(m1,m2,hi,lo);
2222 }
2223 if(opcode2[i]==0x1A) // DIV
2224 {
2225 signed char d1=get_reg(i_regs->regmap,rs1[i]);
2226 signed char d2=get_reg(i_regs->regmap,rs2[i]);
2227 assert(d1>=0);
2228 assert(d2>=0);
2229 signed char quotient=get_reg(i_regs->regmap,LOREG);
2230 signed char remainder=get_reg(i_regs->regmap,HIREG);
2231 assert(quotient>=0);
2232 assert(remainder>=0);
2233 emit_movs(d1,remainder);
44a80f6a 2234 emit_movimm(0xffffffff,quotient);
2235 emit_negmi(quotient,quotient); // .. quotient and ..
2236 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2237 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2238 emit_jeq(out+52); // Division by zero
82336ba3 2239 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2240#ifdef HAVE_ARMV5
57871462 2241 emit_clz(HOST_TEMPREG,quotient);
2242 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2243#else
2244 emit_movimm(0,quotient);
2245 emit_addpl_imm(quotient,1,quotient);
2246 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2247 emit_jns(out-2*4);
665f33e1 2248#endif
57871462 2249 emit_orimm(quotient,1<<31,quotient);
2250 emit_shr(quotient,quotient,quotient);
2251 emit_cmp(remainder,HOST_TEMPREG);
2252 emit_subcs(remainder,HOST_TEMPREG,remainder);
2253 emit_adcs(quotient,quotient,quotient);
2254 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2255 emit_jcc(out-16); // -4
57871462 2256 emit_teq(d1,d2);
2257 emit_negmi(quotient,quotient);
2258 emit_test(d1,d1);
2259 emit_negmi(remainder,remainder);
2260 }
2261 if(opcode2[i]==0x1B) // DIVU
2262 {
2263 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
2264 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
2265 assert(d1>=0);
2266 assert(d2>=0);
2267 signed char quotient=get_reg(i_regs->regmap,LOREG);
2268 signed char remainder=get_reg(i_regs->regmap,HIREG);
2269 assert(quotient>=0);
2270 assert(remainder>=0);
44a80f6a 2271 emit_mov(d1,remainder);
2272 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2273 emit_test(d2,d2);
7c3a5182 2274 emit_jeq(out+40); // Division by zero
665f33e1 2275#ifdef HAVE_ARMV5
57871462 2276 emit_clz(d2,HOST_TEMPREG);
2277 emit_movimm(1<<31,quotient);
2278 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2279#else
2280 emit_movimm(0,HOST_TEMPREG);
82336ba3 2281 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2282 emit_lslpls_imm(d2,1,d2);
7c3a5182 2283 emit_jns(out-2*4);
665f33e1 2284 emit_movimm(1<<31,quotient);
2285#endif
57871462 2286 emit_shr(quotient,HOST_TEMPREG,quotient);
2287 emit_cmp(remainder,d2);
2288 emit_subcs(remainder,d2,remainder);
2289 emit_adcs(quotient,quotient,quotient);
2290 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2291 emit_jcc(out-16); // -4
57871462 2292 }
2293 }
2294 else // 64-bit
71e490c5 2295 assert(0);
57871462 2296 }
2297 else
2298 {
2299 // Multiply by zero is zero.
2300 // MIPS does not have a divide by zero exception.
2301 // The result is undefined, we return zero.
2302 signed char hr=get_reg(i_regs->regmap,HIREG);
2303 signed char lr=get_reg(i_regs->regmap,LOREG);
2304 if(hr>=0) emit_zeroreg(hr);
2305 if(lr>=0) emit_zeroreg(lr);
2306 }
2307}
2308#define multdiv_assemble multdiv_assemble_arm
2309
d1e4ebd9 2310static void do_jump_vaddr(int rs)
2311{
2312 emit_jmp(jump_vaddr_reg[rs]);
2313}
2314
// Intentionally empty on ARM: x86 preloads the value 0xf8 into the
// register here, while on ARM the hash is computed by a single
// instruction (see do_rhash).
static void do_preload_rhash(int r)
{
}
2319
e2b5e7aa 2320static void do_preload_rhtbl(int ht) {
57871462 2321 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2322}
2323
// Emit the hash computation: rh = rs & 0xf8.
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2327
e2b5e7aa 2328static void do_miniht_load(int ht,int rh) {
57871462 2329 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2330 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2331}
2332
// Emit the mini hash table dispatch: compare the loaded entry (rh) with
// the target address (rs); on a match load r15 from [ht+4], otherwise fall
// through to the jump_vaddr path for the address register.
// With CORTEX_A8_BRANCH_PREDICTION_HACK the address is first moved to r7
// so that path always uses the same register.
static void do_miniht_jump(int rs,int rh,int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
  #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (vaddr_reg != 7) {
    emit_mov(vaddr_reg, 7);
    vaddr_reg = 7;
  }
  #endif
  do_jump_vaddr(vaddr_reg);
}
2343
e2b5e7aa 2344static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2345 #ifndef HAVE_ARMV7
57871462 2346 emit_movimm(return_address,rt); // PC into link register
643aeae3 2347 add_to_linker(out,return_address,1);
57871462 2348 emit_pcreladdr(temp);
643aeae3 2349 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2350 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2351 #else
2352 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2353 add_to_linker(out,return_address,1);
57871462 2354 emit_pcreladdr(temp);
643aeae3 2355 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2356 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2357 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2358 #endif
2359}
2360
d148d265 2361static void mark_clear_cache(void *target)
2362{
643aeae3 2363 u_long offset = (u_char *)target - translation_cache;
d148d265 2364 u_int mask = 1u << ((offset >> 12) & 31);
2365 if (!(needs_clear_cache[offset >> 17] & mask)) {
2366 char *start = (char *)((u_long)target & ~4095ul);
2367 start_tcache_write(start, start + 4096);
2368 needs_clear_cache[offset >> 17] |= mask;
2369 }
2370}
2371
dd3a91a1 2372// Clearing the cache is rather slow on ARM Linux, so mark the areas
2373// that need to be cleared, and then only clear these areas once.
e2b5e7aa 2374static void do_clear_cache()
dd3a91a1 2375{
2376 int i,j;
2377 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
2378 {
2379 u_int bitmap=needs_clear_cache[i];
2380 if(bitmap) {
643aeae3 2381 u_char *start, *end;
9f51b4b9 2382 for(j=0;j<32;j++)
dd3a91a1 2383 {
2384 if(bitmap&(1<<j)) {
643aeae3 2385 start=translation_cache+i*131072+j*4096;
dd3a91a1 2386 end=start+4095;
2387 j++;
2388 while(j<32) {
2389 if(bitmap&(1<<j)) {
2390 end+=4096;
2391 j++;
2392 }else{
643aeae3 2393 end_tcache_write(start, end);
dd3a91a1 2394 break;
2395 }
2396 }
2397 }
2398 }
2399 needs_clear_cache[i]=0;
2400 }
2401 }
2402}
2403
// CPU-architecture-specific initialization
// (nothing is needed for this backend)
static void arch_init()
{
}
b9b61529 2407
2408// vim:shiftwidth=2:expandtab