drc: rm unneeded &63 masking
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
57871462 38void indirect_jump_indexed();
39void indirect_jump();
40void do_interrupt();
41void jump_vaddr_r0();
42void jump_vaddr_r1();
43void jump_vaddr_r2();
44void jump_vaddr_r3();
45void jump_vaddr_r4();
46void jump_vaddr_r5();
47void jump_vaddr_r6();
48void jump_vaddr_r7();
49void jump_vaddr_r8();
50void jump_vaddr_r9();
51void jump_vaddr_r10();
52void jump_vaddr_r12();
53
b14b6a8f 54void * const jump_vaddr_reg[16] = {
55 jump_vaddr_r0,
56 jump_vaddr_r1,
57 jump_vaddr_r2,
58 jump_vaddr_r3,
59 jump_vaddr_r4,
60 jump_vaddr_r5,
61 jump_vaddr_r6,
62 jump_vaddr_r7,
63 jump_vaddr_r8,
64 jump_vaddr_r9,
65 jump_vaddr_r10,
57871462 66 0,
b14b6a8f 67 jump_vaddr_r12,
57871462 68 0,
69 0,
b14b6a8f 70 0
71};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
57871462 104/* Linker */
105
df4dc2b1 106static void set_jump_target(void *addr, void *target_)
57871462 107{
df4dc2b1 108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
57871462 110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 113 assert(((uintptr_t)addr&3)==0);
57871462 114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 126 assert(((uintptr_t)addr&3)==0);
57871462 127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
e2b5e7aa 141#if 0
142static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 143{
144 u_char *ptr=(u_char *)addr;
145 u_int *ptr2=(u_int *)ptr;
146 assert(!copy||ptr2[-1]==0xe28dd000);
147 if(ptr[3]==0xe2) {
148 assert(!copy);
149 assert((target-(u_int)ptr2-8)<4096);
150 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
151 }
152 else {
153 assert((ptr[3]&0x0e)==0xa);
154 u_int target_insn=*(u_int *)target;
155 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
156 copy=0;
157 }
158 if((target_insn&0x0c100000)==0x04100000) { // Load
159 copy=0;
160 }
161 if(target_insn&0x08000000) {
162 copy=0;
163 }
164 if(copy) {
165 ptr2[-1]=target_insn;
166 target+=4;
167 }
168 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
169 }
170}
e2b5e7aa 171#endif
57871462 172
173/* Literal pool */
e2b5e7aa 174static void add_literal(int addr,int val)
57871462 175{
15776b68 176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
9f51b4b9 179 literalcount++;
180}
57871462 181
d148d265 182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
57871462 185{
186 int *ptr=(int *)(stub+4);
d148d265 187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 188 u_int offset=*ptr&0xfff;
d148d265 189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
57871462 191}
192
f968d35d 193// find where external branch is linked to using addr of its stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
643aeae3 197static void *get_pointer(void *stub)
57871462 198{
199 //printf("get_pointer(%x)\n",(int)stub);
d148d265 200 int *i_ptr=find_extjump_insn(stub);
3d680478 201 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 202 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 203}
204
205// Find the "clean" entry point from a "dirty" entry point
206// by skipping past the call to verify_code
df4dc2b1 207static void *get_clean_addr(void *addr)
57871462 208{
df4dc2b1 209 signed int *ptr = addr;
665f33e1 210 #ifndef HAVE_ARMV7
57871462 211 ptr+=4;
212 #else
213 ptr+=6;
214 #endif
215 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
216 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
217 ptr++;
218 if((*ptr&0xFF000000)==0xea000000) {
df4dc2b1 219 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
57871462 220 }
df4dc2b1 221 return ptr;
57871462 222}
223
3968e69e 224static int verify_dirty(const u_int *ptr)
57871462 225{
665f33e1 226 #ifndef HAVE_ARMV7
16c8be17 227 u_int offset;
57871462 228 // get from literal pool
15776b68 229 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 230 offset=*ptr&0xfff;
231 u_int source=*(u_int*)((void *)ptr+offset+8);
232 ptr++;
233 assert((*ptr&0xFFFF0000)==0xe59f0000);
234 offset=*ptr&0xfff;
235 u_int copy=*(u_int*)((void *)ptr+offset+8);
236 ptr++;
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int len=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 ptr++;
57871462 242 #else
243 // ARMv7 movw/movt
244 assert((*ptr&0xFFF00000)==0xe3000000);
245 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
246 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
247 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
248 ptr+=6;
249 #endif
250 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
251 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
df4dc2b1 258static int isclean(void *addr)
57871462 259{
665f33e1 260 #ifndef HAVE_ARMV7
581335b0 261 u_int *ptr=((u_int *)addr)+4;
57871462 262 #else
581335b0 263 u_int *ptr=((u_int *)addr)+6;
57871462 264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
269 return 1;
270}
271
4a35de07 272// get source that block at addr was compiled from (host pointers)
01d26796 273static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 274{
643aeae3 275 u_int *ptr = addr;
665f33e1 276 #ifndef HAVE_ARMV7
16c8be17 277 u_int offset;
57871462 278 // get from literal pool
15776b68 279 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 280 offset=*ptr&0xfff;
281 u_int source=*(u_int*)((void *)ptr+offset+8);
282 ptr++;
283 //assert((*ptr&0xFFFF0000)==0xe59f0000);
284 //offset=*ptr&0xfff;
285 //u_int copy=*(u_int*)((void *)ptr+offset+8);
286 ptr++;
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int len=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 ptr++;
57871462 292 #else
293 // ARMv7 movw/movt
294 assert((*ptr&0xFFF00000)==0xe3000000);
295 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
296 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
297 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
298 ptr+=6;
299 #endif
300 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
301 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 302 *start=(u_char *)source;
303 *end=(u_char *)source+len;
57871462 304}
305
57871462 306// Allocate a specific ARM register.
e2b5e7aa 307static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 308{
309 int n;
f776eb14 310 int dirty=0;
9f51b4b9 311
57871462 312 // see if it's already allocated (and dealloc it)
313 for(n=0;n<HOST_REGS;n++)
314 {
f776eb14 315 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
316 dirty=(cur->dirty>>n)&1;
317 cur->regmap[n]=-1;
318 }
57871462 319 }
9f51b4b9 320
57871462 321 cur->regmap[hr]=reg;
322 cur->dirty&=~(1<<hr);
f776eb14 323 cur->dirty|=dirty<<hr;
57871462 324 cur->isconst&=~(1<<hr);
325}
326
327// Alloc cycle count into dedicated register
e2b5e7aa 328static void alloc_cc(struct regstat *cur,int i)
57871462 329{
330 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
331}
332
57871462 333/* Assembler */
334
e2b5e7aa 335static unused char regname[16][4] = {
57871462 336 "r0",
337 "r1",
338 "r2",
339 "r3",
340 "r4",
341 "r5",
342 "r6",
343 "r7",
344 "r8",
345 "r9",
346 "r10",
347 "fp",
348 "r12",
349 "sp",
350 "lr",
351 "pc"};
352
e2b5e7aa 353static void output_w32(u_int word)
57871462 354{
355 *((u_int *)out)=word;
356 out+=4;
357}
e2b5e7aa 358
// Pack three register numbers into their instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0.  Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 366
// Pack rd, rn and an 8-bit immediate that is to be shifted left by an even
// amount.  The shift is stored as (32-shift)&30 starting at bit 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot=(32-shift)&30;
  return imm|(rot<<7)|(rd<<12)|(rn<<16);
}
e2b5e7aa 375
// Try to express imm as an 8-bit value rotated by an even amount.
// On success store the 12-bit operand field in *encoded and return 1;
// otherwise return 0 with *encoded left at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  for(int rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation value
    imm=(imm<<30)|(imm>>2);
  }
  return 0;
}
e2b5e7aa 391
// Encode imm via genimm(); asserts that the value is encodable.
static void genimm_checked(u_int imm,u_int *encoded)
{
  u_int ok=genimm(imm,encoded);
  (void)ok; // keeps builds without assertions free of unused warnings
  assert(ok);
}
e2b5e7aa 398
399static u_int genjmp(u_int addr)
57871462 400{
7c3a5182 401 if (addr < 3) return 0; // a branch that will be patched later
402 int offset = addr-(int)out-8;
403 if (offset < -33554432 || offset >= 33554432) {
404 SysPrintf("genjmp: out of range: %08x\n", offset);
405 abort();
e80343e2 406 return 0;
407 }
57871462 408 return ((u_int)offset>>2)&0xffffff;
409}
410
d1e4ebd9 411static unused void emit_breakpoint(void)
412{
413 assem_debug("bkpt #0\n");
414 //output_w32(0xe1200070);
415 output_w32(0xe7f001f0);
416}
417
e2b5e7aa 418static void emit_mov(int rs,int rt)
57871462 419{
420 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
421 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
422}
423
e2b5e7aa 424static void emit_movs(int rs,int rt)
57871462 425{
426 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
427 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
428}
429
e2b5e7aa 430static void emit_add(int rs1,int rs2,int rt)
57871462 431{
432 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
433 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
434}
435
39b71d9a 436static void emit_adds(int rs1,int rs2,int rt)
437{
438 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
439 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
440}
441#define emit_adds_ptr emit_adds
442
e2b5e7aa 443static void emit_adcs(int rs1,int rs2,int rt)
57871462 444{
445 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
446 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
447}
448
e2b5e7aa 449static void emit_neg(int rs, int rt)
57871462 450{
451 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
453}
454
e2b5e7aa 455static void emit_sub(int rs1,int rs2,int rt)
57871462 456{
457 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
458 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
459}
460
e2b5e7aa 461static void emit_zeroreg(int rt)
57871462 462{
463 assem_debug("mov %s,#0\n",regname[rt]);
464 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
465}
466
e2b5e7aa 467static void emit_loadlp(u_int imm,u_int rt)
790ee18e 468{
469 add_literal((int)out,imm);
470 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
471 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
472}
e2b5e7aa 473
33788798 474#ifdef HAVE_ARMV7
e2b5e7aa 475static void emit_movw(u_int imm,u_int rt)
790ee18e 476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
e2b5e7aa 481
482static void emit_movt(u_int imm,u_int rt)
790ee18e 483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
33788798 487#endif
e2b5e7aa 488
489static void emit_movimm(u_int imm,u_int rt)
790ee18e 490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
665f33e1 499 #ifndef HAVE_ARMV7
790ee18e 500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
665f33e1 508 #ifndef HAVE_ARMV7
790ee18e 509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
e2b5e7aa 516
517static void emit_pcreladdr(u_int rt)
790ee18e 518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
e2b5e7aa 523static void emit_loadreg(int r, int hr)
57871462 524{
3d624f89 525 if(r&64) {
c43b5311 526 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 527 assert(0);
528 return;
3d624f89 529 }
57871462 530 if((r&63)==0)
531 emit_zeroreg(hr);
532 else {
33788798 533 void *addr;
7c3a5182 534 switch (r) {
535 //case HIREG: addr = &hi; break;
536 //case LOREG: addr = &lo; break;
33788798 537 case CCREG: addr = &cycle_count; break;
538 case CSREG: addr = &Status; break;
539 case INVCP: addr = &invc_ptr; break;
540 case ROREG: addr = &ram_offset; break;
541 default:
542 assert(r < 34);
543 addr = &psxRegs.GPR.r[r];
544 break;
7c3a5182 545 }
33788798 546 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 547 assert(offset<4096);
6cc8d23c 548 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 549 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
550 }
551}
e2b5e7aa 552
553static void emit_storereg(int r, int hr)
57871462 554{
3d624f89 555 if(r&64) {
c43b5311 556 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 557 assert(0);
558 return;
3d624f89 559 }
7c3a5182 560 int addr = (int)&psxRegs.GPR.r[r];
561 switch (r) {
562 //case HIREG: addr = &hi; break;
563 //case LOREG: addr = &lo; break;
564 case CCREG: addr = (int)&cycle_count; break;
565 default: assert(r < 34); break;
566 }
57871462 567 u_int offset = addr-(u_int)&dynarec_local;
568 assert(offset<4096);
6cc8d23c 569 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 570 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
571}
572
e2b5e7aa 573static void emit_test(int rs, int rt)
57871462 574{
575 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
576 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
577}
578
e2b5e7aa 579static void emit_testimm(int rs,int imm)
57871462 580{
581 u_int armval;
5a05d80c 582 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 583 genimm_checked(imm,&armval);
57871462 584 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
585}
586
e2b5e7aa 587static void emit_testeqimm(int rs,int imm)
b9b61529 588{
589 u_int armval;
590 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 591 genimm_checked(imm,&armval);
b9b61529 592 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
593}
594
e2b5e7aa 595static void emit_not(int rs,int rt)
57871462 596{
597 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
598 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
599}
600
e2b5e7aa 601static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 602{
603 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
604 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
605}
606
e2b5e7aa 607static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 608{
609 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
610 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
611}
e2b5e7aa 612
e2b5e7aa 613static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 614{
615 assert(rs<16);
616 assert(rt<16);
617 assert(imm<32);
618 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
619 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
620}
621
e2b5e7aa 622static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 623{
624 assert(rs<16);
625 assert(rt<16);
626 assert(imm<32);
627 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
628 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
629}
630
e2b5e7aa 631static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 632{
633 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
634 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
635}
636
3968e69e 637static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
638{
639 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
640 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
641}
642
e2b5e7aa 643static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 644{
645 assert(rs<16);
646 assert(rt<16);
647 if(imm!=0) {
57871462 648 u_int armval;
649 if(genimm(imm,&armval)) {
650 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
651 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
652 }else if(genimm(-imm,&armval)) {
8a0a8423 653 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 654 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 655 #ifdef HAVE_ARMV7
656 }else if(rt!=rs&&(u_int)imm<65536) {
657 emit_movw(imm&0x0000ffff,rt);
658 emit_add(rs,rt,rt);
659 }else if(rt!=rs&&(u_int)-imm<65536) {
660 emit_movw(-imm&0x0000ffff,rt);
661 emit_sub(rs,rt,rt);
662 #endif
663 }else if((u_int)-imm<65536) {
57871462 664 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
665 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
666 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
667 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 668 }else {
669 do {
670 int shift = (ffs(imm) - 1) & ~1;
671 int imm8 = imm & (0xff << shift);
672 genimm_checked(imm8,&armval);
673 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
674 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
675 rs = rt;
676 imm &= ~imm8;
677 }
678 while (imm != 0);
57871462 679 }
680 }
681 else if(rs!=rt) emit_mov(rs,rt);
682}
683
e2b5e7aa 684static void emit_addimm_and_set_flags(int imm,int rt)
57871462 685{
686 assert(imm>-65536&&imm<65536);
687 u_int armval;
688 if(genimm(imm,&armval)) {
689 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
690 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
691 }else if(genimm(-imm,&armval)) {
692 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
693 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
694 }else if(imm<0) {
695 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
696 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
697 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
698 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
699 }else{
700 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
701 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
702 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
703 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
704 }
705}
e2b5e7aa 706
e2b5e7aa 707static void emit_addnop(u_int r)
57871462 708{
709 assert(r<16);
710 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
711 output_w32(0xe2800000|rd_rn_rm(r,r,0));
712}
713
e2b5e7aa 714static void emit_andimm(int rs,int imm,int rt)
57871462 715{
716 u_int armval;
790ee18e 717 if(imm==0) {
718 emit_zeroreg(rt);
719 }else if(genimm(imm,&armval)) {
57871462 720 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
722 }else if(genimm(~imm,&armval)) {
723 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
724 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
725 }else if(imm==65535) {
332a4533 726 #ifndef HAVE_ARMV6
57871462 727 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
728 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
729 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
730 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
731 #else
732 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
733 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
734 #endif
735 }else{
736 assert(imm>0&&imm<65535);
665f33e1 737 #ifndef HAVE_ARMV7
57871462 738 assem_debug("mov r14,#%d\n",imm&0xFF00);
739 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
740 assem_debug("add r14,r14,#%d\n",imm&0xFF);
741 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
742 #else
743 emit_movw(imm,HOST_TEMPREG);
744 #endif
745 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
746 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
747 }
748}
749
e2b5e7aa 750static void emit_orimm(int rs,int imm,int rt)
57871462 751{
752 u_int armval;
790ee18e 753 if(imm==0) {
754 if(rs!=rt) emit_mov(rs,rt);
755 }else if(genimm(imm,&armval)) {
57871462 756 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
757 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
758 }else{
759 assert(imm>0&&imm<65536);
760 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
761 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
762 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
763 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
764 }
765}
766
e2b5e7aa 767static void emit_xorimm(int rs,int imm,int rt)
57871462 768{
57871462 769 u_int armval;
790ee18e 770 if(imm==0) {
771 if(rs!=rt) emit_mov(rs,rt);
772 }else if(genimm(imm,&armval)) {
57871462 773 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
774 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
775 }else{
514ed0d9 776 assert(imm>0&&imm<65536);
57871462 777 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
778 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
779 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
780 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
781 }
782}
783
e2b5e7aa 784static void emit_shlimm(int rs,u_int imm,int rt)
57871462 785{
786 assert(imm>0);
787 assert(imm<32);
788 //if(imm==1) ...
789 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
e2b5e7aa 793static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
e2b5e7aa 801static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
807}
808
e2b5e7aa 809static void emit_shrimm(int rs,u_int imm,int rt)
57871462 810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
815}
816
e2b5e7aa 817static void emit_sarimm(int rs,u_int imm,int rt)
57871462 818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
823}
824
e2b5e7aa 825static void emit_rorimm(int rs,u_int imm,int rt)
57871462 826{
827 assert(imm>0);
828 assert(imm<32);
829 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
830 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
831}
832
e2b5e7aa 833static void emit_signextend16(int rs,int rt)
b9b61529 834{
332a4533 835 #ifndef HAVE_ARMV6
b9b61529 836 emit_shlimm(rs,16,rt);
837 emit_sarimm(rt,16,rt);
838 #else
839 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
840 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
841 #endif
842}
843
e2b5e7aa 844static void emit_signextend8(int rs,int rt)
c6c3b1b3 845{
332a4533 846 #ifndef HAVE_ARMV6
c6c3b1b3 847 emit_shlimm(rs,24,rt);
848 emit_sarimm(rt,24,rt);
849 #else
850 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
851 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
852 #endif
853}
854
e2b5e7aa 855static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 856{
857 assert(rs<16);
858 assert(rt<16);
859 assert(shift<16);
860 //if(imm==1) ...
861 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
862 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
863}
e2b5e7aa 864
865static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 866{
867 assert(rs<16);
868 assert(rt<16);
869 assert(shift<16);
870 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
871 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
872}
e2b5e7aa 873
874static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 875{
876 assert(rs<16);
877 assert(rt<16);
878 assert(shift<16);
879 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
881}
57871462 882
3968e69e 883static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 884{
885 assert(rs<16);
886 assert(rt<16);
887 assert(shift<16);
888 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
889 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
890}
e2b5e7aa 891
3968e69e 892static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 893{
894 assert(rs<16);
895 assert(rt<16);
896 assert(shift<16);
897 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
898 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
899}
900
e2b5e7aa 901static void emit_cmpimm(int rs,int imm)
57871462 902{
903 u_int armval;
904 if(genimm(imm,&armval)) {
5a05d80c 905 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 906 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
907 }else if(genimm(-imm,&armval)) {
5a05d80c 908 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 909 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
910 }else if(imm>0) {
911 assert(imm<65536);
57871462 912 emit_movimm(imm,HOST_TEMPREG);
57871462 913 assem_debug("cmp %s,r14\n",regname[rs]);
914 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
915 }else{
916 assert(imm>-65536);
57871462 917 emit_movimm(-imm,HOST_TEMPREG);
57871462 918 assem_debug("cmn %s,r14\n",regname[rs]);
919 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
920 }
921}
922
e2b5e7aa 923static void emit_cmovne_imm(int imm,int rt)
57871462 924{
925 assem_debug("movne %s,#%d\n",regname[rt],imm);
926 u_int armval;
cfbd3c6e 927 genimm_checked(imm,&armval);
57871462 928 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
929}
e2b5e7aa 930
931static void emit_cmovl_imm(int imm,int rt)
57871462 932{
933 assem_debug("movlt %s,#%d\n",regname[rt],imm);
934 u_int armval;
cfbd3c6e 935 genimm_checked(imm,&armval);
57871462 936 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
937}
e2b5e7aa 938
939static void emit_cmovb_imm(int imm,int rt)
57871462 940{
941 assem_debug("movcc %s,#%d\n",regname[rt],imm);
942 u_int armval;
cfbd3c6e 943 genimm_checked(imm,&armval);
57871462 944 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
945}
e2b5e7aa 946
3968e69e 947static void emit_cmovae_imm(int imm,int rt)
948{
949 assem_debug("movcs %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
9c997d19 955static void emit_cmovs_imm(int imm,int rt)
956{
957 assem_debug("movmi %s,#%d\n",regname[rt],imm);
958 u_int armval;
959 genimm_checked(imm,&armval);
960 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
961}
962
e2b5e7aa 963static void emit_cmovne_reg(int rs,int rt)
57871462 964{
965 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
966 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
967}
e2b5e7aa 968
969static void emit_cmovl_reg(int rs,int rt)
57871462 970{
971 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
972 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
973}
e2b5e7aa 974
e3c6bdb5 975static void emit_cmovb_reg(int rs,int rt)
976{
977 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
978 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
979}
980
e2b5e7aa 981static void emit_cmovs_reg(int rs,int rt)
57871462 982{
983 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
984 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
985}
986
e2b5e7aa 987static void emit_slti32(int rs,int imm,int rt)
57871462 988{
989 if(rs!=rt) emit_zeroreg(rt);
990 emit_cmpimm(rs,imm);
991 if(rs==rt) emit_movimm(0,rt);
992 emit_cmovl_imm(1,rt);
993}
e2b5e7aa 994
995static void emit_sltiu32(int rs,int imm,int rt)
57871462 996{
997 if(rs!=rt) emit_zeroreg(rt);
998 emit_cmpimm(rs,imm);
999 if(rs==rt) emit_movimm(0,rt);
1000 emit_cmovb_imm(1,rt);
1001}
e2b5e7aa 1002
e2b5e7aa 1003static void emit_cmp(int rs,int rt)
57871462 1004{
1005 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1006 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1007}
e2b5e7aa 1008
// Emit: compare rs with 1, load 1 into rt, then conditionally (LT)
// replace it with 0.
static void emit_set_gz32(int rs, int rt)
{
  // the compare is emitted first, so rs may be the same register as rt
  emit_cmpimm(rs,1);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 1016
// Emit a flag-setting copy of rs into rt (or a test of rs when they are
// the same register), followed by a conditional (NE) load of 1 into rt.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1024
// Emit a sequence leaving 1 in rt when "cmp rs1,rs2" satisfies LT
// (emit_cmovl_imm), and 0 otherwise.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  // clear rt up front unless that would destroy one of the operands
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1033
// Emit a sequence leaving 1 in rt when "cmp rs1,rs2" satisfies CC
// (emit_cmovb_imm), and 0 otherwise.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_operand=(rs1==rt||rs2==rt);
  // clear rt up front unless that would destroy one of the operands
  if(!rt_is_operand) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_operand) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1042
2a014d73 1043static int can_jump_or_call(const void *a)
1044{
1045 intptr_t offset = (u_char *)a - out - 8;
1046 return (-33554432 <= offset && offset < 33554432);
1047}
1048
643aeae3 1049static void emit_call(const void *a_)
57871462 1050{
643aeae3 1051 int a = (int)a_;
d1e4ebd9 1052 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1053 u_int offset=genjmp(a);
1054 output_w32(0xeb000000|offset);
1055}
e2b5e7aa 1056
b14b6a8f 1057static void emit_jmp(const void *a_)
57871462 1058{
b14b6a8f 1059 int a = (int)a_;
d1e4ebd9 1060 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1061 u_int offset=genjmp(a);
1062 output_w32(0xea000000|offset);
1063}
e2b5e7aa 1064
// Emit "bne a_"; the branch offset field comes from genjmp().
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
e2b5e7aa 1072
// Emit "beq a_"; the branch offset field comes from genjmp().
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
e2b5e7aa 1080
// Emit "bmi a_"; the branch offset field comes from genjmp().
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
e2b5e7aa 1088
// Emit "bpl a_"; the branch offset field comes from genjmp().
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
e2b5e7aa 1096
// Emit "blt a_"; the branch offset field comes from genjmp().
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
e2b5e7aa 1104
// Emit "bge a_"; the branch offset field comes from genjmp().
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
e2b5e7aa 1112
// Emit "bvc a_"; the branch offset field comes from genjmp().
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
e2b5e7aa 1120
// Emit "bcs a_"; the branch offset field comes from genjmp().
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
e2b5e7aa 1128
// Emit "bcc a_"; the branch offset field comes from genjmp().
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000|genjmp(target));
}
1136
3968e69e 1137static unused void emit_callreg(u_int r)
57871462 1138{
c6c3b1b3 1139 assert(r<15);
1140 assem_debug("blx %s\n",regname[r]);
1141 output_w32(0xe12fff30|r);
57871462 1142}
e2b5e7aa 1143
1144static void emit_jmpreg(u_int r)
57871462 1145{
1146 assem_debug("mov pc,%s\n",regname[r]);
1147 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1148}
1149
be516ebe 1150static void emit_ret(void)
1151{
1152 emit_jmpreg(14);
1153}
1154
e2b5e7aa 1155static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1156{
1157 assert(offset>-4096&&offset<4096);
1158 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1159 if(offset>=0) {
1160 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1161 }else{
1162 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1163 }
1164}
e2b5e7aa 1165
1166static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1167{
1168 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1169 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1170}
39b71d9a 1171#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1172
1173static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1174{
1175 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1176 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1177}
e2b5e7aa 1178
1179static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1180{
1181 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1182 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1183}
e2b5e7aa 1184
37387d8b 1185static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1186{
1187 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1188 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1189}
1190
e2b5e7aa 1191static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1192{
1193 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1194 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1195}
e2b5e7aa 1196
37387d8b 1197static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1198{
1199 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1200 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1201}
1202
e2b5e7aa 1203static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1204{
1205 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1206 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1207}
e2b5e7aa 1208
37387d8b 1209static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1210{
1211 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1212 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1213}
1214
e2b5e7aa 1215static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1216{
1217 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1218 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1219}
e2b5e7aa 1220
37387d8b 1221static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1222{
1223 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1224 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1225}
1226
e2b5e7aa 1227static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1228{
1229 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1230 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1231}
1232
1233static void emit_str_dualindexed(int rs1, int rs2, int rt)
1234{
1235 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1236 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1237}
1238
1239static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1240{
1241 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1242 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1243}
1244
1245static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1246{
1247 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1248 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1249}
e2b5e7aa 1250
e2b5e7aa 1251static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1252{
1253 assert(offset>-256&&offset<256);
1254 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1255 if(offset>=0) {
1256 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1257 }else{
1258 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1259 }
1260}
e2b5e7aa 1261
e2b5e7aa 1262static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1263{
1264 assert(offset>-256&&offset<256);
1265 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1266 if(offset>=0) {
1267 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1268 }else{
1269 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1270 }
1271}
e2b5e7aa 1272
1273static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1274{
1275 assert(offset>-4096&&offset<4096);
1276 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1277 if(offset>=0) {
1278 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1279 }else{
1280 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1281 }
1282}
e2b5e7aa 1283
e2b5e7aa 1284static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1285{
1286 assert(offset>-256&&offset<256);
1287 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1288 if(offset>=0) {
1289 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1290 }else{
1291 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1292 }
1293}
e2b5e7aa 1294
054175e9 1295static void emit_ldrd(int offset, int rs, int rt)
1296{
1297 assert(offset>-256&&offset<256);
1298 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1299 if(offset>=0) {
1300 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1301 }else{
1302 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1303 }
1304}
e2b5e7aa 1305
643aeae3 1306static void emit_readword(void *addr, int rt)
57871462 1307{
643aeae3 1308 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1309 assert(offset<4096);
1310 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1311 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1312}
39b71d9a 1313#define emit_readptr emit_readword
e2b5e7aa 1314
e2b5e7aa 1315static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1316{
1317 assert(offset>-4096&&offset<4096);
1318 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1319 if(offset>=0) {
1320 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1321 }else{
1322 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1323 }
1324}
e2b5e7aa 1325
e2b5e7aa 1326static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1327{
1328 assert(offset>-256&&offset<256);
1329 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1330 if(offset>=0) {
1331 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1332 }else{
1333 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1334 }
1335}
e2b5e7aa 1336
1337static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1338{
1339 assert(offset>-4096&&offset<4096);
1340 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1341 if(offset>=0) {
1342 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1343 }else{
1344 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1345 }
1346}
e2b5e7aa 1347
e2b5e7aa 1348static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1349{
1350 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1351 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1352}
e2b5e7aa 1353
1354static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1355{
1356 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1357 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1358}
e2b5e7aa 1359
1360static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1361{
1362 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1363 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1364}
e2b5e7aa 1365
643aeae3 1366static void emit_writeword(int rt, void *addr)
57871462 1367{
643aeae3 1368 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1369 assert(offset<4096);
1370 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1371 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1372}
e2b5e7aa 1373
e2b5e7aa 1374static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1375{
1376 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1377 assert(rs1<16);
1378 assert(rs2<16);
1379 assert(hi<16);
1380 assert(lo<16);
1381 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1382}
e2b5e7aa 1383
1384static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1385{
1386 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1387 assert(rs1<16);
1388 assert(rs2<16);
1389 assert(hi<16);
1390 assert(lo<16);
1391 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1392}
1393
e2b5e7aa 1394static void emit_clz(int rs,int rt)
57871462 1395{
1396 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1397 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1398}
1399
e2b5e7aa 1400static void emit_subcs(int rs1,int rs2,int rt)
57871462 1401{
1402 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1403 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1404}
1405
e2b5e7aa 1406static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1407{
1408 assert(imm>0);
1409 assert(imm<32);
1410 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1411 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1412}
1413
e2b5e7aa 1414static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1415{
1416 assert(imm>0);
1417 assert(imm<32);
1418 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1419 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1420}
1421
e2b5e7aa 1422static void emit_negmi(int rs, int rt)
57871462 1423{
1424 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1425 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1426}
1427
e2b5e7aa 1428static void emit_negsmi(int rs, int rt)
57871462 1429{
1430 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1431 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1432}
1433
e2b5e7aa 1434static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1435{
1436 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1437 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1438}
1439
e2b5e7aa 1440static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1441{
1442 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1443 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1444}
1445
e2b5e7aa 1446static void emit_teq(int rs, int rt)
57871462 1447{
1448 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1449 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1450}
1451
3968e69e 1452static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1453{
1454 u_int armval;
cfbd3c6e 1455 genimm_checked(imm,&armval);
57871462 1456 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1457 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1458}
1459
57871462 1460// Conditionally select one of two immediates, optimizing for small code size
1461// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1462static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1463{
1464 u_int armval;
1465 if(genimm(imm2-imm1,&armval)) {
1466 emit_movimm(imm1,rt);
1467 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1468 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1469 }else if(genimm(imm1-imm2,&armval)) {
1470 emit_movimm(imm1,rt);
1471 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1472 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1473 }
1474 else {
665f33e1 1475 #ifndef HAVE_ARMV7
57871462 1476 emit_movimm(imm1,rt);
1477 add_literal((int)out,imm2);
1478 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1479 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1480 #else
1481 emit_movw(imm1&0x0000FFFF,rt);
1482 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1483 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1484 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1485 }
1486 emit_movt(imm1&0xFFFF0000,rt);
1487 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1488 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1489 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1490 }
1491 #endif
1492 }
1493}
1494
57871462 1495// special case for checking invalid_code
e2b5e7aa 1496static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1497{
1498 assert(imm<128&&imm>=0);
1499 assert(r>=0&&r<16);
1500 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1501 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1502 emit_cmpimm(HOST_TEMPREG,imm);
1503}
1504
// Emit "blne a"; the branch offset field comes from genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
1511
57871462 1512// Used to preload hash table entries
e2b5e7aa 1513static unused void emit_prefetchreg(int r)
57871462 1514{
1515 assem_debug("pld %s\n",regname[r]);
1516 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1517}
1518
1519// Special case for mini_ht
e2b5e7aa 1520static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1521{
1522 assert(offset<4096);
1523 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1524 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1525}
1526
e2b5e7aa 1527static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1528{
1529 u_int armval;
cfbd3c6e 1530 genimm_checked(imm,&armval);
b9b61529 1531 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1532 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1533}
1534
e2b5e7aa 1535static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1536{
1537 u_int armval;
1538 genimm_checked(imm,&armval);
1539 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1540 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1541}
1542
e2b5e7aa 1543static void emit_jno_unlikely(int a)
57871462 1544{
1545 //emit_jno(a);
1546 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1547 output_w32(0x72800000|rd_rn_rm(15,15,0));
1548}
1549
054175e9 1550static void save_regs_all(u_int reglist)
57871462 1551{
054175e9 1552 int i;
57871462 1553 if(!reglist) return;
1554 assem_debug("stmia fp,{");
054175e9 1555 for(i=0;i<16;i++)
1556 if(reglist&(1<<i))
1557 assem_debug("r%d,",i);
57871462 1558 assem_debug("}\n");
1559 output_w32(0xe88b0000|reglist);
1560}
e2b5e7aa 1561
054175e9 1562static void restore_regs_all(u_int reglist)
57871462 1563{
054175e9 1564 int i;
57871462 1565 if(!reglist) return;
1566 assem_debug("ldmia fp,{");
054175e9 1567 for(i=0;i<16;i++)
1568 if(reglist&(1<<i))
1569 assem_debug("r%d,",i);
57871462 1570 assem_debug("}\n");
1571 output_w32(0xe89b0000|reglist);
1572}
e2b5e7aa 1573
054175e9 1574// Save registers before function call
1575static void save_regs(u_int reglist)
1576{
4d646738 1577 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1578 save_regs_all(reglist);
1579}
e2b5e7aa 1580
054175e9 1581// Restore registers after function call
1582static void restore_regs(u_int reglist)
1583{
4d646738 1584 reglist&=CALLER_SAVE_REGS;
054175e9 1585 restore_regs_all(reglist);
1586}
57871462 1587
57871462 1588/* Stubs/epilogue */
1589
e2b5e7aa 1590static void literal_pool(int n)
57871462 1591{
1592 if(!literalcount) return;
1593 if(n) {
1594 if((int)out-literals[0][0]<4096-n) return;
1595 }
1596 u_int *ptr;
1597 int i;
1598 for(i=0;i<literalcount;i++)
1599 {
77750690 1600 u_int l_addr=(u_int)out;
1601 int j;
1602 for(j=0;j<i;j++) {
1603 if(literals[j][1]==literals[i][1]) {
1604 //printf("dup %08x\n",literals[i][1]);
1605 l_addr=literals[j][0];
1606 break;
1607 }
1608 }
57871462 1609 ptr=(u_int *)literals[i][0];
77750690 1610 u_int offset=l_addr-(u_int)ptr-8;
57871462 1611 assert(offset<4096);
1612 assert(!(offset&3));
1613 *ptr|=offset;
77750690 1614 if(l_addr==(u_int)out) {
1615 literals[i][0]=l_addr; // remember for dupes
1616 output_w32(literals[i][1]);
1617 }
57871462 1618 }
1619 literalcount=0;
1620}
1621
e2b5e7aa 1622static void literal_pool_jumpover(int n)
57871462 1623{
1624 if(!literalcount) return;
1625 if(n) {
1626 if((int)out-literals[0][0]<4096-n) return;
1627 }
df4dc2b1 1628 void *jaddr = out;
57871462 1629 emit_jmp(0);
1630 literal_pool(0);
df4dc2b1 1631 set_jump_target(jaddr, out);
57871462 1632}
1633
7c3a5182 1634// parsed by get_pointer, find_extjump_insn
1635static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1636{
1637 u_char *ptr=(u_char *)addr;
1638 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1639 (void)ptr;
1640
57871462 1641 emit_loadlp(target,0);
643aeae3 1642 emit_loadlp((u_int)addr,1);
66ea165f 1643 assert(ndrc->translation_cache <= addr &&
1644 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
57871462 1645 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1646//DEBUG >
1647#ifdef DEBUG_CYCLE_COUNT
643aeae3 1648 emit_readword(&last_count,ECX);
57871462 1649 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1650 emit_readword(&next_interupt,ECX);
1651 emit_writeword(HOST_CCREG,&Count);
57871462 1652 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1653 emit_writeword(ECX,&last_count);
57871462 1654#endif
1655//DEBUG <
2a014d73 1656 emit_far_jump(linker);
57871462 1657}
1658
d1e4ebd9 1659static void check_extjump2(void *src)
1660{
1661 u_int *ptr = src;
1662 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1663 (void)ptr;
1664}
1665
13e35c04 1666// put rt_val into rt, potentially making use of rs with value rs_val
1667static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1668{
8575a877 1669 u_int armval;
1670 int diff;
1671 if(genimm(rt_val,&armval)) {
1672 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1673 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1674 return;
1675 }
1676 if(genimm(~rt_val,&armval)) {
1677 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1678 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1679 return;
1680 }
1681 diff=rt_val-rs_val;
1682 if(genimm(diff,&armval)) {
1683 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1684 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1685 return;
1686 }else if(genimm(-diff,&armval)) {
1687 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1688 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1689 return;
1690 }
1691 emit_movimm(rt_val,rt);
1692}
1693
// return 1 if emit_movimm_from() can do its job cheaply:
// the values are equal, or their difference (or its negation), after
// dropping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;

  const int diff=v2-v1;
  u_int bits=diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  bits=-diff;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  return 0;
}
cbbab9cd 1709
b14b6a8f 1710static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1711{
1712 switch(type) {
1713 case LOADB_STUB: emit_signextend8(rs,rt); break;
1714 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1715 case LOADH_STUB: emit_signextend16(rs,rt); break;
1716 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1717 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1718 default: assert(0);
1719 }
1720}
1721
b1be1eee 1722#include "pcsxmem.h"
1723#include "pcsxmem_inline.c"
b1be1eee 1724
e2b5e7aa 1725static void do_readstub(int n)
57871462 1726{
b14b6a8f 1727 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1728 literal_pool(256);
b14b6a8f 1729 set_jump_target(stubs[n].addr, out);
1730 enum stub_type type=stubs[n].type;
1731 int i=stubs[n].a;
1732 int rs=stubs[n].b;
81dbbf4c 1733 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1734 u_int reglist=stubs[n].e;
81dbbf4c 1735 const signed char *i_regmap=i_regs->regmap;
581335b0 1736 int rt;
cf95b4f0 1737 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1738 rt=get_reg(i_regmap,FTEMP);
1739 }else{
cf95b4f0 1740 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1741 }
1742 assert(rs>=0);
df4dc2b1 1743 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1744 void *restore_jump = NULL;
c6c3b1b3 1745 reglist|=(1<<rs);
1746 for(r=0;r<=12;r++) {
1747 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1748 temp=r; break;
1749 }
1750 }
cf95b4f0 1751 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1752 reglist&=~(1<<rt);
1753 if(temp==-1) {
1754 save_regs(reglist);
1755 regs_saved=1;
1756 temp=(rs==0)?2:0;
1757 }
1758 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1759 temp2=1;
643aeae3 1760 emit_readword(&mem_rtab,temp);
c6c3b1b3 1761 emit_shrimm(rs,12,temp2);
1762 emit_readword_dualindexedx4(temp,temp2,temp2);
1763 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1764 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1765 switch(type) {
1766 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1767 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1768 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1769 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1770 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1771 default: assert(0);
c6c3b1b3 1772 }
1773 }
1774 if(regs_saved) {
df4dc2b1 1775 restore_jump=out;
c6c3b1b3 1776 emit_jcc(0); // jump to reg restore
1777 }
1778 else
b14b6a8f 1779 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1780
1781 if(!regs_saved)
1782 save_regs(reglist);
643aeae3 1783 void *handler=NULL;
c6c3b1b3 1784 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1785 handler=jump_handler_read8;
c6c3b1b3 1786 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1787 handler=jump_handler_read16;
c6c3b1b3 1788 if(type==LOADW_STUB)
643aeae3 1789 handler=jump_handler_read32;
1790 assert(handler);
b96d3df7 1791 pass_args(rs,temp2);
c6c3b1b3 1792 int cc=get_reg(i_regmap,CCREG);
1793 if(cc<0)
1794 emit_loadreg(CCREG,2);
2330734f 1795 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1796 emit_far_call(handler);
cf95b4f0 1797 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1798 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1799 }
1800 if(restore_jump)
df4dc2b1 1801 set_jump_target(restore_jump, out);
c6c3b1b3 1802 restore_regs(reglist);
b14b6a8f 1803 emit_jmp(stubs[n].retaddr); // return address
57871462 1804}
1805
81dbbf4c 1806static void inline_readstub(enum stub_type type, int i, u_int addr,
1807 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1808{
1809 int rs=get_reg(regmap,target);
57871462 1810 int rt=get_reg(regmap,target);
9de8a0c3 1811 if(rs<0) rs=get_reg_temp(regmap);
57871462 1812 assert(rs>=0);
2a014d73 1813 u_int is_dynamic;
687b4580 1814 uintptr_t host_addr = 0;
643aeae3 1815 void *handler;
b1be1eee 1816 int cc=get_reg(regmap,CCREG);
2330734f 1817 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1818 return;
643aeae3 1819 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1820 if (handler == NULL) {
cf95b4f0 1821 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1822 return;
13e35c04 1823 if(addr!=host_addr)
1824 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1825 switch(type) {
1826 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1827 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1828 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1829 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1830 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1831 default: assert(0);
1832 }
1833 return;
1834 }
b1be1eee 1835 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1836 if(is_dynamic) {
1837 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1838 handler=jump_handler_read8;
b1be1eee 1839 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1840 handler=jump_handler_read16;
b1be1eee 1841 if(type==LOADW_STUB)
643aeae3 1842 handler=jump_handler_read32;
b1be1eee 1843 }
c6c3b1b3 1844
1845 // call a memhandler
cf95b4f0 1846 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1847 reglist&=~(1<<rt);
1848 save_regs(reglist);
1849 if(target==0)
1850 emit_movimm(addr,0);
1851 else if(rs!=0)
1852 emit_mov(rs,0);
b1be1eee 1853 if(cc<0)
1854 emit_loadreg(CCREG,2);
1855 if(is_dynamic) {
1856 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1857 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1858 }
b1be1eee 1859 else {
643aeae3 1860 emit_readword(&last_count,3);
2330734f 1861 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1862 emit_add(2,3,2);
643aeae3 1863 emit_writeword(2,&Count);
b1be1eee 1864 }
1865
2a014d73 1866 emit_far_call(handler);
b1be1eee 1867
cf95b4f0 1868 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1869 switch(type) {
1870 case LOADB_STUB: emit_signextend8(0,rt); break;
1871 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1872 case LOADH_STUB: emit_signextend16(0,rt); break;
1873 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1874 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1875 default: assert(0);
1876 }
1877 }
1878 restore_regs(reglist);
57871462 1879}
1880
e2b5e7aa 1881static void do_writestub(int n)
57871462 1882{
b14b6a8f 1883 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1884 literal_pool(256);
b14b6a8f 1885 set_jump_target(stubs[n].addr, out);
1886 enum stub_type type=stubs[n].type;
1887 int i=stubs[n].a;
1888 int rs=stubs[n].b;
81dbbf4c 1889 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1890 u_int reglist=stubs[n].e;
81dbbf4c 1891 const signed char *i_regmap=i_regs->regmap;
581335b0 1892 int rt,r;
cf95b4f0 1893 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1894 rt=get_reg(i_regmap,r=FTEMP);
1895 }else{
cf95b4f0 1896 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1897 }
1898 assert(rs>=0);
1899 assert(rt>=0);
b14b6a8f 1900 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1901 void *restore_jump = NULL;
b96d3df7 1902 int reglist2=reglist|(1<<rs)|(1<<rt);
1903 for(rtmp=0;rtmp<=12;rtmp++) {
1904 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1905 temp=rtmp; break;
1906 }
1907 }
1908 if(temp==-1) {
1909 save_regs(reglist);
1910 regs_saved=1;
1911 for(rtmp=0;rtmp<=3;rtmp++)
1912 if(rtmp!=rs&&rtmp!=rt)
1913 {temp=rtmp;break;}
1914 }
1915 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1916 temp2=3;
643aeae3 1917 emit_readword(&mem_wtab,temp);
b96d3df7 1918 emit_shrimm(rs,12,temp2);
1919 emit_readword_dualindexedx4(temp,temp2,temp2);
1920 emit_lsls_imm(temp2,1,temp2);
1921 switch(type) {
1922 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1923 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1924 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1925 default: assert(0);
1926 }
1927 if(regs_saved) {
df4dc2b1 1928 restore_jump=out;
b96d3df7 1929 emit_jcc(0); // jump to reg restore
1930 }
1931 else
b14b6a8f 1932 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1933
1934 if(!regs_saved)
1935 save_regs(reglist);
643aeae3 1936 void *handler=NULL;
b96d3df7 1937 switch(type) {
643aeae3 1938 case STOREB_STUB: handler=jump_handler_write8; break;
1939 case STOREH_STUB: handler=jump_handler_write16; break;
1940 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1941 default: assert(0);
b96d3df7 1942 }
643aeae3 1943 assert(handler);
b96d3df7 1944 pass_args(rs,rt);
1945 if(temp2!=3)
1946 emit_mov(temp2,3);
1947 int cc=get_reg(i_regmap,CCREG);
1948 if(cc<0)
1949 emit_loadreg(CCREG,2);
2330734f 1950 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1951 // returns new cycle_count
2a014d73 1952 emit_far_call(handler);
2330734f 1953 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1954 if(cc<0)
1955 emit_storereg(CCREG,2);
1956 if(restore_jump)
df4dc2b1 1957 set_jump_target(restore_jump, out);
b96d3df7 1958 restore_regs(reglist);
b14b6a8f 1959 emit_jmp(stubs[n].retaddr);
57871462 1960}
1961
81dbbf4c 1962static void inline_writestub(enum stub_type type, int i, u_int addr,
1963 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1964{
9de8a0c3 1965 int rs=get_reg_temp(regmap);
57871462 1966 int rt=get_reg(regmap,target);
1967 assert(rs>=0);
1968 assert(rt>=0);
687b4580 1969 uintptr_t host_addr = 0;
643aeae3 1970 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1971 if (handler == NULL) {
13e35c04 1972 if(addr!=host_addr)
1973 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1974 switch(type) {
1975 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1976 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1977 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1978 default: assert(0);
1979 }
1980 return;
1981 }
1982
1983 // call a memhandler
1984 save_regs(reglist);
13e35c04 1985 pass_args(rs,rt);
b96d3df7 1986 int cc=get_reg(regmap,CCREG);
1987 if(cc<0)
1988 emit_loadreg(CCREG,2);
2330734f 1989 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1990 emit_movimm((u_int)handler,3);
b96d3df7 1991 // returns new cycle_count
2a014d73 1992 emit_far_call(jump_handler_write_h);
2330734f 1993 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1994 if(cc<0)
1995 emit_storereg(CCREG,2);
1996 restore_regs(reglist);
57871462 1997}
1998
d1e4ebd9 1999// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 2000static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 2001{
665f33e1 2002 #ifndef HAVE_ARMV7
7c3a5182 2003 emit_loadlp((int)source, 1);
2004 emit_loadlp((int)copy, 2);
3d680478 2005 emit_loadlp(source_len, 3);
57871462 2006 #else
7c3a5182 2007 emit_movw(((u_int)source)&0x0000FFFF, 1);
2008 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2009 emit_movt(((u_int)source)&0xFFFF0000, 1);
2010 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2011 emit_movw(source_len, 3);
57871462 2012 #endif
7c3a5182 2013 emit_movimm(arg0, 0);
2014}
2015
3d680478 2016static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2017{
2018 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2019 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2020 emit_far_call(verify_code);
df4dc2b1 2021 void *entry = out;
57871462 2022 load_regs_entry(i);
df4dc2b1 2023 if (entry == out)
2024 entry = instr_addr[i];
57871462 2025 emit_jmp(instr_addr[i]);
2026 return entry;
2027}
2028
3d680478 2029static void do_dirty_stub_ds(u_int source_len)
57871462 2030{
3d680478 2031 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2032 emit_far_call(verify_code_ds);
57871462 2033}
2034
57871462 2035/* Special assem */
2036
81dbbf4c 2037static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2038{
2039 save_regs_all(reglist);
32631e6a 2040 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2041#ifdef PCNT
81dbbf4c 2042 emit_movimm(op, 0);
2a014d73 2043 emit_far_call(pcnt_gte_start);
82ed88eb 2044#endif
81dbbf4c 2045 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2046}
2047
2048static void c2op_epilogue(u_int op,u_int reglist)
2049{
82ed88eb 2050#ifdef PCNT
2051 emit_movimm(op,0);
2a014d73 2052 emit_far_call(pcnt_gte_end);
82ed88eb 2053#endif
054175e9 2054 restore_regs_all(reglist);
2055}
2056
6c0eefaf 2057static void c2op_call_MACtoIR(int lm,int need_flags)
2058{
2059 if(need_flags)
2a014d73 2060 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2061 else
2a014d73 2062 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2063}
2064
2065static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2066{
2a014d73 2067 emit_far_call(func);
6c0eefaf 2068 // func is C code and trashes r0
2069 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2070 if(need_flags||need_ir)
2071 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2072 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2073}
2074
81dbbf4c 2075static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2076{
81dbbf4c 2077 u_int c2op = source[i] & 0x3f;
2078 u_int reglist_full = get_host_reglist(i_regs->regmap);
2079 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2080 int need_flags, need_ir;
b9b61529 2081
2082 if (gte_handlers[c2op]!=NULL) {
bedfea38 2083 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2084 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2085 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2086 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2087 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2088 need_flags=0;
6c0eefaf 2089 int shift = (source[i] >> 19) & 1;
2090 int lm = (source[i] >> 10) & 1;
054175e9 2091 switch(c2op) {
19776aef 2092#ifndef DRC_DBG
054175e9 2093 case GTE_MVMVA: {
82336ba3 2094#ifdef HAVE_ARMV5
054175e9 2095 int v = (source[i] >> 15) & 3;
2096 int cv = (source[i] >> 13) & 3;
2097 int mx = (source[i] >> 17) & 3;
4d646738 2098 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2099 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2100 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2101 if(v<3)
2102 emit_ldrd(v*8,0,4);
2103 else {
2104 emit_movzwl_indexed(9*4,0,4); // gteIR
2105 emit_movzwl_indexed(10*4,0,6);
2106 emit_movzwl_indexed(11*4,0,5);
2107 emit_orrshl_imm(6,16,4);
2108 }
2109 if(mx<3)
2110 emit_addimm(0,32*4+mx*8*4,6);
2111 else
643aeae3 2112 emit_readword(&zeromem_ptr,6);
054175e9 2113 if(cv<3)
2114 emit_addimm(0,32*4+(cv*8+5)*4,7);
2115 else
643aeae3 2116 emit_readword(&zeromem_ptr,7);
054175e9 2117#ifdef __ARM_NEON__
2118 emit_movimm(source[i],1); // opcode
2a014d73 2119 emit_far_call(gteMVMVA_part_neon);
054175e9 2120 if(need_flags) {
2121 emit_movimm(lm,1);
2a014d73 2122 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2123 }
2124#else
2125 if(cv==3&&shift)
33788798 2126 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 2127 else {
2128 emit_movimm(shift,1);
33788798 2129 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 2130 }
6c0eefaf 2131 if(need_flags||need_ir)
2132 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2133#endif
2134#else /* if not HAVE_ARMV5 */
81dbbf4c 2135 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2136 emit_movimm(source[i],1); // opcode
643aeae3 2137 emit_writeword(1,&psxRegs.code);
2a014d73 2138 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2139#endif
2140 break;
2141 }
6c0eefaf 2142 case GTE_OP:
81dbbf4c 2143 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2144 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2145 if(need_flags||need_ir) {
2146 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2147 c2op_call_MACtoIR(lm,need_flags);
2148 }
2149 break;
2150 case GTE_DPCS:
81dbbf4c 2151 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2152 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2153 break;
2154 case GTE_INTPL:
81dbbf4c 2155 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2156 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2157 break;
2158 case GTE_SQR:
81dbbf4c 2159 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2160 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2161 if(need_flags||need_ir) {
2162 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2163 c2op_call_MACtoIR(lm,need_flags);
2164 }
2165 break;
2166 case GTE_DCPL:
81dbbf4c 2167 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2168 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2169 break;
2170 case GTE_GPF:
81dbbf4c 2171 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2172 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2173 break;
2174 case GTE_GPL:
81dbbf4c 2175 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2176 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2177 break;
19776aef 2178#endif
054175e9 2179 default:
81dbbf4c 2180 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2181#ifdef DRC_DBG
2182 emit_movimm(source[i],1); // opcode
643aeae3 2183 emit_writeword(1,&psxRegs.code);
19776aef 2184#endif
2a014d73 2185 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2186 break;
2187 }
2188 c2op_epilogue(c2op,reglist);
2189 }
b9b61529 2190}
2191
3968e69e 2192static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
2193{
2194 //value = value & 0x7ffff000;
2195 //if (value & 0x7f87e000) value |= 0x80000000;
2196 emit_shrimm(sl,12,temp);
2197 emit_shlimm(temp,12,temp);
2198 emit_testimm(temp,0x7f000000);
2199 emit_testeqimm(temp,0x00870000);
2200 emit_testeqimm(temp,0x0000e000);
2201 emit_orrne_imm(temp,0x80000000,temp);
2202}
2203
2204static void do_mfc2_31_one(u_int copr,signed char temp)
2205{
2206 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2207 emit_lsls_imm(temp,16,temp);
2208 emit_cmovs_imm(0,temp);
2209 emit_cmpimm(temp,0xf80<<16);
2210 emit_andimm(temp,0xf80<<16,temp);
2211 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2212}
2213
2214static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2215{
2216 if (temp < 0) {
2217 host_tempreg_acquire();
2218 temp = HOST_TEMPREG;
2219 }
2220 do_mfc2_31_one(9,temp);
9c997d19 2221 emit_shrimm(temp,7+16,tl);
3968e69e 2222 do_mfc2_31_one(10,temp);
9c997d19 2223 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2224 do_mfc2_31_one(11,temp);
9c997d19 2225 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2226 emit_writeword(tl,&reg_cop2d[29]);
2227 if (temp == HOST_TEMPREG)
2228 host_tempreg_release();
2229}
2230
2330734f 2231static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2232{
2233 // case 0x18: MULT
2234 // case 0x19: MULTU
2235 // case 0x1A: DIV
2236 // case 0x1B: DIVU
2237 // case 0x1C: DMULT
2238 // case 0x1D: DMULTU
2239 // case 0x1E: DDIV
2240 // case 0x1F: DDIVU
cf95b4f0 2241 if(dops[i].rs1&&dops[i].rs2)
57871462 2242 {
cf95b4f0 2243 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2244 {
cf95b4f0 2245 if(dops[i].opcode2==0x18) // MULT
57871462 2246 {
cf95b4f0 2247 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2248 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2249 signed char hi=get_reg(i_regs->regmap,HIREG);
2250 signed char lo=get_reg(i_regs->regmap,LOREG);
2251 assert(m1>=0);
2252 assert(m2>=0);
2253 assert(hi>=0);
2254 assert(lo>=0);
2255 emit_smull(m1,m2,hi,lo);
2256 }
cf95b4f0 2257 if(dops[i].opcode2==0x19) // MULTU
57871462 2258 {
cf95b4f0 2259 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2260 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2261 signed char hi=get_reg(i_regs->regmap,HIREG);
2262 signed char lo=get_reg(i_regs->regmap,LOREG);
2263 assert(m1>=0);
2264 assert(m2>=0);
2265 assert(hi>=0);
2266 assert(lo>=0);
2267 emit_umull(m1,m2,hi,lo);
2268 }
cf95b4f0 2269 if(dops[i].opcode2==0x1A) // DIV
57871462 2270 {
cf95b4f0 2271 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2272 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2273 assert(d1>=0);
2274 assert(d2>=0);
2275 signed char quotient=get_reg(i_regs->regmap,LOREG);
2276 signed char remainder=get_reg(i_regs->regmap,HIREG);
2277 assert(quotient>=0);
2278 assert(remainder>=0);
2279 emit_movs(d1,remainder);
44a80f6a 2280 emit_movimm(0xffffffff,quotient);
2281 emit_negmi(quotient,quotient); // .. quotient and ..
2282 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2283 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2284 emit_jeq(out+52); // Division by zero
82336ba3 2285 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2286#ifdef HAVE_ARMV5
57871462 2287 emit_clz(HOST_TEMPREG,quotient);
2288 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2289#else
2290 emit_movimm(0,quotient);
2291 emit_addpl_imm(quotient,1,quotient);
2292 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2293 emit_jns(out-2*4);
665f33e1 2294#endif
57871462 2295 emit_orimm(quotient,1<<31,quotient);
2296 emit_shr(quotient,quotient,quotient);
2297 emit_cmp(remainder,HOST_TEMPREG);
2298 emit_subcs(remainder,HOST_TEMPREG,remainder);
2299 emit_adcs(quotient,quotient,quotient);
2300 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2301 emit_jcc(out-16); // -4
57871462 2302 emit_teq(d1,d2);
2303 emit_negmi(quotient,quotient);
2304 emit_test(d1,d1);
2305 emit_negmi(remainder,remainder);
2306 }
cf95b4f0 2307 if(dops[i].opcode2==0x1B) // DIVU
57871462 2308 {
cf95b4f0 2309 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2310 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2311 assert(d1>=0);
2312 assert(d2>=0);
2313 signed char quotient=get_reg(i_regs->regmap,LOREG);
2314 signed char remainder=get_reg(i_regs->regmap,HIREG);
2315 assert(quotient>=0);
2316 assert(remainder>=0);
44a80f6a 2317 emit_mov(d1,remainder);
2318 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2319 emit_test(d2,d2);
7c3a5182 2320 emit_jeq(out+40); // Division by zero
665f33e1 2321#ifdef HAVE_ARMV5
57871462 2322 emit_clz(d2,HOST_TEMPREG);
2323 emit_movimm(1<<31,quotient);
2324 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2325#else
2326 emit_movimm(0,HOST_TEMPREG);
82336ba3 2327 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2328 emit_lslpls_imm(d2,1,d2);
7c3a5182 2329 emit_jns(out-2*4);
665f33e1 2330 emit_movimm(1<<31,quotient);
2331#endif
57871462 2332 emit_shr(quotient,HOST_TEMPREG,quotient);
2333 emit_cmp(remainder,d2);
2334 emit_subcs(remainder,d2,remainder);
2335 emit_adcs(quotient,quotient,quotient);
2336 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2337 emit_jcc(out-16); // -4
57871462 2338 }
2339 }
2340 else // 64-bit
71e490c5 2341 assert(0);
57871462 2342 }
2343 else
2344 {
2345 // Multiply by zero is zero.
2346 // MIPS does not have a divide by zero exception.
2347 // The result is undefined, we return zero.
2348 signed char hr=get_reg(i_regs->regmap,HIREG);
2349 signed char lr=get_reg(i_regs->regmap,LOREG);
2350 if(hr>=0) emit_zeroreg(hr);
2351 if(lr>=0) emit_zeroreg(lr);
2352 }
2353}
2354#define multdiv_assemble multdiv_assemble_arm
2355
d1e4ebd9 2356static void do_jump_vaddr(int rs)
2357{
2a014d73 2358 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2359}
2360
e2b5e7aa 2361static void do_preload_rhash(int r) {
57871462 2362 // Don't need this for ARM. On x86, this puts the value 0xf8 into the
2363 // register. On ARM the hash can be done with a single instruction (below)
2364}
2365
e2b5e7aa 2366static void do_preload_rhtbl(int ht) {
57871462 2367 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2368}
2369
e2b5e7aa 2370static void do_rhash(int rs,int rh) {
57871462 2371 emit_andimm(rs,0xf8,rh);
2372}
2373
e2b5e7aa 2374static void do_miniht_load(int ht,int rh) {
57871462 2375 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2376 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2377}
2378
e2b5e7aa 2379static void do_miniht_jump(int rs,int rh,int ht) {
57871462 2380 emit_cmp(rh,rs);
2381 emit_ldreq_indexed(ht,4,15);
2382 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
d1e4ebd9 2383 if(rs!=7)
2384 emit_mov(rs,7);
2385 rs=7;
57871462 2386 #endif
d1e4ebd9 2387 do_jump_vaddr(rs);
57871462 2388}
2389
e2b5e7aa 2390static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2391 #ifndef HAVE_ARMV7
57871462 2392 emit_movimm(return_address,rt); // PC into link register
643aeae3 2393 add_to_linker(out,return_address,1);
57871462 2394 emit_pcreladdr(temp);
643aeae3 2395 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2396 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2397 #else
2398 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2399 add_to_linker(out,return_address,1);
57871462 2400 emit_pcreladdr(temp);
643aeae3 2401 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2402 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2403 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2404 #endif
2405}
2406
57871462 2407// CPU-architecture-specific initialization
2a014d73 2408static void arch_init(void)
2409{
2410 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2411 struct tramp_insns *ops = ndrc->tramp.ops;
2412 size_t i;
2413 assert(!(diff & 3));
2414 assert(diff < 0x1000);
2415 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2416 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2417 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2418 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2419}
b9b61529 2420
2421// vim:shiftwidth=2:expandtab