drc: try to make some passes not as slow, part 2
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Forward declarations of the jump helpers referenced below.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// Slot N holds jump_vaddr_rN.  Slots 11 and 13-15 have no helper and
// stay zero (they are not named, so they are zero-initialized).
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
57871462 104/* Linker */
105
df4dc2b1 106static void set_jump_target(void *addr, void *target_)
57871462 107{
df4dc2b1 108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
57871462 110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 113 assert(((uintptr_t)addr&3)==0);
57871462 114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 126 assert(((uintptr_t)addr&3)==0);
57871462 127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
// Variant of set_jump_target that can also copy the instruction found at
// the branch target into the slot just before the branch.  It works, but
// the difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  const u_char top=((u_char *)addr)[3];

  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }

  assert((top&0x0e)==0xa);
  const u_int target_insn=*(u_int *)target;
  // Do not copy: ALU op with no immediate and no flags, a load,
  // or anything with bit 27 set.
  if((target_insn&0x0e100000)==0
     ||(target_insn&0x0c100000)==0x04100000
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
57871462 172
173/* Literal pool */
e2b5e7aa 174static void add_literal(int addr,int val)
57871462 175{
15776b68 176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
9f51b4b9 179 literalcount++;
180}
57871462 181
d148d265 182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
57871462 185{
186 int *ptr=(int *)(stub+4);
d148d265 187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 188 u_int offset=*ptr&0xfff;
d148d265 189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
57871462 191}
192
f968d35d 193// find where external branch is liked to using addr of it's stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
643aeae3 197static void *get_pointer(void *stub)
57871462 198{
199 //printf("get_pointer(%x)\n",(int)stub);
d148d265 200 int *i_ptr=find_extjump_insn(stub);
3d680478 201 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 202 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 203}
204
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.  If the instruction after
// the call is an unconditional "b", its destination is returned instead.
static void *get_clean_addr(void *addr)
{
#ifndef HAVE_ARMV7
  const int header_words = 4;
#else
  const int header_words = 6;
#endif
  signed int *insn = (signed int *)addr + header_words;

  // the bl may sit one word further on
  if ((*insn & 0xFF000000) != 0xeb000000)
    insn++;
  assert((*insn & 0xFF000000) == 0xeb000000); // bl instruction
  insn++;

  if ((*insn & 0xFF000000) != 0xea000000)
    return insn;
  return (char *)insn + ((*insn << 8) >> 6) + 8; // follow jump
}
223
3968e69e 224static int verify_dirty(const u_int *ptr)
57871462 225{
665f33e1 226 #ifndef HAVE_ARMV7
16c8be17 227 u_int offset;
57871462 228 // get from literal pool
15776b68 229 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 230 offset=*ptr&0xfff;
231 u_int source=*(u_int*)((void *)ptr+offset+8);
232 ptr++;
233 assert((*ptr&0xFFFF0000)==0xe59f0000);
234 offset=*ptr&0xfff;
235 u_int copy=*(u_int*)((void *)ptr+offset+8);
236 ptr++;
237 assert((*ptr&0xFFFF0000)==0xe59f0000);
238 offset=*ptr&0xfff;
239 u_int len=*(u_int*)((void *)ptr+offset+8);
240 ptr++;
241 ptr++;
57871462 242 #else
243 // ARMv7 movw/movt
244 assert((*ptr&0xFFF00000)==0xe3000000);
245 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
246 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
247 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
248 ptr+=6;
249 #endif
250 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
251 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 252 //printf("verify_dirty: %x %x %x\n",source,copy,len);
253 return !memcmp((void *)source,(void *)copy,len);
254}
255
256// This doesn't necessarily find all clean entry points, just
257// guarantees that it's not dirty
df4dc2b1 258static int isclean(void *addr)
57871462 259{
665f33e1 260 #ifndef HAVE_ARMV7
581335b0 261 u_int *ptr=((u_int *)addr)+4;
57871462 262 #else
581335b0 263 u_int *ptr=((u_int *)addr)+6;
57871462 264 #endif
265 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
266 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
267 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
57871462 268 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
269 return 1;
270}
271
4a35de07 272// get source that block at addr was compiled from (host pointers)
01d26796 273static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 274{
643aeae3 275 u_int *ptr = addr;
665f33e1 276 #ifndef HAVE_ARMV7
16c8be17 277 u_int offset;
57871462 278 // get from literal pool
15776b68 279 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 280 offset=*ptr&0xfff;
281 u_int source=*(u_int*)((void *)ptr+offset+8);
282 ptr++;
283 //assert((*ptr&0xFFFF0000)==0xe59f0000);
284 //offset=*ptr&0xfff;
285 //u_int copy=*(u_int*)((void *)ptr+offset+8);
286 ptr++;
287 assert((*ptr&0xFFFF0000)==0xe59f0000);
288 offset=*ptr&0xfff;
289 u_int len=*(u_int*)((void *)ptr+offset+8);
290 ptr++;
291 ptr++;
57871462 292 #else
293 // ARMv7 movw/movt
294 assert((*ptr&0xFFF00000)==0xe3000000);
295 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
296 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
297 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
298 ptr+=6;
299 #endif
300 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
301 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 302 *start=(u_char *)source;
303 *end=(u_char *)source+len;
57871462 304}
305
57871462 306// Allocate a specific ARM register.
e2b5e7aa 307static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 308{
309 int n;
f776eb14 310 int dirty=0;
9f51b4b9 311
57871462 312 // see if it's already allocated (and dealloc it)
313 for(n=0;n<HOST_REGS;n++)
314 {
f776eb14 315 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
316 dirty=(cur->dirty>>n)&1;
317 cur->regmap[n]=-1;
318 }
57871462 319 }
9f51b4b9 320
57871462 321 cur->regmap[hr]=reg;
322 cur->dirty&=~(1<<hr);
f776eb14 323 cur->dirty|=dirty<<hr;
57871462 324 cur->isconst&=~(1<<hr);
325}
326
327// Alloc cycle count into dedicated register
e2b5e7aa 328static void alloc_cc(struct regstat *cur,int i)
57871462 329{
330 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
331}
332
57871462 333/* Assembler */
334
e2b5e7aa 335static unused char regname[16][4] = {
57871462 336 "r0",
337 "r1",
338 "r2",
339 "r3",
340 "r4",
341 "r5",
342 "r6",
343 "r7",
344 "r8",
345 "r9",
346 "r10",
347 "fp",
348 "r12",
349 "sp",
350 "lr",
351 "pc"};
352
e2b5e7aa 353static void output_w32(u_int word)
57871462 354{
355 *((u_int *)out)=word;
356 out+=4;
357}
e2b5e7aa 358
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
e2b5e7aa 366
// Pack rn (bits 16-19), rd (bits 12-15) and an 8-bit immediate together
// with a rotation field computed as ((32-shift)&30)<<7.
// shift must be even, imm must be below 256 and the registers below 16.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
e2b5e7aa 375
// Try to express imm as an 8-bit value plus an even rotation.
// On success returns 1 and stores (rotation<<7)|value in *encoded;
// otherwise returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 391
392static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 393{
394 u_int ret=genimm(imm,encoded);
395 assert(ret);
581335b0 396 (void)ret;
cfbd3c6e 397}
e2b5e7aa 398
399static u_int genjmp(u_int addr)
57871462 400{
7c3a5182 401 if (addr < 3) return 0; // a branch that will be patched later
402 int offset = addr-(int)out-8;
403 if (offset < -33554432 || offset >= 33554432) {
404 SysPrintf("genjmp: out of range: %08x\n", offset);
405 abort();
e80343e2 406 return 0;
407 }
57871462 408 return ((u_int)offset>>2)&0xffffff;
409}
410
d1e4ebd9 411static unused void emit_breakpoint(void)
412{
413 assem_debug("bkpt #0\n");
414 //output_w32(0xe1200070);
415 output_w32(0xe7f001f0);
416}
417
e2b5e7aa 418static void emit_mov(int rs,int rt)
57871462 419{
420 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
421 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
422}
423
e2b5e7aa 424static void emit_movs(int rs,int rt)
57871462 425{
426 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
427 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
428}
429
e2b5e7aa 430static void emit_add(int rs1,int rs2,int rt)
57871462 431{
432 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
433 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
434}
435
39b71d9a 436static void emit_adds(int rs1,int rs2,int rt)
437{
438 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
439 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
440}
441#define emit_adds_ptr emit_adds
442
e2b5e7aa 443static void emit_adcs(int rs1,int rs2,int rt)
57871462 444{
445 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
446 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
447}
448
e2b5e7aa 449static void emit_neg(int rs, int rt)
57871462 450{
451 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
452 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
453}
454
e2b5e7aa 455static void emit_sub(int rs1,int rs2,int rt)
57871462 456{
457 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
458 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
459}
460
e2b5e7aa 461static void emit_zeroreg(int rt)
57871462 462{
463 assem_debug("mov %s,#0\n",regname[rt]);
464 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
465}
466
e2b5e7aa 467static void emit_loadlp(u_int imm,u_int rt)
790ee18e 468{
469 add_literal((int)out,imm);
470 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
471 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
472}
e2b5e7aa 473
33788798 474#ifdef HAVE_ARMV7
e2b5e7aa 475static void emit_movw(u_int imm,u_int rt)
790ee18e 476{
477 assert(imm<65536);
478 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
479 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
480}
e2b5e7aa 481
482static void emit_movt(u_int imm,u_int rt)
790ee18e 483{
484 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
485 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
486}
33788798 487#endif
e2b5e7aa 488
489static void emit_movimm(u_int imm,u_int rt)
790ee18e 490{
491 u_int armval;
492 if(genimm(imm,&armval)) {
493 assem_debug("mov %s,#%d\n",regname[rt],imm);
494 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
495 }else if(genimm(~imm,&armval)) {
496 assem_debug("mvn %s,#%d\n",regname[rt],imm);
497 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
498 }else if(imm<65536) {
665f33e1 499 #ifndef HAVE_ARMV7
790ee18e 500 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
501 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
502 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
503 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
504 #else
505 emit_movw(imm,rt);
506 #endif
507 }else{
665f33e1 508 #ifndef HAVE_ARMV7
790ee18e 509 emit_loadlp(imm,rt);
510 #else
511 emit_movw(imm&0x0000FFFF,rt);
512 emit_movt(imm&0xFFFF0000,rt);
513 #endif
514 }
515}
e2b5e7aa 516
517static void emit_pcreladdr(u_int rt)
790ee18e 518{
519 assem_debug("add %s,pc,#?\n",regname[rt]);
520 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
521}
522
e2b5e7aa 523static void emit_loadreg(int r, int hr)
57871462 524{
53358c1d 525 assert(hr != EXCLUDE_REG);
526 if (r == 0)
57871462 527 emit_zeroreg(hr);
528 else {
33788798 529 void *addr;
7c3a5182 530 switch (r) {
531 //case HIREG: addr = &hi; break;
532 //case LOREG: addr = &lo; break;
33788798 533 case CCREG: addr = &cycle_count; break;
534 case CSREG: addr = &Status; break;
535 case INVCP: addr = &invc_ptr; break;
536 case ROREG: addr = &ram_offset; break;
537 default:
538 assert(r < 34);
539 addr = &psxRegs.GPR.r[r];
540 break;
7c3a5182 541 }
33788798 542 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 543 assert(offset<4096);
6cc8d23c 544 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 545 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
546 }
547}
e2b5e7aa 548
549static void emit_storereg(int r, int hr)
57871462 550{
53358c1d 551 assert(hr != EXCLUDE_REG);
7c3a5182 552 int addr = (int)&psxRegs.GPR.r[r];
553 switch (r) {
554 //case HIREG: addr = &hi; break;
555 //case LOREG: addr = &lo; break;
556 case CCREG: addr = (int)&cycle_count; break;
557 default: assert(r < 34); break;
558 }
57871462 559 u_int offset = addr-(u_int)&dynarec_local;
560 assert(offset<4096);
6cc8d23c 561 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 562 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
563}
564
e2b5e7aa 565static void emit_test(int rs, int rt)
57871462 566{
567 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
568 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
569}
570
e2b5e7aa 571static void emit_testimm(int rs,int imm)
57871462 572{
573 u_int armval;
5a05d80c 574 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 575 genimm_checked(imm,&armval);
57871462 576 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
577}
578
e2b5e7aa 579static void emit_testeqimm(int rs,int imm)
b9b61529 580{
581 u_int armval;
582 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 583 genimm_checked(imm,&armval);
b9b61529 584 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
585}
586
e2b5e7aa 587static void emit_not(int rs,int rt)
57871462 588{
589 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
590 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
591}
592
e2b5e7aa 593static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 594{
595 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
596 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
597}
598
e2b5e7aa 599static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 600{
601 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
602 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
603}
e2b5e7aa 604
e2b5e7aa 605static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 606{
607 assert(rs<16);
608 assert(rt<16);
609 assert(imm<32);
610 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
611 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
612}
613
e2b5e7aa 614static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 615{
616 assert(rs<16);
617 assert(rt<16);
618 assert(imm<32);
619 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
620 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
621}
622
e2b5e7aa 623static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 624{
625 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
626 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
627}
628
3968e69e 629static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
630{
631 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
632 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
633}
634
e2b5e7aa 635static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 636{
637 assert(rs<16);
638 assert(rt<16);
639 if(imm!=0) {
57871462 640 u_int armval;
641 if(genimm(imm,&armval)) {
642 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
643 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
644 }else if(genimm(-imm,&armval)) {
8a0a8423 645 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 646 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 647 #ifdef HAVE_ARMV7
648 }else if(rt!=rs&&(u_int)imm<65536) {
649 emit_movw(imm&0x0000ffff,rt);
650 emit_add(rs,rt,rt);
651 }else if(rt!=rs&&(u_int)-imm<65536) {
652 emit_movw(-imm&0x0000ffff,rt);
653 emit_sub(rs,rt,rt);
654 #endif
655 }else if((u_int)-imm<65536) {
57871462 656 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
657 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
658 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
659 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 660 }else {
661 do {
662 int shift = (ffs(imm) - 1) & ~1;
663 int imm8 = imm & (0xff << shift);
664 genimm_checked(imm8,&armval);
665 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
666 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
667 rs = rt;
668 imm &= ~imm8;
669 }
670 while (imm != 0);
57871462 671 }
672 }
673 else if(rs!=rt) emit_mov(rs,rt);
674}
675
e2b5e7aa 676static void emit_addimm_and_set_flags(int imm,int rt)
57871462 677{
678 assert(imm>-65536&&imm<65536);
679 u_int armval;
680 if(genimm(imm,&armval)) {
681 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
682 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
683 }else if(genimm(-imm,&armval)) {
684 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
685 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
686 }else if(imm<0) {
687 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
688 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
689 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
690 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
691 }else{
692 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
693 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
694 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
695 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
696 }
697}
e2b5e7aa 698
e2b5e7aa 699static void emit_addnop(u_int r)
57871462 700{
701 assert(r<16);
702 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
703 output_w32(0xe2800000|rd_rn_rm(r,r,0));
704}
705
e2b5e7aa 706static void emit_andimm(int rs,int imm,int rt)
57871462 707{
708 u_int armval;
790ee18e 709 if(imm==0) {
710 emit_zeroreg(rt);
711 }else if(genimm(imm,&armval)) {
57871462 712 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
713 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
714 }else if(genimm(~imm,&armval)) {
715 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
716 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
717 }else if(imm==65535) {
332a4533 718 #ifndef HAVE_ARMV6
57871462 719 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
720 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
721 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
722 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
723 #else
724 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
725 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
726 #endif
727 }else{
728 assert(imm>0&&imm<65535);
665f33e1 729 #ifndef HAVE_ARMV7
57871462 730 assem_debug("mov r14,#%d\n",imm&0xFF00);
731 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
732 assem_debug("add r14,r14,#%d\n",imm&0xFF);
733 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
734 #else
735 emit_movw(imm,HOST_TEMPREG);
736 #endif
737 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
738 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
739 }
740}
741
e2b5e7aa 742static void emit_orimm(int rs,int imm,int rt)
57871462 743{
744 u_int armval;
790ee18e 745 if(imm==0) {
746 if(rs!=rt) emit_mov(rs,rt);
747 }else if(genimm(imm,&armval)) {
57871462 748 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
749 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
750 }else{
751 assert(imm>0&&imm<65536);
752 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
753 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
754 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
755 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
756 }
757}
758
e2b5e7aa 759static void emit_xorimm(int rs,int imm,int rt)
57871462 760{
57871462 761 u_int armval;
790ee18e 762 if(imm==0) {
763 if(rs!=rt) emit_mov(rs,rt);
764 }else if(genimm(imm,&armval)) {
57871462 765 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
766 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
767 }else{
514ed0d9 768 assert(imm>0&&imm<65536);
57871462 769 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
770 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
771 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
772 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
773 }
774}
775
e2b5e7aa 776static void emit_shlimm(int rs,u_int imm,int rt)
57871462 777{
778 assert(imm>0);
779 assert(imm<32);
780 //if(imm==1) ...
781 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
782 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
783}
784
e2b5e7aa 785static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 786{
787 assert(imm>0);
788 assert(imm<32);
789 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
790 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
791}
792
e2b5e7aa 793static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 794{
795 assert(imm>0);
796 assert(imm<32);
797 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
798 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
799}
800
e2b5e7aa 801static void emit_shrimm(int rs,u_int imm,int rt)
57871462 802{
803 assert(imm>0);
804 assert(imm<32);
805 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
806 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
807}
808
e2b5e7aa 809static void emit_sarimm(int rs,u_int imm,int rt)
57871462 810{
811 assert(imm>0);
812 assert(imm<32);
813 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
814 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
815}
816
e2b5e7aa 817static void emit_rorimm(int rs,u_int imm,int rt)
57871462 818{
819 assert(imm>0);
820 assert(imm<32);
821 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
822 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
823}
824
// Sign-extend the low 16 bits of rs into rt: a single sxth when
// HAVE_ARMV6 is defined, otherwise a shift left / arithmetic shift right
// pair.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
835
// Sign-extend the low 8 bits of rs into rt: a single sxtb when
// HAVE_ARMV6 is defined, otherwise a shift left / arithmetic shift right
// pair.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
846
e2b5e7aa 847static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 848{
849 assert(rs<16);
850 assert(rt<16);
851 assert(shift<16);
852 //if(imm==1) ...
853 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
854 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
855}
e2b5e7aa 856
857static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 858{
859 assert(rs<16);
860 assert(rt<16);
861 assert(shift<16);
862 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
863 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
864}
e2b5e7aa 865
866static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 867{
868 assert(rs<16);
869 assert(rt<16);
870 assert(shift<16);
871 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
872 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
873}
57871462 874
3968e69e 875static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 876{
877 assert(rs<16);
878 assert(rt<16);
879 assert(shift<16);
880 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
881 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
882}
e2b5e7aa 883
3968e69e 884static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 885{
886 assert(rs<16);
887 assert(rt<16);
888 assert(shift<16);
889 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
890 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
891}
892
e2b5e7aa 893static void emit_cmpimm(int rs,int imm)
57871462 894{
895 u_int armval;
896 if(genimm(imm,&armval)) {
5a05d80c 897 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 898 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
899 }else if(genimm(-imm,&armval)) {
5a05d80c 900 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 901 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
902 }else if(imm>0) {
903 assert(imm<65536);
57871462 904 emit_movimm(imm,HOST_TEMPREG);
57871462 905 assem_debug("cmp %s,r14\n",regname[rs]);
906 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
907 }else{
908 assert(imm>-65536);
57871462 909 emit_movimm(-imm,HOST_TEMPREG);
57871462 910 assem_debug("cmn %s,r14\n",regname[rs]);
911 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
912 }
913}
914
e2b5e7aa 915static void emit_cmovne_imm(int imm,int rt)
57871462 916{
917 assem_debug("movne %s,#%d\n",regname[rt],imm);
918 u_int armval;
cfbd3c6e 919 genimm_checked(imm,&armval);
57871462 920 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
921}
e2b5e7aa 922
923static void emit_cmovl_imm(int imm,int rt)
57871462 924{
925 assem_debug("movlt %s,#%d\n",regname[rt],imm);
926 u_int armval;
cfbd3c6e 927 genimm_checked(imm,&armval);
57871462 928 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
929}
e2b5e7aa 930
931static void emit_cmovb_imm(int imm,int rt)
57871462 932{
933 assem_debug("movcc %s,#%d\n",regname[rt],imm);
934 u_int armval;
cfbd3c6e 935 genimm_checked(imm,&armval);
57871462 936 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
937}
e2b5e7aa 938
3968e69e 939static void emit_cmovae_imm(int imm,int rt)
940{
941 assem_debug("movcs %s,#%d\n",regname[rt],imm);
942 u_int armval;
943 genimm_checked(imm,&armval);
944 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
945}
946
9c997d19 947static void emit_cmovs_imm(int imm,int rt)
948{
949 assem_debug("movmi %s,#%d\n",regname[rt],imm);
950 u_int armval;
951 genimm_checked(imm,&armval);
952 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
953}
954
e2b5e7aa 955static void emit_cmovne_reg(int rs,int rt)
57871462 956{
957 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
958 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
959}
e2b5e7aa 960
961static void emit_cmovl_reg(int rs,int rt)
57871462 962{
963 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
964 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
965}
e2b5e7aa 966
e3c6bdb5 967static void emit_cmovb_reg(int rs,int rt)
968{
969 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
970 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
971}
972
e2b5e7aa 973static void emit_cmovs_reg(int rs,int rt)
57871462 974{
975 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
976 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
977}
978
// rt = 1 when the compare of rs with imm satisfies "lt", else 0.
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 986
// rt = 1 when the compare of rs with imm satisfies "cc", else 0.
// When rt aliases rs the zeroing has to wait until after the compare.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 994
e2b5e7aa 995static void emit_cmp(int rs,int rt)
57871462 996{
997 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
998 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
999}
e2b5e7aa 1000
// Emit code leaving rt = 1 if rs > 0 (signed), else 0.
// Done as: compare rs with 1, preset rt to 1, then clear it on "less than".
// The compare comes first so that rs may alias rt.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs,1);      // rs < 1  <=>  rs <= 0
  emit_movimm(1,rt);      // assume "greater than zero"
  emit_cmovl_imm(0,rt);   // ...and take it back if not
}
e2b5e7aa 1008
// Emit code leaving rt = 1 if rs != 0, else 0.
// With distinct registers the value is copied via emit_movs() (so a zero
// input is already the right result); with rs == rt only a test is needed.
// In both cases rt is then overwritten with 1 on "not equal".
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1016
// Emit code leaving rt = 1 if rs1 < rs2 (signed), else 0.
// rt is zeroed before the compare unless it aliases one of the sources, in
// which case the zeroing is deferred until after the compare.
// NOTE(review): relies on emit_movimm(0,...) not disturbing the flags.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1025
// Emit code leaving rt = 1 if rs1 < rs2 (unsigned), else 0.
// rt is zeroed before the compare unless it aliases one of the sources, in
// which case the zeroing is deferred until after the compare.
// NOTE(review): relies on emit_movimm(0,...) not disturbing the flags.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int rt_is_source=(rs1==rt||rs2==rt);
  if(!rt_is_source)
    emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(rt_is_source)
    emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1034
2a014d73 1035static int can_jump_or_call(const void *a)
1036{
1037 intptr_t offset = (u_char *)a - out - 8;
1038 return (-33554432 <= offset && offset < 33554432);
1039}
1040
643aeae3 1041static void emit_call(const void *a_)
57871462 1042{
643aeae3 1043 int a = (int)a_;
d1e4ebd9 1044 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1045 u_int offset=genjmp(a);
1046 output_w32(0xeb000000|offset);
1047}
e2b5e7aa 1048
b14b6a8f 1049static void emit_jmp(const void *a_)
57871462 1050{
b14b6a8f 1051 int a = (int)a_;
d1e4ebd9 1052 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 1053 u_int offset=genjmp(a);
1054 output_w32(0xea000000|offset);
1055}
e2b5e7aa 1056
643aeae3 1057static void emit_jne(const void *a_)
57871462 1058{
643aeae3 1059 int a = (int)a_;
57871462 1060 assem_debug("bne %x\n",a);
1061 u_int offset=genjmp(a);
1062 output_w32(0x1a000000|offset);
1063}
e2b5e7aa 1064
7c3a5182 1065static void emit_jeq(const void *a_)
57871462 1066{
7c3a5182 1067 int a = (int)a_;
57871462 1068 assem_debug("beq %x\n",a);
1069 u_int offset=genjmp(a);
1070 output_w32(0x0a000000|offset);
1071}
e2b5e7aa 1072
7c3a5182 1073static void emit_js(const void *a_)
57871462 1074{
7c3a5182 1075 int a = (int)a_;
57871462 1076 assem_debug("bmi %x\n",a);
1077 u_int offset=genjmp(a);
1078 output_w32(0x4a000000|offset);
1079}
e2b5e7aa 1080
7c3a5182 1081static void emit_jns(const void *a_)
57871462 1082{
7c3a5182 1083 int a = (int)a_;
57871462 1084 assem_debug("bpl %x\n",a);
1085 u_int offset=genjmp(a);
1086 output_w32(0x5a000000|offset);
1087}
e2b5e7aa 1088
7c3a5182 1089static void emit_jl(const void *a_)
57871462 1090{
7c3a5182 1091 int a = (int)a_;
57871462 1092 assem_debug("blt %x\n",a);
1093 u_int offset=genjmp(a);
1094 output_w32(0xba000000|offset);
1095}
e2b5e7aa 1096
7c3a5182 1097static void emit_jge(const void *a_)
57871462 1098{
7c3a5182 1099 int a = (int)a_;
57871462 1100 assem_debug("bge %x\n",a);
1101 u_int offset=genjmp(a);
1102 output_w32(0xaa000000|offset);
1103}
e2b5e7aa 1104
7c3a5182 1105static void emit_jno(const void *a_)
57871462 1106{
7c3a5182 1107 int a = (int)a_;
57871462 1108 assem_debug("bvc %x\n",a);
1109 u_int offset=genjmp(a);
1110 output_w32(0x7a000000|offset);
1111}
e2b5e7aa 1112
7c3a5182 1113static void emit_jc(const void *a_)
57871462 1114{
7c3a5182 1115 int a = (int)a_;
57871462 1116 assem_debug("bcs %x\n",a);
1117 u_int offset=genjmp(a);
1118 output_w32(0x2a000000|offset);
1119}
e2b5e7aa 1120
7c3a5182 1121static void emit_jcc(const void *a_)
57871462 1122{
b14b6a8f 1123 int a = (int)a_;
57871462 1124 assem_debug("bcc %x\n",a);
1125 u_int offset=genjmp(a);
1126 output_w32(0x3a000000|offset);
1127}
1128
3968e69e 1129static unused void emit_callreg(u_int r)
57871462 1130{
c6c3b1b3 1131 assert(r<15);
1132 assem_debug("blx %s\n",regname[r]);
1133 output_w32(0xe12fff30|r);
57871462 1134}
e2b5e7aa 1135
1136static void emit_jmpreg(u_int r)
57871462 1137{
1138 assem_debug("mov pc,%s\n",regname[r]);
1139 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1140}
1141
be516ebe 1142static void emit_ret(void)
1143{
1144 emit_jmpreg(14);
1145}
1146
e2b5e7aa 1147static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1148{
1149 assert(offset>-4096&&offset<4096);
1150 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1151 if(offset>=0) {
1152 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1153 }else{
1154 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1155 }
1156}
e2b5e7aa 1157
1158static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1159{
1160 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1161 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1162}
39b71d9a 1163#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1164
1165static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1166{
1167 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1168 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1169}
e2b5e7aa 1170
1171static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1172{
1173 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1174 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1175}
e2b5e7aa 1176
37387d8b 1177static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1178{
1179 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1180 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1181}
1182
e2b5e7aa 1183static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1184{
1185 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1186 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1187}
e2b5e7aa 1188
37387d8b 1189static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1190{
1191 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1192 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1193}
1194
e2b5e7aa 1195static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1196{
1197 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1198 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1199}
e2b5e7aa 1200
37387d8b 1201static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1202{
1203 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1204 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1205}
1206
e2b5e7aa 1207static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1208{
1209 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1210 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1211}
e2b5e7aa 1212
37387d8b 1213static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1214{
1215 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1216 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1217}
1218
e2b5e7aa 1219static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1220{
1221 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1222 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1223}
1224
1225static void emit_str_dualindexed(int rs1, int rs2, int rt)
1226{
1227 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1228 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1229}
1230
1231static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1232{
1233 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1234 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1235}
1236
1237static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1238{
1239 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1240 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1241}
e2b5e7aa 1242
e2b5e7aa 1243static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1244{
1245 assert(offset>-256&&offset<256);
1246 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1247 if(offset>=0) {
1248 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1249 }else{
1250 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1251 }
1252}
e2b5e7aa 1253
e2b5e7aa 1254static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1255{
1256 assert(offset>-256&&offset<256);
1257 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1258 if(offset>=0) {
1259 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1260 }else{
1261 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1262 }
1263}
e2b5e7aa 1264
1265static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1266{
1267 assert(offset>-4096&&offset<4096);
1268 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1269 if(offset>=0) {
1270 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1271 }else{
1272 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1273 }
1274}
e2b5e7aa 1275
e2b5e7aa 1276static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1277{
1278 assert(offset>-256&&offset<256);
1279 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1280 if(offset>=0) {
1281 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1282 }else{
1283 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1284 }
1285}
e2b5e7aa 1286
054175e9 1287static void emit_ldrd(int offset, int rs, int rt)
1288{
1289 assert(offset>-256&&offset<256);
1290 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1291 if(offset>=0) {
1292 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1293 }else{
1294 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1295 }
1296}
e2b5e7aa 1297
643aeae3 1298static void emit_readword(void *addr, int rt)
57871462 1299{
643aeae3 1300 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1301 assert(offset<4096);
1302 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1303 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1304}
39b71d9a 1305#define emit_readptr emit_readword
e2b5e7aa 1306
e2b5e7aa 1307static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1308{
1309 assert(offset>-4096&&offset<4096);
1310 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1311 if(offset>=0) {
1312 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1313 }else{
1314 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1315 }
1316}
e2b5e7aa 1317
e2b5e7aa 1318static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1319{
1320 assert(offset>-256&&offset<256);
1321 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1322 if(offset>=0) {
1323 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1324 }else{
1325 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1326 }
1327}
e2b5e7aa 1328
1329static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1330{
1331 assert(offset>-4096&&offset<4096);
1332 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1333 if(offset>=0) {
1334 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1335 }else{
1336 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1337 }
1338}
e2b5e7aa 1339
e2b5e7aa 1340static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1341{
1342 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1343 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1344}
e2b5e7aa 1345
1346static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1347{
1348 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1349 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1350}
e2b5e7aa 1351
1352static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1353{
1354 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1355 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1356}
e2b5e7aa 1357
643aeae3 1358static void emit_writeword(int rt, void *addr)
57871462 1359{
643aeae3 1360 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1361 assert(offset<4096);
1362 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1363 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1364}
e2b5e7aa 1365
e2b5e7aa 1366static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1367{
1368 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1369 assert(rs1<16);
1370 assert(rs2<16);
1371 assert(hi<16);
1372 assert(lo<16);
1373 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1374}
e2b5e7aa 1375
1376static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1377{
1378 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1379 assert(rs1<16);
1380 assert(rs2<16);
1381 assert(hi<16);
1382 assert(lo<16);
1383 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1384}
1385
e2b5e7aa 1386static void emit_clz(int rs,int rt)
57871462 1387{
1388 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1389 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1390}
1391
e2b5e7aa 1392static void emit_subcs(int rs1,int rs2,int rt)
57871462 1393{
1394 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1395 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1396}
1397
e2b5e7aa 1398static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1399{
1400 assert(imm>0);
1401 assert(imm<32);
1402 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1403 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1404}
1405
e2b5e7aa 1406static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1407{
1408 assert(imm>0);
1409 assert(imm<32);
1410 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1411 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1412}
1413
e2b5e7aa 1414static void emit_negmi(int rs, int rt)
57871462 1415{
1416 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1417 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1418}
1419
e2b5e7aa 1420static void emit_negsmi(int rs, int rt)
57871462 1421{
1422 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1423 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1424}
1425
e2b5e7aa 1426static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1427{
1428 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1429 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1430}
1431
e2b5e7aa 1432static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1433{
1434 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1435 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1436}
1437
e2b5e7aa 1438static void emit_teq(int rs, int rt)
57871462 1439{
1440 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1441 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1442}
1443
3968e69e 1444static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1445{
1446 u_int armval;
cfbd3c6e 1447 genimm_checked(imm,&armval);
57871462 1448 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1449 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1450}
1451
57871462 1452// Conditionally select one of two immediates, optimizing for small code size
1453// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1454static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1455{
1456 u_int armval;
1457 if(genimm(imm2-imm1,&armval)) {
1458 emit_movimm(imm1,rt);
1459 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1460 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1461 }else if(genimm(imm1-imm2,&armval)) {
1462 emit_movimm(imm1,rt);
1463 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1464 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1465 }
1466 else {
665f33e1 1467 #ifndef HAVE_ARMV7
57871462 1468 emit_movimm(imm1,rt);
1469 add_literal((int)out,imm2);
1470 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1471 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1472 #else
1473 emit_movw(imm1&0x0000FFFF,rt);
1474 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1475 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1476 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1477 }
1478 emit_movt(imm1&0xFFFF0000,rt);
1479 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1480 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1481 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1482 }
1483 #endif
1484 }
1485}
1486
57871462 1487// special case for checking invalid_code
e2b5e7aa 1488static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1489{
1490 assert(imm<128&&imm>=0);
1491 assert(r>=0&&r<16);
1492 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1493 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1494 emit_cmpimm(HOST_TEMPREG,imm);
1495}
1496
e2b5e7aa 1497static void emit_callne(int a)
0bbd1454 1498{
1499 assem_debug("blne %x\n",a);
1500 u_int offset=genjmp(a);
1501 output_w32(0x1b000000|offset);
1502}
1503
57871462 1504// Used to preload hash table entries
e2b5e7aa 1505static unused void emit_prefetchreg(int r)
57871462 1506{
1507 assem_debug("pld %s\n",regname[r]);
1508 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1509}
1510
1511// Special case for mini_ht
e2b5e7aa 1512static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1513{
1514 assert(offset<4096);
1515 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1516 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1517}
1518
e2b5e7aa 1519static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1520{
1521 u_int armval;
cfbd3c6e 1522 genimm_checked(imm,&armval);
b9b61529 1523 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1524 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1525}
1526
e2b5e7aa 1527static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1528{
1529 u_int armval;
1530 genimm_checked(imm,&armval);
1531 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1532 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1533}
1534
e2b5e7aa 1535static void emit_jno_unlikely(int a)
57871462 1536{
1537 //emit_jno(a);
1538 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1539 output_w32(0x72800000|rd_rn_rm(15,15,0));
1540}
1541
054175e9 1542static void save_regs_all(u_int reglist)
57871462 1543{
054175e9 1544 int i;
57871462 1545 if(!reglist) return;
1546 assem_debug("stmia fp,{");
054175e9 1547 for(i=0;i<16;i++)
1548 if(reglist&(1<<i))
1549 assem_debug("r%d,",i);
57871462 1550 assem_debug("}\n");
1551 output_w32(0xe88b0000|reglist);
1552}
e2b5e7aa 1553
054175e9 1554static void restore_regs_all(u_int reglist)
57871462 1555{
054175e9 1556 int i;
57871462 1557 if(!reglist) return;
1558 assem_debug("ldmia fp,{");
054175e9 1559 for(i=0;i<16;i++)
1560 if(reglist&(1<<i))
1561 assem_debug("r%d,",i);
57871462 1562 assem_debug("}\n");
1563 output_w32(0xe89b0000|reglist);
1564}
e2b5e7aa 1565
054175e9 1566// Save registers before function call
1567static void save_regs(u_int reglist)
1568{
4d646738 1569 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1570 save_regs_all(reglist);
1571}
e2b5e7aa 1572
054175e9 1573// Restore registers after function call
1574static void restore_regs(u_int reglist)
1575{
4d646738 1576 reglist&=CALLER_SAVE_REGS;
054175e9 1577 restore_regs_all(reglist);
1578}
57871462 1579
57871462 1580/* Stubs/epilogue */
1581
e2b5e7aa 1582static void literal_pool(int n)
57871462 1583{
1584 if(!literalcount) return;
1585 if(n) {
1586 if((int)out-literals[0][0]<4096-n) return;
1587 }
1588 u_int *ptr;
1589 int i;
1590 for(i=0;i<literalcount;i++)
1591 {
77750690 1592 u_int l_addr=(u_int)out;
1593 int j;
1594 for(j=0;j<i;j++) {
1595 if(literals[j][1]==literals[i][1]) {
1596 //printf("dup %08x\n",literals[i][1]);
1597 l_addr=literals[j][0];
1598 break;
1599 }
1600 }
57871462 1601 ptr=(u_int *)literals[i][0];
77750690 1602 u_int offset=l_addr-(u_int)ptr-8;
57871462 1603 assert(offset<4096);
1604 assert(!(offset&3));
1605 *ptr|=offset;
77750690 1606 if(l_addr==(u_int)out) {
1607 literals[i][0]=l_addr; // remember for dupes
1608 output_w32(literals[i][1]);
1609 }
57871462 1610 }
1611 literalcount=0;
1612}
1613
e2b5e7aa 1614static void literal_pool_jumpover(int n)
57871462 1615{
1616 if(!literalcount) return;
1617 if(n) {
1618 if((int)out-literals[0][0]<4096-n) return;
1619 }
df4dc2b1 1620 void *jaddr = out;
57871462 1621 emit_jmp(0);
1622 literal_pool(0);
df4dc2b1 1623 set_jump_target(jaddr, out);
57871462 1624}
1625
7c3a5182 1626// parsed by get_pointer, find_extjump_insn
1627static void emit_extjump2(u_char *addr, u_int target, void *linker)
57871462 1628{
1629 u_char *ptr=(u_char *)addr;
1630 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1631 (void)ptr;
1632
57871462 1633 emit_loadlp(target,0);
643aeae3 1634 emit_loadlp((u_int)addr,1);
66ea165f 1635 assert(ndrc->translation_cache <= addr &&
1636 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
57871462 1637 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1638//DEBUG >
1639#ifdef DEBUG_CYCLE_COUNT
643aeae3 1640 emit_readword(&last_count,ECX);
57871462 1641 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1642 emit_readword(&next_interupt,ECX);
1643 emit_writeword(HOST_CCREG,&Count);
57871462 1644 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1645 emit_writeword(ECX,&last_count);
57871462 1646#endif
1647//DEBUG <
2a014d73 1648 emit_far_jump(linker);
57871462 1649}
1650
d1e4ebd9 1651static void check_extjump2(void *src)
1652{
1653 u_int *ptr = src;
1654 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1655 (void)ptr;
1656}
1657
13e35c04 1658// put rt_val into rt, potentially making use of rs with value rs_val
1659static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1660{
8575a877 1661 u_int armval;
1662 int diff;
1663 if(genimm(rt_val,&armval)) {
1664 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1665 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1666 return;
1667 }
1668 if(genimm(~rt_val,&armval)) {
1669 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1670 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1671 return;
1672 }
1673 diff=rt_val-rs_val;
1674 if(genimm(diff,&armval)) {
1675 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1676 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1677 return;
1678 }else if(genimm(-diff,&armval)) {
1679 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1680 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1681 return;
1682 }
1683 emit_movimm(rt_val,rt);
1684}
1685
// return 1 if above function can do its job cheaply
//
// That is the case when v1 == v2, or when the distance between them (in
// either direction) is an 8-bit value shifted left by an even number of bits.
// The test strips trailing zero bit pairs and checks that the rest fits in
// 8 bits.
//
// The distance is kept unsigned so that negating it is well defined even for
// a distance of 0x80000000.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;

  const u_int diff=v2-v1;
  const u_int candidates[2]={diff,0u-diff};
  for(int k=0;k<2;k++) {
    u_int xs=candidates[k];
    while(xs!=0&&(xs&3)==0)
      xs>>=2;
    if(xs<0x100) return 1;
  }
  return 0;
}
cbbab9cd 1701
b14b6a8f 1702static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1703{
1704 switch(type) {
1705 case LOADB_STUB: emit_signextend8(rs,rt); break;
1706 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1707 case LOADH_STUB: emit_signextend16(rs,rt); break;
1708 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1709 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1710 default: assert(0);
1711 }
1712}
1713
b1be1eee 1714#include "pcsxmem.h"
1715#include "pcsxmem_inline.c"
b1be1eee 1716
e2b5e7aa 1717static void do_readstub(int n)
57871462 1718{
b14b6a8f 1719 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1720 literal_pool(256);
b14b6a8f 1721 set_jump_target(stubs[n].addr, out);
1722 enum stub_type type=stubs[n].type;
1723 int i=stubs[n].a;
1724 int rs=stubs[n].b;
81dbbf4c 1725 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1726 u_int reglist=stubs[n].e;
81dbbf4c 1727 const signed char *i_regmap=i_regs->regmap;
581335b0 1728 int rt;
cf95b4f0 1729 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1730 rt=get_reg(i_regmap,FTEMP);
1731 }else{
cf95b4f0 1732 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1733 }
1734 assert(rs>=0);
df4dc2b1 1735 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1736 void *restore_jump = NULL;
c6c3b1b3 1737 reglist|=(1<<rs);
1738 for(r=0;r<=12;r++) {
1739 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1740 temp=r; break;
1741 }
1742 }
cf95b4f0 1743 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1744 reglist&=~(1<<rt);
1745 if(temp==-1) {
1746 save_regs(reglist);
1747 regs_saved=1;
1748 temp=(rs==0)?2:0;
1749 }
1750 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1751 temp2=1;
643aeae3 1752 emit_readword(&mem_rtab,temp);
c6c3b1b3 1753 emit_shrimm(rs,12,temp2);
1754 emit_readword_dualindexedx4(temp,temp2,temp2);
1755 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1756 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1757 switch(type) {
1758 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1759 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1760 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1761 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1762 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1763 default: assert(0);
c6c3b1b3 1764 }
1765 }
1766 if(regs_saved) {
df4dc2b1 1767 restore_jump=out;
c6c3b1b3 1768 emit_jcc(0); // jump to reg restore
1769 }
1770 else
b14b6a8f 1771 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1772
1773 if(!regs_saved)
1774 save_regs(reglist);
643aeae3 1775 void *handler=NULL;
c6c3b1b3 1776 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1777 handler=jump_handler_read8;
c6c3b1b3 1778 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1779 handler=jump_handler_read16;
c6c3b1b3 1780 if(type==LOADW_STUB)
643aeae3 1781 handler=jump_handler_read32;
1782 assert(handler);
b96d3df7 1783 pass_args(rs,temp2);
c6c3b1b3 1784 int cc=get_reg(i_regmap,CCREG);
1785 if(cc<0)
1786 emit_loadreg(CCREG,2);
2330734f 1787 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1788 emit_far_call(handler);
cf95b4f0 1789 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1790 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1791 }
1792 if(restore_jump)
df4dc2b1 1793 set_jump_target(restore_jump, out);
c6c3b1b3 1794 restore_regs(reglist);
b14b6a8f 1795 emit_jmp(stubs[n].retaddr); // return address
57871462 1796}
1797
81dbbf4c 1798static void inline_readstub(enum stub_type type, int i, u_int addr,
1799 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1800{
1801 int rs=get_reg(regmap,target);
57871462 1802 int rt=get_reg(regmap,target);
9de8a0c3 1803 if(rs<0) rs=get_reg_temp(regmap);
57871462 1804 assert(rs>=0);
2a014d73 1805 u_int is_dynamic;
687b4580 1806 uintptr_t host_addr = 0;
643aeae3 1807 void *handler;
b1be1eee 1808 int cc=get_reg(regmap,CCREG);
2330734f 1809 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1810 return;
643aeae3 1811 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1812 if (handler == NULL) {
cf95b4f0 1813 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1814 return;
13e35c04 1815 if(addr!=host_addr)
1816 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1817 switch(type) {
1818 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1819 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1820 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1821 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1822 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1823 default: assert(0);
1824 }
1825 return;
1826 }
b1be1eee 1827 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1828 if(is_dynamic) {
1829 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1830 handler=jump_handler_read8;
b1be1eee 1831 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1832 handler=jump_handler_read16;
b1be1eee 1833 if(type==LOADW_STUB)
643aeae3 1834 handler=jump_handler_read32;
b1be1eee 1835 }
c6c3b1b3 1836
1837 // call a memhandler
cf95b4f0 1838 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1839 reglist&=~(1<<rt);
1840 save_regs(reglist);
1841 if(target==0)
1842 emit_movimm(addr,0);
1843 else if(rs!=0)
1844 emit_mov(rs,0);
b1be1eee 1845 if(cc<0)
1846 emit_loadreg(CCREG,2);
1847 if(is_dynamic) {
1848 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1849 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1850 }
b1be1eee 1851 else {
643aeae3 1852 emit_readword(&last_count,3);
2330734f 1853 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1854 emit_add(2,3,2);
643aeae3 1855 emit_writeword(2,&Count);
b1be1eee 1856 }
1857
2a014d73 1858 emit_far_call(handler);
b1be1eee 1859
cf95b4f0 1860 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1861 switch(type) {
1862 case LOADB_STUB: emit_signextend8(0,rt); break;
1863 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1864 case LOADH_STUB: emit_signextend16(0,rt); break;
1865 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1866 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1867 default: assert(0);
1868 }
1869 }
1870 restore_regs(reglist);
57871462 1871}
1872
e2b5e7aa 1873static void do_writestub(int n)
57871462 1874{
b14b6a8f 1875 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1876 literal_pool(256);
b14b6a8f 1877 set_jump_target(stubs[n].addr, out);
1878 enum stub_type type=stubs[n].type;
1879 int i=stubs[n].a;
1880 int rs=stubs[n].b;
81dbbf4c 1881 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1882 u_int reglist=stubs[n].e;
81dbbf4c 1883 const signed char *i_regmap=i_regs->regmap;
581335b0 1884 int rt,r;
cf95b4f0 1885 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1886 rt=get_reg(i_regmap,r=FTEMP);
1887 }else{
cf95b4f0 1888 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1889 }
1890 assert(rs>=0);
1891 assert(rt>=0);
b14b6a8f 1892 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1893 void *restore_jump = NULL;
b96d3df7 1894 int reglist2=reglist|(1<<rs)|(1<<rt);
1895 for(rtmp=0;rtmp<=12;rtmp++) {
1896 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1897 temp=rtmp; break;
1898 }
1899 }
1900 if(temp==-1) {
1901 save_regs(reglist);
1902 regs_saved=1;
1903 for(rtmp=0;rtmp<=3;rtmp++)
1904 if(rtmp!=rs&&rtmp!=rt)
1905 {temp=rtmp;break;}
1906 }
1907 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1908 temp2=3;
643aeae3 1909 emit_readword(&mem_wtab,temp);
b96d3df7 1910 emit_shrimm(rs,12,temp2);
1911 emit_readword_dualindexedx4(temp,temp2,temp2);
1912 emit_lsls_imm(temp2,1,temp2);
1913 switch(type) {
1914 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1915 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1916 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1917 default: assert(0);
1918 }
1919 if(regs_saved) {
df4dc2b1 1920 restore_jump=out;
b96d3df7 1921 emit_jcc(0); // jump to reg restore
1922 }
1923 else
b14b6a8f 1924 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1925
1926 if(!regs_saved)
1927 save_regs(reglist);
643aeae3 1928 void *handler=NULL;
b96d3df7 1929 switch(type) {
643aeae3 1930 case STOREB_STUB: handler=jump_handler_write8; break;
1931 case STOREH_STUB: handler=jump_handler_write16; break;
1932 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1933 default: assert(0);
b96d3df7 1934 }
643aeae3 1935 assert(handler);
b96d3df7 1936 pass_args(rs,rt);
1937 if(temp2!=3)
1938 emit_mov(temp2,3);
1939 int cc=get_reg(i_regmap,CCREG);
1940 if(cc<0)
1941 emit_loadreg(CCREG,2);
2330734f 1942 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1943 // returns new cycle_count
2a014d73 1944 emit_far_call(handler);
2330734f 1945 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1946 if(cc<0)
1947 emit_storereg(CCREG,2);
1948 if(restore_jump)
df4dc2b1 1949 set_jump_target(restore_jump, out);
b96d3df7 1950 restore_regs(reglist);
b14b6a8f 1951 emit_jmp(stubs[n].retaddr);
57871462 1952}
1953
81dbbf4c 1954static void inline_writestub(enum stub_type type, int i, u_int addr,
1955 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1956{
9de8a0c3 1957 int rs=get_reg_temp(regmap);
57871462 1958 int rt=get_reg(regmap,target);
1959 assert(rs>=0);
1960 assert(rt>=0);
687b4580 1961 uintptr_t host_addr = 0;
643aeae3 1962 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1963 if (handler == NULL) {
13e35c04 1964 if(addr!=host_addr)
1965 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1966 switch(type) {
1967 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1968 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1969 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1970 default: assert(0);
1971 }
1972 return;
1973 }
1974
1975 // call a memhandler
1976 save_regs(reglist);
13e35c04 1977 pass_args(rs,rt);
b96d3df7 1978 int cc=get_reg(regmap,CCREG);
1979 if(cc<0)
1980 emit_loadreg(CCREG,2);
2330734f 1981 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1982 emit_movimm((u_int)handler,3);
b96d3df7 1983 // returns new cycle_count
2a014d73 1984 emit_far_call(jump_handler_write_h);
2330734f 1985 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1986 if(cc<0)
1987 emit_storereg(CCREG,2);
1988 restore_regs(reglist);
57871462 1989}
1990
d1e4ebd9 1991// this output is parsed by verify_dirty, get_bounds, isclean, get_clean_addr
3d680478 1992static void do_dirty_stub_emit_args(u_int arg0, u_int source_len)
57871462 1993{
665f33e1 1994 #ifndef HAVE_ARMV7
7c3a5182 1995 emit_loadlp((int)source, 1);
1996 emit_loadlp((int)copy, 2);
3d680478 1997 emit_loadlp(source_len, 3);
57871462 1998 #else
7c3a5182 1999 emit_movw(((u_int)source)&0x0000FFFF, 1);
2000 emit_movw(((u_int)copy)&0x0000FFFF, 2);
2001 emit_movt(((u_int)source)&0xFFFF0000, 1);
2002 emit_movt(((u_int)copy)&0xFFFF0000, 2);
3d680478 2003 emit_movw(source_len, 3);
57871462 2004 #endif
7c3a5182 2005 emit_movimm(arg0, 0);
2006}
2007
3d680478 2008static void *do_dirty_stub(int i, u_int source_len)
7c3a5182 2009{
2010 assem_debug("do_dirty_stub %x\n",start+i*4);
3d680478 2011 do_dirty_stub_emit_args(start + i*4, source_len);
2a014d73 2012 emit_far_call(verify_code);
df4dc2b1 2013 void *entry = out;
57871462 2014 load_regs_entry(i);
df4dc2b1 2015 if (entry == out)
2016 entry = instr_addr[i];
57871462 2017 emit_jmp(instr_addr[i]);
2018 return entry;
2019}
2020
3d680478 2021static void do_dirty_stub_ds(u_int source_len)
57871462 2022{
3d680478 2023 do_dirty_stub_emit_args(start + 1, source_len);
2a014d73 2024 emit_far_call(verify_code_ds);
57871462 2025}
2026
57871462 2027/* Special assem */
2028
81dbbf4c 2029static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 2030{
2031 save_regs_all(reglist);
32631e6a 2032 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 2033#ifdef PCNT
81dbbf4c 2034 emit_movimm(op, 0);
2a014d73 2035 emit_far_call(pcnt_gte_start);
82ed88eb 2036#endif
81dbbf4c 2037 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 2038}
2039
2040static void c2op_epilogue(u_int op,u_int reglist)
2041{
82ed88eb 2042#ifdef PCNT
2043 emit_movimm(op,0);
2a014d73 2044 emit_far_call(pcnt_gte_end);
82ed88eb 2045#endif
054175e9 2046 restore_regs_all(reglist);
2047}
2048
6c0eefaf 2049static void c2op_call_MACtoIR(int lm,int need_flags)
2050{
2051 if(need_flags)
2a014d73 2052 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 2053 else
2a014d73 2054 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 2055}
2056
2057static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
2058{
2a014d73 2059 emit_far_call(func);
6c0eefaf 2060 // func is C code and trashes r0
2061 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2062 if(need_flags||need_ir)
2063 c2op_call_MACtoIR(lm,need_flags);
2a014d73 2064 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 2065}
2066
81dbbf4c 2067static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 2068{
81dbbf4c 2069 u_int c2op = source[i] & 0x3f;
2070 u_int reglist_full = get_host_reglist(i_regs->regmap);
2071 u_int reglist = reglist_full & CALLER_SAVE_REGS;
2072 int need_flags, need_ir;
b9b61529 2073
2074 if (gte_handlers[c2op]!=NULL) {
bedfea38 2075 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2076 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2077 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2078 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2079 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2080 need_flags=0;
6c0eefaf 2081 int shift = (source[i] >> 19) & 1;
2082 int lm = (source[i] >> 10) & 1;
054175e9 2083 switch(c2op) {
19776aef 2084#ifndef DRC_DBG
054175e9 2085 case GTE_MVMVA: {
82336ba3 2086#ifdef HAVE_ARMV5
054175e9 2087 int v = (source[i] >> 15) & 3;
2088 int cv = (source[i] >> 13) & 3;
2089 int mx = (source[i] >> 17) & 3;
4d646738 2090 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2091 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2092 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2093 if(v<3)
2094 emit_ldrd(v*8,0,4);
2095 else {
2096 emit_movzwl_indexed(9*4,0,4); // gteIR
2097 emit_movzwl_indexed(10*4,0,6);
2098 emit_movzwl_indexed(11*4,0,5);
2099 emit_orrshl_imm(6,16,4);
2100 }
2101 if(mx<3)
2102 emit_addimm(0,32*4+mx*8*4,6);
2103 else
643aeae3 2104 emit_readword(&zeromem_ptr,6);
054175e9 2105 if(cv<3)
2106 emit_addimm(0,32*4+(cv*8+5)*4,7);
2107 else
643aeae3 2108 emit_readword(&zeromem_ptr,7);
054175e9 2109#ifdef __ARM_NEON__
2110 emit_movimm(source[i],1); // opcode
2a014d73 2111 emit_far_call(gteMVMVA_part_neon);
054175e9 2112 if(need_flags) {
2113 emit_movimm(lm,1);
2a014d73 2114 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2115 }
2116#else
2117 if(cv==3&&shift)
33788798 2118 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 2119 else {
2120 emit_movimm(shift,1);
33788798 2121 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 2122 }
6c0eefaf 2123 if(need_flags||need_ir)
2124 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2125#endif
2126#else /* if not HAVE_ARMV5 */
81dbbf4c 2127 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2128 emit_movimm(source[i],1); // opcode
643aeae3 2129 emit_writeword(1,&psxRegs.code);
2a014d73 2130 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2131#endif
2132 break;
2133 }
6c0eefaf 2134 case GTE_OP:
81dbbf4c 2135 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2136 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2137 if(need_flags||need_ir) {
2138 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2139 c2op_call_MACtoIR(lm,need_flags);
2140 }
2141 break;
2142 case GTE_DPCS:
81dbbf4c 2143 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2144 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2145 break;
2146 case GTE_INTPL:
81dbbf4c 2147 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2148 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2149 break;
2150 case GTE_SQR:
81dbbf4c 2151 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2152 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2153 if(need_flags||need_ir) {
2154 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2155 c2op_call_MACtoIR(lm,need_flags);
2156 }
2157 break;
2158 case GTE_DCPL:
81dbbf4c 2159 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2160 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2161 break;
2162 case GTE_GPF:
81dbbf4c 2163 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2164 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2165 break;
2166 case GTE_GPL:
81dbbf4c 2167 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2168 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2169 break;
19776aef 2170#endif
054175e9 2171 default:
81dbbf4c 2172 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2173#ifdef DRC_DBG
2174 emit_movimm(source[i],1); // opcode
643aeae3 2175 emit_writeword(1,&psxRegs.code);
19776aef 2176#endif
2a014d73 2177 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2178 break;
2179 }
2180 c2op_epilogue(c2op,reglist);
2181 }
b9b61529 2182}
2183
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 * Equivalent C, with 'value' read from host register sl and the result
 * left in host register temp:
 *
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // shift right then left by 12 to drop the low 12 bits
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // the 0x7f87e000 mask is tested in three pieces; each later test is
  // conditional on the previous one having found no bit set
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  emit_orrne_imm(temp, 0x80000000, temp);
}
2195
2196static void do_mfc2_31_one(u_int copr,signed char temp)
2197{
2198 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2199 emit_lsls_imm(temp,16,temp);
2200 emit_cmovs_imm(0,temp);
2201 emit_cmpimm(temp,0xf80<<16);
2202 emit_andimm(temp,0xf80<<16,temp);
2203 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2204}
2205
2206static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2207{
2208 if (temp < 0) {
2209 host_tempreg_acquire();
2210 temp = HOST_TEMPREG;
2211 }
2212 do_mfc2_31_one(9,temp);
9c997d19 2213 emit_shrimm(temp,7+16,tl);
3968e69e 2214 do_mfc2_31_one(10,temp);
9c997d19 2215 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2216 do_mfc2_31_one(11,temp);
9c997d19 2217 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2218 emit_writeword(tl,&reg_cop2d[29]);
2219 if (temp == HOST_TEMPREG)
2220 host_tempreg_release();
2221}
2222
2330734f 2223static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2224{
2225 // case 0x18: MULT
2226 // case 0x19: MULTU
2227 // case 0x1A: DIV
2228 // case 0x1B: DIVU
2229 // case 0x1C: DMULT
2230 // case 0x1D: DMULTU
2231 // case 0x1E: DDIV
2232 // case 0x1F: DDIVU
cf95b4f0 2233 if(dops[i].rs1&&dops[i].rs2)
57871462 2234 {
cf95b4f0 2235 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2236 {
cf95b4f0 2237 if(dops[i].opcode2==0x18) // MULT
57871462 2238 {
cf95b4f0 2239 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2240 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2241 signed char hi=get_reg(i_regs->regmap,HIREG);
2242 signed char lo=get_reg(i_regs->regmap,LOREG);
2243 assert(m1>=0);
2244 assert(m2>=0);
2245 assert(hi>=0);
2246 assert(lo>=0);
2247 emit_smull(m1,m2,hi,lo);
2248 }
cf95b4f0 2249 if(dops[i].opcode2==0x19) // MULTU
57871462 2250 {
cf95b4f0 2251 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2252 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2253 signed char hi=get_reg(i_regs->regmap,HIREG);
2254 signed char lo=get_reg(i_regs->regmap,LOREG);
2255 assert(m1>=0);
2256 assert(m2>=0);
2257 assert(hi>=0);
2258 assert(lo>=0);
2259 emit_umull(m1,m2,hi,lo);
2260 }
cf95b4f0 2261 if(dops[i].opcode2==0x1A) // DIV
57871462 2262 {
cf95b4f0 2263 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2264 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2265 assert(d1>=0);
2266 assert(d2>=0);
2267 signed char quotient=get_reg(i_regs->regmap,LOREG);
2268 signed char remainder=get_reg(i_regs->regmap,HIREG);
2269 assert(quotient>=0);
2270 assert(remainder>=0);
2271 emit_movs(d1,remainder);
44a80f6a 2272 emit_movimm(0xffffffff,quotient);
2273 emit_negmi(quotient,quotient); // .. quotient and ..
2274 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2275 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2276 emit_jeq(out+52); // Division by zero
82336ba3 2277 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2278#ifdef HAVE_ARMV5
57871462 2279 emit_clz(HOST_TEMPREG,quotient);
2280 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2281#else
2282 emit_movimm(0,quotient);
2283 emit_addpl_imm(quotient,1,quotient);
2284 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2285 emit_jns(out-2*4);
665f33e1 2286#endif
57871462 2287 emit_orimm(quotient,1<<31,quotient);
2288 emit_shr(quotient,quotient,quotient);
2289 emit_cmp(remainder,HOST_TEMPREG);
2290 emit_subcs(remainder,HOST_TEMPREG,remainder);
2291 emit_adcs(quotient,quotient,quotient);
2292 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2293 emit_jcc(out-16); // -4
57871462 2294 emit_teq(d1,d2);
2295 emit_negmi(quotient,quotient);
2296 emit_test(d1,d1);
2297 emit_negmi(remainder,remainder);
2298 }
cf95b4f0 2299 if(dops[i].opcode2==0x1B) // DIVU
57871462 2300 {
cf95b4f0 2301 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2302 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2303 assert(d1>=0);
2304 assert(d2>=0);
2305 signed char quotient=get_reg(i_regs->regmap,LOREG);
2306 signed char remainder=get_reg(i_regs->regmap,HIREG);
2307 assert(quotient>=0);
2308 assert(remainder>=0);
44a80f6a 2309 emit_mov(d1,remainder);
2310 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2311 emit_test(d2,d2);
7c3a5182 2312 emit_jeq(out+40); // Division by zero
665f33e1 2313#ifdef HAVE_ARMV5
57871462 2314 emit_clz(d2,HOST_TEMPREG);
2315 emit_movimm(1<<31,quotient);
2316 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2317#else
2318 emit_movimm(0,HOST_TEMPREG);
82336ba3 2319 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2320 emit_lslpls_imm(d2,1,d2);
7c3a5182 2321 emit_jns(out-2*4);
665f33e1 2322 emit_movimm(1<<31,quotient);
2323#endif
57871462 2324 emit_shr(quotient,HOST_TEMPREG,quotient);
2325 emit_cmp(remainder,d2);
2326 emit_subcs(remainder,d2,remainder);
2327 emit_adcs(quotient,quotient,quotient);
2328 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2329 emit_jcc(out-16); // -4
57871462 2330 }
2331 }
2332 else // 64-bit
71e490c5 2333 assert(0);
57871462 2334 }
2335 else
2336 {
2337 // Multiply by zero is zero.
2338 // MIPS does not have a divide by zero exception.
2339 // The result is undefined, we return zero.
2340 signed char hr=get_reg(i_regs->regmap,HIREG);
2341 signed char lr=get_reg(i_regs->regmap,LOREG);
2342 if(hr>=0) emit_zeroreg(hr);
2343 if(lr>=0) emit_zeroreg(lr);
2344 }
2345}
2346#define multdiv_assemble multdiv_assemble_arm
2347
d1e4ebd9 2348static void do_jump_vaddr(int rs)
2349{
2a014d73 2350 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2351}
2352
/*
 * Intentionally empty on ARM.
 * On x86 this step puts the value 0xf8 into the register; on ARM the hash
 * can be computed with a single instruction (see do_rhash), so there is
 * nothing to preload.
 */
static void do_preload_rhash(int r)
{
}
2357
e2b5e7aa 2358static void do_preload_rhtbl(int ht) {
57871462 2359 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2360}
2361
/* Emit the mini hash computation: rh = rs & 0xf8. */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2365
e2b5e7aa 2366static void do_miniht_load(int ht,int rh) {
57871462 2367 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2368 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2369}
2370
/*
 * Emit the mini hash table dispatch: compare rh with rs and, when equal,
 * load pc (register 15) from [ht + 4].  Otherwise execution continues into
 * the jump_vaddr far jump.  With CORTEX_A8_BRANCH_PREDICTION_HACK the
 * target address is always passed in r7.
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if (rs != 7) {
    emit_mov(rs, 7);
    rs = 7;
  }
#endif
  do_jump_vaddr(rs);
}
2381
e2b5e7aa 2382static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2383 #ifndef HAVE_ARMV7
57871462 2384 emit_movimm(return_address,rt); // PC into link register
643aeae3 2385 add_to_linker(out,return_address,1);
57871462 2386 emit_pcreladdr(temp);
643aeae3 2387 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2388 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2389 #else
2390 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2391 add_to_linker(out,return_address,1);
57871462 2392 emit_pcreladdr(temp);
643aeae3 2393 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2394 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2395 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2396 #endif
2397}
2398
57871462 2399// CPU-architecture-specific initialization
2a014d73 2400static void arch_init(void)
2401{
2402 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2403 struct tramp_insns *ops = ndrc->tramp.ops;
2404 size_t i;
2405 assert(!(diff & 3));
2406 assert(diff < 0x1000);
2407 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2408 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2409 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2410 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2411}
b9b61529 2412
2413// vim:shiftwidth=2:expandtab