drc: more precise invalidation checking for the invstub case
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
// Entry points defined outside this file (no definition is visible here;
// presumably the hand-written assembly glue - confirm).
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr stub for each host register number.
// Registers 11, 13, 14 and 15 have no stub; their slots are left null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 72
0bbd1454 73void invalidate_addr_r0();
74void invalidate_addr_r1();
75void invalidate_addr_r2();
76void invalidate_addr_r3();
77void invalidate_addr_r4();
78void invalidate_addr_r5();
79void invalidate_addr_r6();
80void invalidate_addr_r7();
81void invalidate_addr_r8();
82void invalidate_addr_r9();
83void invalidate_addr_r10();
84void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
57871462 104/* Linker */
105
df4dc2b1 106static void set_jump_target(void *addr, void *target_)
57871462 107{
df4dc2b1 108 u_int target = (u_int)target_;
109 u_char *ptr = addr;
57871462 110 u_int *ptr2=(u_int *)ptr;
111 if(ptr[3]==0xe2) {
112 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 113 assert(((uintptr_t)addr&3)==0);
57871462 114 assert((target&3)==0);
115 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 116 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 117 }
118 else if(ptr[3]==0x72) {
119 // generated by emit_jno_unlikely
120 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 121 assert(((uintptr_t)addr&3)==0);
57871462 122 assert((target&3)==0);
123 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
124 }
125 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 126 assert(((uintptr_t)addr&3)==0);
57871462 127 assert((target&3)==0);
128 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
129 }
130 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
131 }
132 else {
133 assert((ptr[3]&0x0e)==0xa);
134 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
135 }
136}
137
138// This optionally copies the instruction from the target of the branch into
139// the space before the branch. Works, but the difference in speed is
140// usually insignificant.
e2b5e7aa 141#if 0
142static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 143{
144 u_char *ptr=(u_char *)addr;
145 u_int *ptr2=(u_int *)ptr;
146 assert(!copy||ptr2[-1]==0xe28dd000);
147 if(ptr[3]==0xe2) {
148 assert(!copy);
149 assert((target-(u_int)ptr2-8)<4096);
150 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
151 }
152 else {
153 assert((ptr[3]&0x0e)==0xa);
154 u_int target_insn=*(u_int *)target;
155 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
156 copy=0;
157 }
158 if((target_insn&0x0c100000)==0x04100000) { // Load
159 copy=0;
160 }
161 if(target_insn&0x08000000) {
162 copy=0;
163 }
164 if(copy) {
165 ptr2[-1]=target_insn;
166 target+=4;
167 }
168 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
169 }
170}
e2b5e7aa 171#endif
57871462 172
173/* Literal pool */
e2b5e7aa 174static void add_literal(int addr,int val)
57871462 175{
15776b68 176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
9f51b4b9 179 literalcount++;
180}
57871462 181
d148d265 182// from a pointer to external jump stub (which was produced by emit_extjump2)
183// find where the jumping insn is
184static void *find_extjump_insn(void *stub)
57871462 185{
186 int *ptr=(int *)(stub+4);
d148d265 187 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 188 u_int offset=*ptr&0xfff;
d148d265 189 void **l_ptr=(void *)ptr+offset+8;
190 return *l_ptr;
57871462 191}
192
f968d35d 193// find where external branch is linked to using addr of its stub:
194// get address that insn one after stub loads (dyna_linker arg1),
195// treat it as a pointer to branch insn,
196// return addr where that branch jumps to
104df9d3 197#if 0
643aeae3 198static void *get_pointer(void *stub)
57871462 199{
200 //printf("get_pointer(%x)\n",(int)stub);
d148d265 201 int *i_ptr=find_extjump_insn(stub);
3d680478 202 assert((*i_ptr&0x0f000000)==0x0a000000); // b
643aeae3 203 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 204}
104df9d3 205#endif
57871462 206
57871462 207// Allocate a specific ARM register.
e2b5e7aa 208static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 209{
210 int n;
f776eb14 211 int dirty=0;
9f51b4b9 212
57871462 213 // see if it's already allocated (and dealloc it)
214 for(n=0;n<HOST_REGS;n++)
215 {
f776eb14 216 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
217 dirty=(cur->dirty>>n)&1;
218 cur->regmap[n]=-1;
219 }
57871462 220 }
9f51b4b9 221
57871462 222 cur->regmap[hr]=reg;
223 cur->dirty&=~(1<<hr);
f776eb14 224 cur->dirty|=dirty<<hr;
57871462 225 cur->isconst&=~(1<<hr);
226}
227
228// Alloc cycle count into dedicated register
e2b5e7aa 229static void alloc_cc(struct regstat *cur,int i)
57871462 230{
231 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
232}
233
57871462 234/* Assembler */
235
e2b5e7aa 236static unused char regname[16][4] = {
57871462 237 "r0",
238 "r1",
239 "r2",
240 "r3",
241 "r4",
242 "r5",
243 "r6",
244 "r7",
245 "r8",
246 "r9",
247 "r10",
248 "fp",
249 "r12",
250 "sp",
251 "lr",
252 "pc"};
253
e2b5e7aa 254static void output_w32(u_int word)
57871462 255{
256 *((u_int *)out)=word;
257 out+=4;
258}
e2b5e7aa 259
// Build the register fields of an ARM instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// All three register numbers must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 267
// Build register and immediate fields for an instruction whose operand
// is the 8-bit value imm shifted left by an even amount.
// The left shift is stored as the equivalent right rotation (32-shift).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotation=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rotation<<7)|imm;
}
e2b5e7aa 276
// Try to express imm as an 8-bit value with an even rotation.
// On success stores the rotate/imm8 fields in *encoded and returns 1.
// Otherwise returns 0 and *encoded is left as 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
e2b5e7aa 292
293static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 294{
295 u_int ret=genimm(imm,encoded);
296 assert(ret);
581335b0 297 (void)ret;
cfbd3c6e 298}
e2b5e7aa 299
300static u_int genjmp(u_int addr)
57871462 301{
7c3a5182 302 if (addr < 3) return 0; // a branch that will be patched later
303 int offset = addr-(int)out-8;
304 if (offset < -33554432 || offset >= 33554432) {
305 SysPrintf("genjmp: out of range: %08x\n", offset);
306 abort();
e80343e2 307 return 0;
308 }
57871462 309 return ((u_int)offset>>2)&0xffffff;
310}
311
d1e4ebd9 312static unused void emit_breakpoint(void)
313{
314 assem_debug("bkpt #0\n");
315 //output_w32(0xe1200070);
316 output_w32(0xe7f001f0);
317}
318
e2b5e7aa 319static void emit_mov(int rs,int rt)
57871462 320{
321 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
323}
324
e2b5e7aa 325static void emit_movs(int rs,int rt)
57871462 326{
327 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
329}
330
e2b5e7aa 331static void emit_add(int rs1,int rs2,int rt)
57871462 332{
333 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
334 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
335}
336
39b71d9a 337static void emit_adds(int rs1,int rs2,int rt)
338{
339 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
340 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
341}
342#define emit_adds_ptr emit_adds
343
e2b5e7aa 344static void emit_adcs(int rs1,int rs2,int rt)
57871462 345{
346 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
347 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
348}
349
e2b5e7aa 350static void emit_neg(int rs, int rt)
57871462 351{
352 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
353 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
354}
355
e2b5e7aa 356static void emit_sub(int rs1,int rs2,int rt)
57871462 357{
358 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
359 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
360}
361
e2b5e7aa 362static void emit_zeroreg(int rt)
57871462 363{
364 assem_debug("mov %s,#0\n",regname[rt]);
365 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
366}
367
e2b5e7aa 368static void emit_loadlp(u_int imm,u_int rt)
790ee18e 369{
370 add_literal((int)out,imm);
371 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
372 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
373}
e2b5e7aa 374
33788798 375#ifdef HAVE_ARMV7
e2b5e7aa 376static void emit_movw(u_int imm,u_int rt)
790ee18e 377{
378 assert(imm<65536);
379 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
380 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
381}
e2b5e7aa 382
383static void emit_movt(u_int imm,u_int rt)
790ee18e 384{
385 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
386 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
387}
33788798 388#endif
e2b5e7aa 389
390static void emit_movimm(u_int imm,u_int rt)
790ee18e 391{
392 u_int armval;
393 if(genimm(imm,&armval)) {
394 assem_debug("mov %s,#%d\n",regname[rt],imm);
395 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
396 }else if(genimm(~imm,&armval)) {
397 assem_debug("mvn %s,#%d\n",regname[rt],imm);
398 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
399 }else if(imm<65536) {
665f33e1 400 #ifndef HAVE_ARMV7
790ee18e 401 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
402 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
403 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
404 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
405 #else
406 emit_movw(imm,rt);
407 #endif
408 }else{
665f33e1 409 #ifndef HAVE_ARMV7
790ee18e 410 emit_loadlp(imm,rt);
411 #else
412 emit_movw(imm&0x0000FFFF,rt);
413 emit_movt(imm&0xFFFF0000,rt);
414 #endif
415 }
416}
e2b5e7aa 417
418static void emit_pcreladdr(u_int rt)
790ee18e 419{
420 assem_debug("add %s,pc,#?\n",regname[rt]);
421 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
422}
423
e2b5e7aa 424static void emit_loadreg(int r, int hr)
57871462 425{
53358c1d 426 assert(hr != EXCLUDE_REG);
427 if (r == 0)
57871462 428 emit_zeroreg(hr);
429 else {
33788798 430 void *addr;
7c3a5182 431 switch (r) {
432 //case HIREG: addr = &hi; break;
433 //case LOREG: addr = &lo; break;
33788798 434 case CCREG: addr = &cycle_count; break;
435 case CSREG: addr = &Status; break;
436 case INVCP: addr = &invc_ptr; break;
437 case ROREG: addr = &ram_offset; break;
438 default:
439 assert(r < 34);
440 addr = &psxRegs.GPR.r[r];
441 break;
7c3a5182 442 }
33788798 443 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 444 assert(offset<4096);
6cc8d23c 445 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 446 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
447 }
448}
e2b5e7aa 449
450static void emit_storereg(int r, int hr)
57871462 451{
53358c1d 452 assert(hr != EXCLUDE_REG);
7c3a5182 453 int addr = (int)&psxRegs.GPR.r[r];
454 switch (r) {
455 //case HIREG: addr = &hi; break;
456 //case LOREG: addr = &lo; break;
457 case CCREG: addr = (int)&cycle_count; break;
458 default: assert(r < 34); break;
459 }
57871462 460 u_int offset = addr-(u_int)&dynarec_local;
461 assert(offset<4096);
6cc8d23c 462 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 463 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
464}
465
e2b5e7aa 466static void emit_test(int rs, int rt)
57871462 467{
468 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
469 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
470}
471
e2b5e7aa 472static void emit_testimm(int rs,int imm)
57871462 473{
474 u_int armval;
5a05d80c 475 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 476 genimm_checked(imm,&armval);
57871462 477 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
478}
479
e2b5e7aa 480static void emit_testeqimm(int rs,int imm)
b9b61529 481{
482 u_int armval;
483 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 484 genimm_checked(imm,&armval);
b9b61529 485 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
486}
487
e2b5e7aa 488static void emit_not(int rs,int rt)
57871462 489{
490 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
491 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
492}
493
e2b5e7aa 494static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 495{
496 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
497 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
498}
499
e2b5e7aa 500static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 501{
502 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
503 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
504}
e2b5e7aa 505
e2b5e7aa 506static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 507{
508 assert(rs<16);
509 assert(rt<16);
510 assert(imm<32);
511 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
512 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
513}
514
e2b5e7aa 515static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 516{
517 assert(rs<16);
518 assert(rt<16);
519 assert(imm<32);
520 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
521 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
522}
523
e2b5e7aa 524static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 525{
526 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
528}
529
3968e69e 530static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
531{
532 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
533 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
534}
535
e2b5e7aa 536static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 537{
538 assert(rs<16);
539 assert(rt<16);
540 if(imm!=0) {
57871462 541 u_int armval;
542 if(genimm(imm,&armval)) {
543 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
544 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
545 }else if(genimm(-imm,&armval)) {
8a0a8423 546 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 547 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 548 #ifdef HAVE_ARMV7
549 }else if(rt!=rs&&(u_int)imm<65536) {
550 emit_movw(imm&0x0000ffff,rt);
551 emit_add(rs,rt,rt);
552 }else if(rt!=rs&&(u_int)-imm<65536) {
553 emit_movw(-imm&0x0000ffff,rt);
554 emit_sub(rs,rt,rt);
555 #endif
556 }else if((u_int)-imm<65536) {
57871462 557 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
558 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
559 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
560 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 561 }else {
562 do {
563 int shift = (ffs(imm) - 1) & ~1;
564 int imm8 = imm & (0xff << shift);
565 genimm_checked(imm8,&armval);
566 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
567 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
568 rs = rt;
569 imm &= ~imm8;
570 }
571 while (imm != 0);
57871462 572 }
573 }
574 else if(rs!=rt) emit_mov(rs,rt);
575}
576
e2b5e7aa 577static void emit_addimm_and_set_flags(int imm,int rt)
57871462 578{
579 assert(imm>-65536&&imm<65536);
580 u_int armval;
581 if(genimm(imm,&armval)) {
582 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
583 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
584 }else if(genimm(-imm,&armval)) {
585 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
586 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
587 }else if(imm<0) {
588 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
589 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
590 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
591 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
592 }else{
593 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
594 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
595 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
596 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
597 }
598}
e2b5e7aa 599
e2b5e7aa 600static void emit_addnop(u_int r)
57871462 601{
602 assert(r<16);
603 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
604 output_w32(0xe2800000|rd_rn_rm(r,r,0));
605}
606
e2b5e7aa 607static void emit_andimm(int rs,int imm,int rt)
57871462 608{
609 u_int armval;
790ee18e 610 if(imm==0) {
611 emit_zeroreg(rt);
612 }else if(genimm(imm,&armval)) {
57871462 613 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
614 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
615 }else if(genimm(~imm,&armval)) {
616 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
617 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
618 }else if(imm==65535) {
332a4533 619 #ifndef HAVE_ARMV6
57871462 620 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
621 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
622 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
623 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
624 #else
625 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
626 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
627 #endif
628 }else{
629 assert(imm>0&&imm<65535);
665f33e1 630 #ifndef HAVE_ARMV7
57871462 631 assem_debug("mov r14,#%d\n",imm&0xFF00);
632 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
633 assem_debug("add r14,r14,#%d\n",imm&0xFF);
634 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
635 #else
636 emit_movw(imm,HOST_TEMPREG);
637 #endif
638 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
639 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
640 }
641}
642
e2b5e7aa 643static void emit_orimm(int rs,int imm,int rt)
57871462 644{
645 u_int armval;
790ee18e 646 if(imm==0) {
647 if(rs!=rt) emit_mov(rs,rt);
648 }else if(genimm(imm,&armval)) {
57871462 649 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
650 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
651 }else{
652 assert(imm>0&&imm<65536);
653 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
654 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
655 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
656 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
657 }
658}
659
e2b5e7aa 660static void emit_xorimm(int rs,int imm,int rt)
57871462 661{
57871462 662 u_int armval;
790ee18e 663 if(imm==0) {
664 if(rs!=rt) emit_mov(rs,rt);
665 }else if(genimm(imm,&armval)) {
57871462 666 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
667 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
668 }else{
514ed0d9 669 assert(imm>0&&imm<65536);
57871462 670 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
671 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
672 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
673 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
674 }
675}
676
e2b5e7aa 677static void emit_shlimm(int rs,u_int imm,int rt)
57871462 678{
679 assert(imm>0);
680 assert(imm<32);
681 //if(imm==1) ...
682 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
684}
685
e2b5e7aa 686static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 687{
688 assert(imm>0);
689 assert(imm<32);
690 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
692}
693
e2b5e7aa 694static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 695{
696 assert(imm>0);
697 assert(imm<32);
698 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
700}
701
e2b5e7aa 702static void emit_shrimm(int rs,u_int imm,int rt)
57871462 703{
704 assert(imm>0);
705 assert(imm<32);
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
708}
709
e2b5e7aa 710static void emit_sarimm(int rs,u_int imm,int rt)
57871462 711{
712 assert(imm>0);
713 assert(imm<32);
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
716}
717
e2b5e7aa 718static void emit_rorimm(int rs,u_int imm,int rt)
57871462 719{
720 assert(imm>0);
721 assert(imm<32);
722 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
724}
725
// Sign-extend the low 16 bits of rs into rt: sxth on ARMv6 and later,
// otherwise a shift left / arithmetic shift right pair.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
736
// Sign-extend the low 8 bits of rs into rt: sxtb on ARMv6 and later,
// otherwise a shift left / arithmetic shift right pair.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
747
e2b5e7aa 748static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 749{
750 assert(rs<16);
751 assert(rt<16);
752 assert(shift<16);
753 //if(imm==1) ...
754 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
755 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
756}
e2b5e7aa 757
758static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 759{
760 assert(rs<16);
761 assert(rt<16);
762 assert(shift<16);
763 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
764 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
765}
e2b5e7aa 766
767static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 768{
769 assert(rs<16);
770 assert(rt<16);
771 assert(shift<16);
772 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
773 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
774}
57871462 775
3968e69e 776static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 777{
778 assert(rs<16);
779 assert(rt<16);
780 assert(shift<16);
781 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
782 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
783}
e2b5e7aa 784
3968e69e 785static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 786{
787 assert(rs<16);
788 assert(rt<16);
789 assert(shift<16);
790 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
791 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
792}
793
e2b5e7aa 794static void emit_cmpimm(int rs,int imm)
57871462 795{
796 u_int armval;
797 if(genimm(imm,&armval)) {
5a05d80c 798 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 799 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
800 }else if(genimm(-imm,&armval)) {
5a05d80c 801 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 802 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
803 }else if(imm>0) {
804 assert(imm<65536);
57871462 805 emit_movimm(imm,HOST_TEMPREG);
57871462 806 assem_debug("cmp %s,r14\n",regname[rs]);
807 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
808 }else{
809 assert(imm>-65536);
57871462 810 emit_movimm(-imm,HOST_TEMPREG);
57871462 811 assem_debug("cmn %s,r14\n",regname[rs]);
812 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
813 }
814}
815
e2b5e7aa 816static void emit_cmovne_imm(int imm,int rt)
57871462 817{
818 assem_debug("movne %s,#%d\n",regname[rt],imm);
819 u_int armval;
cfbd3c6e 820 genimm_checked(imm,&armval);
57871462 821 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
822}
e2b5e7aa 823
824static void emit_cmovl_imm(int imm,int rt)
57871462 825{
826 assem_debug("movlt %s,#%d\n",regname[rt],imm);
827 u_int armval;
cfbd3c6e 828 genimm_checked(imm,&armval);
57871462 829 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
830}
e2b5e7aa 831
832static void emit_cmovb_imm(int imm,int rt)
57871462 833{
834 assem_debug("movcc %s,#%d\n",regname[rt],imm);
835 u_int armval;
cfbd3c6e 836 genimm_checked(imm,&armval);
57871462 837 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
838}
e2b5e7aa 839
3968e69e 840static void emit_cmovae_imm(int imm,int rt)
841{
842 assem_debug("movcs %s,#%d\n",regname[rt],imm);
843 u_int armval;
844 genimm_checked(imm,&armval);
845 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
846}
847
9c997d19 848static void emit_cmovs_imm(int imm,int rt)
849{
850 assem_debug("movmi %s,#%d\n",regname[rt],imm);
851 u_int armval;
852 genimm_checked(imm,&armval);
853 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
854}
855
e2b5e7aa 856static void emit_cmovne_reg(int rs,int rt)
57871462 857{
858 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
859 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
860}
e2b5e7aa 861
862static void emit_cmovl_reg(int rs,int rt)
57871462 863{
864 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
865 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
866}
e2b5e7aa 867
e3c6bdb5 868static void emit_cmovb_reg(int rs,int rt)
869{
870 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
871 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
872}
873
e2b5e7aa 874static void emit_cmovs_reg(int rs,int rt)
57871462 875{
876 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
878}
879
// rt = (rs < imm) ? 1 : 0, signed comparison.
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }
  else {
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 887
// rt = (rs < imm) ? 1 : 0, unsigned comparison.
// When rt aliases rs, rt is cleared only after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  if(rs==rt) {
    emit_cmpimm(rs,imm);
    emit_movimm(0,rt);
  }
  else {
    emit_zeroreg(rt);
    emit_cmpimm(rs,imm);
  }
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 895
e2b5e7aa 896static void emit_cmp(int rs,int rt)
57871462 897{
898 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
899 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
900}
e2b5e7aa 901
882a08fc 902static void emit_cmpcs(int rs,int rt)
903{
904 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
905 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
906}
907
// rt = (rs > 0) ? 1 : 0, signed.
// Implemented as: compare rs with 1, preset rt to 1, then overwrite
// with 0 when rs was less than 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 915
// rt = (rs != 0) ? 1 : 0.
// The flags come from the movs that copies rs into rt, or from a tst
// when both are the same register; a non-zero value is then replaced by 1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 923
// rt = (rs1 < rs2) ? 1 : 0, signed comparison.
// If rt is also one of the sources it is cleared after the compare,
// otherwise before it.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 932
// rt = (rs1 < rs2) ? 1 : 0, unsigned comparison.
// If rt is also one of the sources it is cleared after the compare,
// otherwise before it.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 941
2a014d73 942static int can_jump_or_call(const void *a)
943{
944 intptr_t offset = (u_char *)a - out - 8;
945 return (-33554432 <= offset && offset < 33554432);
946}
947
643aeae3 948static void emit_call(const void *a_)
57871462 949{
643aeae3 950 int a = (int)a_;
d1e4ebd9 951 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 952 u_int offset=genjmp(a);
953 output_w32(0xeb000000|offset);
954}
e2b5e7aa 955
b14b6a8f 956static void emit_jmp(const void *a_)
57871462 957{
b14b6a8f 958 int a = (int)a_;
d1e4ebd9 959 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 960 u_int offset=genjmp(a);
961 output_w32(0xea000000|offset);
962}
e2b5e7aa 963
// bne a_ - branch if not equal
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n",target);
  output_w32(0x1a000000|genjmp(target));
}
e2b5e7aa 971
// beq a_ - branch if equal
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n",target);
  output_w32(0x0a000000|genjmp(target));
}
e2b5e7aa 979
// bmi a_ - branch if negative
static void emit_js(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000|genjmp(target));
}
e2b5e7aa 987
// bpl a_ - branch if positive or zero
static void emit_jns(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000|genjmp(target));
}
e2b5e7aa 995
// blt a_ - branch if signed less than
static void emit_jl(const void *a_)
{
  const int target = (int)a_;
  assem_debug("blt %x\n",target);
  output_w32(0xba000000|genjmp(target));
}
e2b5e7aa 1003
// bge a_ - branch if signed greater than or equal
static void emit_jge(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bge %x\n",target);
  output_w32(0xaa000000|genjmp(target));
}
e2b5e7aa 1011
// bvc a_ - branch if no overflow
static void emit_jno(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000|genjmp(target));
}
e2b5e7aa 1019
// bcs a_ - branch if carry set
static void emit_jc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000|genjmp(target));
}
e2b5e7aa 1027
// Emit a conditional branch ("bcc", taken when carry is clear) to a_.
static void emit_jcc(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n", target);
  output_w32(0x3a000000 | genjmp(target));
}
1035
3968e69e 1036static unused void emit_callreg(u_int r)
57871462 1037{
c6c3b1b3 1038 assert(r<15);
1039 assem_debug("blx %s\n",regname[r]);
1040 output_w32(0xe12fff30|r);
57871462 1041}
e2b5e7aa 1042
1043static void emit_jmpreg(u_int r)
57871462 1044{
1045 assem_debug("mov pc,%s\n",regname[r]);
1046 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1047}
1048
be516ebe 1049static void emit_ret(void)
1050{
1051 emit_jmpreg(14);
1052}
1053
e2b5e7aa 1054static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1055{
1056 assert(offset>-4096&&offset<4096);
1057 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1058 if(offset>=0) {
1059 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1060 }else{
1061 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1062 }
1063}
e2b5e7aa 1064
1065static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1066{
1067 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1068 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1069}
39b71d9a 1070#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1071
1072static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1073{
1074 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1075 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1076}
e2b5e7aa 1077
1078static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1079{
1080 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1081 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1082}
e2b5e7aa 1083
37387d8b 1084static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1085{
1086 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1087 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1088}
1089
e2b5e7aa 1090static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1091{
1092 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1094}
e2b5e7aa 1095
37387d8b 1096static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1097{
1098 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1100}
1101
e2b5e7aa 1102static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1103{
1104 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1106}
e2b5e7aa 1107
37387d8b 1108static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1109{
1110 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1111 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1112}
1113
e2b5e7aa 1114static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1115{
1116 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1118}
e2b5e7aa 1119
37387d8b 1120static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1121{
1122 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1124}
1125
e2b5e7aa 1126static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1127{
1128 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1130}
1131
1132static void emit_str_dualindexed(int rs1, int rs2, int rt)
1133{
1134 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1136}
1137
1138static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1142}
1143
1144static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1145{
1146 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1147 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1148}
e2b5e7aa 1149
e2b5e7aa 1150static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1151{
1152 assert(offset>-256&&offset<256);
1153 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1154 if(offset>=0) {
1155 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1156 }else{
1157 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1158 }
1159}
e2b5e7aa 1160
e2b5e7aa 1161static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1162{
1163 assert(offset>-256&&offset<256);
1164 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1165 if(offset>=0) {
1166 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1167 }else{
1168 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1169 }
1170}
e2b5e7aa 1171
1172static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1173{
1174 assert(offset>-4096&&offset<4096);
1175 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1176 if(offset>=0) {
1177 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1178 }else{
1179 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1180 }
1181}
e2b5e7aa 1182
e2b5e7aa 1183static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1184{
1185 assert(offset>-256&&offset<256);
1186 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1187 if(offset>=0) {
1188 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1189 }else{
1190 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1191 }
1192}
e2b5e7aa 1193
054175e9 1194static void emit_ldrd(int offset, int rs, int rt)
1195{
1196 assert(offset>-256&&offset<256);
1197 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1198 if(offset>=0) {
1199 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1200 }else{
1201 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1202 }
1203}
e2b5e7aa 1204
643aeae3 1205static void emit_readword(void *addr, int rt)
57871462 1206{
643aeae3 1207 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1208 assert(offset<4096);
1209 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1210 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1211}
39b71d9a 1212#define emit_readptr emit_readword
e2b5e7aa 1213
e2b5e7aa 1214static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1215{
1216 assert(offset>-4096&&offset<4096);
1217 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1218 if(offset>=0) {
1219 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1220 }else{
1221 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1222 }
1223}
e2b5e7aa 1224
e2b5e7aa 1225static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1226{
1227 assert(offset>-256&&offset<256);
1228 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1229 if(offset>=0) {
1230 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1231 }else{
1232 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1233 }
1234}
e2b5e7aa 1235
1236static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1237{
1238 assert(offset>-4096&&offset<4096);
1239 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1240 if(offset>=0) {
1241 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1242 }else{
1243 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1244 }
1245}
e2b5e7aa 1246
e2b5e7aa 1247static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1248{
1249 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1251}
e2b5e7aa 1252
1253static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1254{
1255 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1257}
e2b5e7aa 1258
1259static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1260{
1261 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1262 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1263}
e2b5e7aa 1264
643aeae3 1265static void emit_writeword(int rt, void *addr)
57871462 1266{
643aeae3 1267 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1268 assert(offset<4096);
1269 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1270 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1271}
e2b5e7aa 1272
e2b5e7aa 1273static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1274{
1275 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1276 assert(rs1<16);
1277 assert(rs2<16);
1278 assert(hi<16);
1279 assert(lo<16);
1280 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1281}
e2b5e7aa 1282
1283static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1284{
1285 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1286 assert(rs1<16);
1287 assert(rs2<16);
1288 assert(hi<16);
1289 assert(lo<16);
1290 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1291}
1292
e2b5e7aa 1293static void emit_clz(int rs,int rt)
57871462 1294{
1295 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1296 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1297}
1298
e2b5e7aa 1299static void emit_subcs(int rs1,int rs2,int rt)
57871462 1300{
1301 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1302 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1303}
1304
e2b5e7aa 1305static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1306{
1307 assert(imm>0);
1308 assert(imm<32);
1309 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1310 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1311}
1312
e2b5e7aa 1313static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1318 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1319}
1320
e2b5e7aa 1321static void emit_negmi(int rs, int rt)
57871462 1322{
1323 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1324 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1325}
1326
e2b5e7aa 1327static void emit_negsmi(int rs, int rt)
57871462 1328{
1329 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1330 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1331}
1332
e2b5e7aa 1333static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1334{
1335 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1336 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1337}
1338
e2b5e7aa 1339static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1340{
1341 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1342 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1343}
1344
e2b5e7aa 1345static void emit_teq(int rs, int rt)
57871462 1346{
1347 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1348 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1349}
1350
3968e69e 1351static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1352{
1353 u_int armval;
cfbd3c6e 1354 genimm_checked(imm,&armval);
57871462 1355 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1357}
1358
57871462 1359// Conditionally select one of two immediates, optimizing for small code size
1360// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1361static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1362{
1363 u_int armval;
1364 if(genimm(imm2-imm1,&armval)) {
1365 emit_movimm(imm1,rt);
1366 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1367 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1368 }else if(genimm(imm1-imm2,&armval)) {
1369 emit_movimm(imm1,rt);
1370 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1371 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1372 }
1373 else {
665f33e1 1374 #ifndef HAVE_ARMV7
57871462 1375 emit_movimm(imm1,rt);
1376 add_literal((int)out,imm2);
1377 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1378 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1379 #else
1380 emit_movw(imm1&0x0000FFFF,rt);
1381 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1382 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1383 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1384 }
1385 emit_movt(imm1&0xFFFF0000,rt);
1386 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1387 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1388 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1389 }
1390 #endif
1391 }
1392}
1393
57871462 1394// special case for checking invalid_code
e2b5e7aa 1395static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1396{
1397 assert(imm<128&&imm>=0);
1398 assert(r>=0&&r<16);
1399 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1400 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1401 emit_cmpimm(HOST_TEMPREG,imm);
1402}
1403
// Emit "blne a": call executed only on not-equal.
static void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
1410
57871462 1411// Used to preload hash table entries
e2b5e7aa 1412static unused void emit_prefetchreg(int r)
57871462 1413{
1414 assem_debug("pld %s\n",regname[r]);
1415 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1416}
1417
1418// Special case for mini_ht
e2b5e7aa 1419static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1420{
1421 assert(offset<4096);
1422 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1423 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1424}
1425
e2b5e7aa 1426static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1427{
1428 u_int armval;
cfbd3c6e 1429 genimm_checked(imm,&armval);
b9b61529 1430 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1431 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1432}
1433
e2b5e7aa 1434static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1435{
1436 u_int armval;
1437 genimm_checked(imm,&armval);
1438 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1439 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1440}
1441
e2b5e7aa 1442static void emit_jno_unlikely(int a)
57871462 1443{
1444 //emit_jno(a);
1445 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1446 output_w32(0x72800000|rd_rn_rm(15,15,0));
1447}
1448
054175e9 1449static void save_regs_all(u_int reglist)
57871462 1450{
054175e9 1451 int i;
57871462 1452 if(!reglist) return;
1453 assem_debug("stmia fp,{");
054175e9 1454 for(i=0;i<16;i++)
1455 if(reglist&(1<<i))
1456 assem_debug("r%d,",i);
57871462 1457 assem_debug("}\n");
1458 output_w32(0xe88b0000|reglist);
1459}
e2b5e7aa 1460
054175e9 1461static void restore_regs_all(u_int reglist)
57871462 1462{
054175e9 1463 int i;
57871462 1464 if(!reglist) return;
1465 assem_debug("ldmia fp,{");
054175e9 1466 for(i=0;i<16;i++)
1467 if(reglist&(1<<i))
1468 assem_debug("r%d,",i);
57871462 1469 assem_debug("}\n");
1470 output_w32(0xe89b0000|reglist);
1471}
e2b5e7aa 1472
054175e9 1473// Save registers before function call
1474static void save_regs(u_int reglist)
1475{
4d646738 1476 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1477 save_regs_all(reglist);
1478}
e2b5e7aa 1479
054175e9 1480// Restore registers after function call
1481static void restore_regs(u_int reglist)
1482{
4d646738 1483 reglist&=CALLER_SAVE_REGS;
054175e9 1484 restore_regs_all(reglist);
1485}
57871462 1486
57871462 1487/* Stubs/epilogue */
1488
e2b5e7aa 1489static void literal_pool(int n)
57871462 1490{
1491 if(!literalcount) return;
1492 if(n) {
1493 if((int)out-literals[0][0]<4096-n) return;
1494 }
1495 u_int *ptr;
1496 int i;
1497 for(i=0;i<literalcount;i++)
1498 {
77750690 1499 u_int l_addr=(u_int)out;
1500 int j;
1501 for(j=0;j<i;j++) {
1502 if(literals[j][1]==literals[i][1]) {
1503 //printf("dup %08x\n",literals[i][1]);
1504 l_addr=literals[j][0];
1505 break;
1506 }
1507 }
57871462 1508 ptr=(u_int *)literals[i][0];
77750690 1509 u_int offset=l_addr-(u_int)ptr-8;
57871462 1510 assert(offset<4096);
1511 assert(!(offset&3));
1512 *ptr|=offset;
77750690 1513 if(l_addr==(u_int)out) {
1514 literals[i][0]=l_addr; // remember for dupes
1515 output_w32(literals[i][1]);
1516 }
57871462 1517 }
1518 literalcount=0;
1519}
1520
e2b5e7aa 1521static void literal_pool_jumpover(int n)
57871462 1522{
1523 if(!literalcount) return;
1524 if(n) {
1525 if((int)out-literals[0][0]<4096-n) return;
1526 }
df4dc2b1 1527 void *jaddr = out;
57871462 1528 emit_jmp(0);
1529 literal_pool(0);
df4dc2b1 1530 set_jump_target(jaddr, out);
57871462 1531}
1532
7c3a5182 1533// parsed by get_pointer, find_extjump_insn
104df9d3 1534static void emit_extjump(u_char *addr, u_int target)
57871462 1535{
1536 u_char *ptr=(u_char *)addr;
1537 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1538 (void)ptr;
1539
57871462 1540 emit_loadlp(target,0);
643aeae3 1541 emit_loadlp((u_int)addr,1);
66ea165f 1542 assert(ndrc->translation_cache <= addr &&
1543 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
104df9d3 1544 emit_far_jump(dyna_linker);
57871462 1545}
1546
d1e4ebd9 1547static void check_extjump2(void *src)
1548{
1549 u_int *ptr = src;
1550 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1551 (void)ptr;
1552}
1553
13e35c04 1554// put rt_val into rt, potentially making use of rs with value rs_val
1555static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1556{
8575a877 1557 u_int armval;
1558 int diff;
1559 if(genimm(rt_val,&armval)) {
1560 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1561 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1562 return;
1563 }
1564 if(genimm(~rt_val,&armval)) {
1565 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1566 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1567 return;
1568 }
1569 diff=rt_val-rs_val;
1570 if(genimm(diff,&armval)) {
1571 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1572 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1573 return;
1574 }else if(genimm(-diff,&armval)) {
1575 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1576 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1577 return;
1578 }
1579 emit_movimm(rt_val,rt);
1580}
1581
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. when v2-v1
// or v1-v2, after dropping trailing pairs of zero bits, fits in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int delta = v2 - v1;
  u_int bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100 ? 1 : 0;
}
cbbab9cd 1597
b14b6a8f 1598static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1599{
1600 switch(type) {
1601 case LOADB_STUB: emit_signextend8(rs,rt); break;
1602 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1603 case LOADH_STUB: emit_signextend16(rs,rt); break;
1604 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1605 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1606 default: assert(0);
1607 }
1608}
1609
b1be1eee 1610#include "pcsxmem.h"
1611#include "pcsxmem_inline.c"
b1be1eee 1612
e2b5e7aa 1613static void do_readstub(int n)
57871462 1614{
b14b6a8f 1615 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1616 literal_pool(256);
b14b6a8f 1617 set_jump_target(stubs[n].addr, out);
1618 enum stub_type type=stubs[n].type;
1619 int i=stubs[n].a;
1620 int rs=stubs[n].b;
81dbbf4c 1621 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1622 u_int reglist=stubs[n].e;
81dbbf4c 1623 const signed char *i_regmap=i_regs->regmap;
581335b0 1624 int rt;
cf95b4f0 1625 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1626 rt=get_reg(i_regmap,FTEMP);
1627 }else{
cf95b4f0 1628 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1629 }
1630 assert(rs>=0);
df4dc2b1 1631 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1632 void *restore_jump = NULL;
c6c3b1b3 1633 reglist|=(1<<rs);
1634 for(r=0;r<=12;r++) {
1635 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1636 temp=r; break;
1637 }
1638 }
cf95b4f0 1639 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1640 reglist&=~(1<<rt);
1641 if(temp==-1) {
1642 save_regs(reglist);
1643 regs_saved=1;
1644 temp=(rs==0)?2:0;
1645 }
1646 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1647 temp2=1;
643aeae3 1648 emit_readword(&mem_rtab,temp);
c6c3b1b3 1649 emit_shrimm(rs,12,temp2);
1650 emit_readword_dualindexedx4(temp,temp2,temp2);
1651 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1652 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1653 switch(type) {
1654 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1655 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1656 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1657 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1658 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1659 default: assert(0);
c6c3b1b3 1660 }
1661 }
1662 if(regs_saved) {
df4dc2b1 1663 restore_jump=out;
c6c3b1b3 1664 emit_jcc(0); // jump to reg restore
1665 }
1666 else
b14b6a8f 1667 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1668
1669 if(!regs_saved)
1670 save_regs(reglist);
643aeae3 1671 void *handler=NULL;
c6c3b1b3 1672 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1673 handler=jump_handler_read8;
c6c3b1b3 1674 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1675 handler=jump_handler_read16;
c6c3b1b3 1676 if(type==LOADW_STUB)
643aeae3 1677 handler=jump_handler_read32;
1678 assert(handler);
b96d3df7 1679 pass_args(rs,temp2);
c6c3b1b3 1680 int cc=get_reg(i_regmap,CCREG);
1681 if(cc<0)
1682 emit_loadreg(CCREG,2);
2330734f 1683 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1684 emit_far_call(handler);
cf95b4f0 1685 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1686 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1687 }
1688 if(restore_jump)
df4dc2b1 1689 set_jump_target(restore_jump, out);
c6c3b1b3 1690 restore_regs(reglist);
b14b6a8f 1691 emit_jmp(stubs[n].retaddr); // return address
57871462 1692}
1693
81dbbf4c 1694static void inline_readstub(enum stub_type type, int i, u_int addr,
1695 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1696{
1697 int rs=get_reg(regmap,target);
57871462 1698 int rt=get_reg(regmap,target);
9de8a0c3 1699 if(rs<0) rs=get_reg_temp(regmap);
57871462 1700 assert(rs>=0);
2a014d73 1701 u_int is_dynamic;
687b4580 1702 uintptr_t host_addr = 0;
643aeae3 1703 void *handler;
b1be1eee 1704 int cc=get_reg(regmap,CCREG);
2330734f 1705 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1706 return;
643aeae3 1707 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1708 if (handler == NULL) {
cf95b4f0 1709 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1710 return;
13e35c04 1711 if(addr!=host_addr)
1712 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1713 switch(type) {
1714 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1715 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1716 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1717 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1718 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1719 default: assert(0);
1720 }
1721 return;
1722 }
b1be1eee 1723 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1724 if(is_dynamic) {
1725 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1726 handler=jump_handler_read8;
b1be1eee 1727 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1728 handler=jump_handler_read16;
b1be1eee 1729 if(type==LOADW_STUB)
643aeae3 1730 handler=jump_handler_read32;
b1be1eee 1731 }
c6c3b1b3 1732
1733 // call a memhandler
cf95b4f0 1734 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1735 reglist&=~(1<<rt);
1736 save_regs(reglist);
1737 if(target==0)
1738 emit_movimm(addr,0);
1739 else if(rs!=0)
1740 emit_mov(rs,0);
b1be1eee 1741 if(cc<0)
1742 emit_loadreg(CCREG,2);
1743 if(is_dynamic) {
1744 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1745 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1746 }
b1be1eee 1747 else {
643aeae3 1748 emit_readword(&last_count,3);
2330734f 1749 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1750 emit_add(2,3,2);
643aeae3 1751 emit_writeword(2,&Count);
b1be1eee 1752 }
1753
2a014d73 1754 emit_far_call(handler);
b1be1eee 1755
cf95b4f0 1756 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1757 switch(type) {
1758 case LOADB_STUB: emit_signextend8(0,rt); break;
1759 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1760 case LOADH_STUB: emit_signextend16(0,rt); break;
1761 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1762 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1763 default: assert(0);
1764 }
1765 }
1766 restore_regs(reglist);
57871462 1767}
1768
e2b5e7aa 1769static void do_writestub(int n)
57871462 1770{
b14b6a8f 1771 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1772 literal_pool(256);
b14b6a8f 1773 set_jump_target(stubs[n].addr, out);
1774 enum stub_type type=stubs[n].type;
1775 int i=stubs[n].a;
1776 int rs=stubs[n].b;
81dbbf4c 1777 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1778 u_int reglist=stubs[n].e;
81dbbf4c 1779 const signed char *i_regmap=i_regs->regmap;
581335b0 1780 int rt,r;
cf95b4f0 1781 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1782 rt=get_reg(i_regmap,r=FTEMP);
1783 }else{
cf95b4f0 1784 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1785 }
1786 assert(rs>=0);
1787 assert(rt>=0);
b14b6a8f 1788 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1789 void *restore_jump = NULL;
b96d3df7 1790 int reglist2=reglist|(1<<rs)|(1<<rt);
1791 for(rtmp=0;rtmp<=12;rtmp++) {
1792 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1793 temp=rtmp; break;
1794 }
1795 }
1796 if(temp==-1) {
1797 save_regs(reglist);
1798 regs_saved=1;
1799 for(rtmp=0;rtmp<=3;rtmp++)
1800 if(rtmp!=rs&&rtmp!=rt)
1801 {temp=rtmp;break;}
1802 }
1803 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1804 temp2=3;
643aeae3 1805 emit_readword(&mem_wtab,temp);
b96d3df7 1806 emit_shrimm(rs,12,temp2);
1807 emit_readword_dualindexedx4(temp,temp2,temp2);
1808 emit_lsls_imm(temp2,1,temp2);
1809 switch(type) {
1810 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1811 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1812 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1813 default: assert(0);
1814 }
1815 if(regs_saved) {
df4dc2b1 1816 restore_jump=out;
b96d3df7 1817 emit_jcc(0); // jump to reg restore
1818 }
1819 else
b14b6a8f 1820 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1821
1822 if(!regs_saved)
1823 save_regs(reglist);
643aeae3 1824 void *handler=NULL;
b96d3df7 1825 switch(type) {
643aeae3 1826 case STOREB_STUB: handler=jump_handler_write8; break;
1827 case STOREH_STUB: handler=jump_handler_write16; break;
1828 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1829 default: assert(0);
b96d3df7 1830 }
643aeae3 1831 assert(handler);
b96d3df7 1832 pass_args(rs,rt);
1833 if(temp2!=3)
1834 emit_mov(temp2,3);
1835 int cc=get_reg(i_regmap,CCREG);
1836 if(cc<0)
1837 emit_loadreg(CCREG,2);
2330734f 1838 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1839 // returns new cycle_count
2a014d73 1840 emit_far_call(handler);
2330734f 1841 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1842 if(cc<0)
1843 emit_storereg(CCREG,2);
1844 if(restore_jump)
df4dc2b1 1845 set_jump_target(restore_jump, out);
b96d3df7 1846 restore_regs(reglist);
b14b6a8f 1847 emit_jmp(stubs[n].retaddr);
57871462 1848}
1849
81dbbf4c 1850static void inline_writestub(enum stub_type type, int i, u_int addr,
1851 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1852{
9de8a0c3 1853 int rs=get_reg_temp(regmap);
57871462 1854 int rt=get_reg(regmap,target);
1855 assert(rs>=0);
1856 assert(rt>=0);
687b4580 1857 uintptr_t host_addr = 0;
643aeae3 1858 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1859 if (handler == NULL) {
13e35c04 1860 if(addr!=host_addr)
1861 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1862 switch(type) {
1863 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1864 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1865 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1866 default: assert(0);
1867 }
1868 return;
1869 }
1870
1871 // call a memhandler
1872 save_regs(reglist);
13e35c04 1873 pass_args(rs,rt);
b96d3df7 1874 int cc=get_reg(regmap,CCREG);
1875 if(cc<0)
1876 emit_loadreg(CCREG,2);
2330734f 1877 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1878 emit_movimm((u_int)handler,3);
b96d3df7 1879 // returns new cycle_count
2a014d73 1880 emit_far_call(jump_handler_write_h);
2330734f 1881 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1882 if(cc<0)
1883 emit_storereg(CCREG,2);
1884 restore_regs(reglist);
57871462 1885}
1886
57871462 1887/* Special assem */
1888
81dbbf4c 1889static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1890{
1891 save_regs_all(reglist);
32631e6a 1892 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 1893#ifdef PCNT
81dbbf4c 1894 emit_movimm(op, 0);
2a014d73 1895 emit_far_call(pcnt_gte_start);
82ed88eb 1896#endif
81dbbf4c 1897 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1898}
1899
1900static void c2op_epilogue(u_int op,u_int reglist)
1901{
82ed88eb 1902#ifdef PCNT
1903 emit_movimm(op,0);
2a014d73 1904 emit_far_call(pcnt_gte_end);
82ed88eb 1905#endif
054175e9 1906 restore_regs_all(reglist);
1907}
1908
6c0eefaf 1909static void c2op_call_MACtoIR(int lm,int need_flags)
1910{
1911 if(need_flags)
2a014d73 1912 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 1913 else
2a014d73 1914 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 1915}
1916
1917static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1918{
2a014d73 1919 emit_far_call(func);
6c0eefaf 1920 // func is C code and trashes r0
1921 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1922 if(need_flags||need_ir)
1923 c2op_call_MACtoIR(lm,need_flags);
2a014d73 1924 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 1925}
1926
81dbbf4c 1927static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 1928{
81dbbf4c 1929 u_int c2op = source[i] & 0x3f;
1930 u_int reglist_full = get_host_reglist(i_regs->regmap);
1931 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1932 int need_flags, need_ir;
b9b61529 1933
1934 if (gte_handlers[c2op]!=NULL) {
bedfea38 1935 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 1936 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 1937 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
1938 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 1939 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 1940 need_flags=0;
6c0eefaf 1941 int shift = (source[i] >> 19) & 1;
1942 int lm = (source[i] >> 10) & 1;
054175e9 1943 switch(c2op) {
19776aef 1944#ifndef DRC_DBG
054175e9 1945 case GTE_MVMVA: {
82336ba3 1946#ifdef HAVE_ARMV5
054175e9 1947 int v = (source[i] >> 15) & 3;
1948 int cv = (source[i] >> 13) & 3;
1949 int mx = (source[i] >> 17) & 3;
4d646738 1950 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 1951 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 1952 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
1953 if(v<3)
1954 emit_ldrd(v*8,0,4);
1955 else {
1956 emit_movzwl_indexed(9*4,0,4); // gteIR
1957 emit_movzwl_indexed(10*4,0,6);
1958 emit_movzwl_indexed(11*4,0,5);
1959 emit_orrshl_imm(6,16,4);
1960 }
1961 if(mx<3)
1962 emit_addimm(0,32*4+mx*8*4,6);
1963 else
643aeae3 1964 emit_readword(&zeromem_ptr,6);
054175e9 1965 if(cv<3)
1966 emit_addimm(0,32*4+(cv*8+5)*4,7);
1967 else
643aeae3 1968 emit_readword(&zeromem_ptr,7);
054175e9 1969#ifdef __ARM_NEON__
1970 emit_movimm(source[i],1); // opcode
2a014d73 1971 emit_far_call(gteMVMVA_part_neon);
054175e9 1972 if(need_flags) {
1973 emit_movimm(lm,1);
2a014d73 1974 emit_far_call(gteMACtoIR_flags_neon);
054175e9 1975 }
1976#else
1977 if(cv==3&&shift)
33788798 1978 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 1979 else {
1980 emit_movimm(shift,1);
33788798 1981 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 1982 }
6c0eefaf 1983 if(need_flags||need_ir)
1984 c2op_call_MACtoIR(lm,need_flags);
82336ba3 1985#endif
1986#else /* if not HAVE_ARMV5 */
81dbbf4c 1987 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 1988 emit_movimm(source[i],1); // opcode
643aeae3 1989 emit_writeword(1,&psxRegs.code);
2a014d73 1990 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 1991#endif
1992 break;
1993 }
6c0eefaf 1994 case GTE_OP:
81dbbf4c 1995 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 1996 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 1997 if(need_flags||need_ir) {
1998 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1999 c2op_call_MACtoIR(lm,need_flags);
2000 }
2001 break;
2002 case GTE_DPCS:
81dbbf4c 2003 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2004 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2005 break;
2006 case GTE_INTPL:
81dbbf4c 2007 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2008 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2009 break;
2010 case GTE_SQR:
81dbbf4c 2011 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2012 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2013 if(need_flags||need_ir) {
2014 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2015 c2op_call_MACtoIR(lm,need_flags);
2016 }
2017 break;
2018 case GTE_DCPL:
81dbbf4c 2019 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2020 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2021 break;
2022 case GTE_GPF:
81dbbf4c 2023 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2024 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2025 break;
2026 case GTE_GPL:
81dbbf4c 2027 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2028 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2029 break;
19776aef 2030#endif
054175e9 2031 default:
81dbbf4c 2032 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2033#ifdef DRC_DBG
2034 emit_movimm(source[i],1); // opcode
643aeae3 2035 emit_writeword(1,&psxRegs.code);
19776aef 2036#endif
2a014d73 2037 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2038 break;
2039 }
2040 c2op_epilogue(c2op,reglist);
2041 }
b9b61529 2042}
2043
// Emit code computing the value stored on a write to GTE control
// register 31 (FLAG):
//   value = value & 0x7ffff000;
//   if (value & 0x7f87e000) value |= 0x80000000;
//   sl   - host register holding the value being written
//   temp - host register receiving the result
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // value & 0x7ffff000 done with shifts. The incoming bit 31 must be
  // dropped as well, otherwise a stale summary bit survives when none
  // of the error bits are set.
  emit_shrimm(sl,12,temp);   // discard bits 0-11
  emit_shlimm(temp,13,temp); // push original bit 31 out of the register
  emit_shrimm(temp,1,temp);  // bits 12-30 back in place, bit 31 clear
  // 0x7f87e000 tested in three chunks; each following test is only
  // performed while the previous ones found no bit set
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2055
2056static void do_mfc2_31_one(u_int copr,signed char temp)
2057{
2058 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2059 emit_lsls_imm(temp,16,temp);
2060 emit_cmovs_imm(0,temp);
2061 emit_cmpimm(temp,0xf80<<16);
2062 emit_andimm(temp,0xf80<<16,temp);
2063 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2064}
2065
2066static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2067{
2068 if (temp < 0) {
2069 host_tempreg_acquire();
2070 temp = HOST_TEMPREG;
2071 }
2072 do_mfc2_31_one(9,temp);
9c997d19 2073 emit_shrimm(temp,7+16,tl);
3968e69e 2074 do_mfc2_31_one(10,temp);
9c997d19 2075 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2076 do_mfc2_31_one(11,temp);
9c997d19 2077 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2078 emit_writeword(tl,&reg_cop2d[29]);
2079 if (temp == HOST_TEMPREG)
2080 host_tempreg_release();
2081}
2082
2330734f 2083static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2084{
2085 // case 0x18: MULT
2086 // case 0x19: MULTU
2087 // case 0x1A: DIV
2088 // case 0x1B: DIVU
2089 // case 0x1C: DMULT
2090 // case 0x1D: DMULTU
2091 // case 0x1E: DDIV
2092 // case 0x1F: DDIVU
cf95b4f0 2093 if(dops[i].rs1&&dops[i].rs2)
57871462 2094 {
cf95b4f0 2095 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2096 {
cf95b4f0 2097 if(dops[i].opcode2==0x18) // MULT
57871462 2098 {
cf95b4f0 2099 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2100 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2101 signed char hi=get_reg(i_regs->regmap,HIREG);
2102 signed char lo=get_reg(i_regs->regmap,LOREG);
2103 assert(m1>=0);
2104 assert(m2>=0);
2105 assert(hi>=0);
2106 assert(lo>=0);
2107 emit_smull(m1,m2,hi,lo);
2108 }
cf95b4f0 2109 if(dops[i].opcode2==0x19) // MULTU
57871462 2110 {
cf95b4f0 2111 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2112 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2113 signed char hi=get_reg(i_regs->regmap,HIREG);
2114 signed char lo=get_reg(i_regs->regmap,LOREG);
2115 assert(m1>=0);
2116 assert(m2>=0);
2117 assert(hi>=0);
2118 assert(lo>=0);
2119 emit_umull(m1,m2,hi,lo);
2120 }
cf95b4f0 2121 if(dops[i].opcode2==0x1A) // DIV
57871462 2122 {
cf95b4f0 2123 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2124 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2125 assert(d1>=0);
2126 assert(d2>=0);
2127 signed char quotient=get_reg(i_regs->regmap,LOREG);
2128 signed char remainder=get_reg(i_regs->regmap,HIREG);
2129 assert(quotient>=0);
2130 assert(remainder>=0);
2131 emit_movs(d1,remainder);
44a80f6a 2132 emit_movimm(0xffffffff,quotient);
2133 emit_negmi(quotient,quotient); // .. quotient and ..
2134 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2135 emit_movs(d2,HOST_TEMPREG);
7c3a5182 2136 emit_jeq(out+52); // Division by zero
82336ba3 2137 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2138#ifdef HAVE_ARMV5
57871462 2139 emit_clz(HOST_TEMPREG,quotient);
2140 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2141#else
2142 emit_movimm(0,quotient);
2143 emit_addpl_imm(quotient,1,quotient);
2144 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2145 emit_jns(out-2*4);
665f33e1 2146#endif
57871462 2147 emit_orimm(quotient,1<<31,quotient);
2148 emit_shr(quotient,quotient,quotient);
2149 emit_cmp(remainder,HOST_TEMPREG);
2150 emit_subcs(remainder,HOST_TEMPREG,remainder);
2151 emit_adcs(quotient,quotient,quotient);
2152 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2153 emit_jcc(out-16); // -4
57871462 2154 emit_teq(d1,d2);
2155 emit_negmi(quotient,quotient);
2156 emit_test(d1,d1);
2157 emit_negmi(remainder,remainder);
2158 }
cf95b4f0 2159 if(dops[i].opcode2==0x1B) // DIVU
57871462 2160 {
cf95b4f0 2161 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2162 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2163 assert(d1>=0);
2164 assert(d2>=0);
2165 signed char quotient=get_reg(i_regs->regmap,LOREG);
2166 signed char remainder=get_reg(i_regs->regmap,HIREG);
2167 assert(quotient>=0);
2168 assert(remainder>=0);
44a80f6a 2169 emit_mov(d1,remainder);
2170 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2171 emit_test(d2,d2);
7c3a5182 2172 emit_jeq(out+40); // Division by zero
665f33e1 2173#ifdef HAVE_ARMV5
57871462 2174 emit_clz(d2,HOST_TEMPREG);
2175 emit_movimm(1<<31,quotient);
2176 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2177#else
2178 emit_movimm(0,HOST_TEMPREG);
82336ba3 2179 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2180 emit_lslpls_imm(d2,1,d2);
7c3a5182 2181 emit_jns(out-2*4);
665f33e1 2182 emit_movimm(1<<31,quotient);
2183#endif
57871462 2184 emit_shr(quotient,HOST_TEMPREG,quotient);
2185 emit_cmp(remainder,d2);
2186 emit_subcs(remainder,d2,remainder);
2187 emit_adcs(quotient,quotient,quotient);
2188 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2189 emit_jcc(out-16); // -4
57871462 2190 }
2191 }
2192 else // 64-bit
71e490c5 2193 assert(0);
57871462 2194 }
2195 else
2196 {
2197 // Multiply by zero is zero.
2198 // MIPS does not have a divide by zero exception.
2199 // The result is undefined, we return zero.
2200 signed char hr=get_reg(i_regs->regmap,HIREG);
2201 signed char lr=get_reg(i_regs->regmap,LOREG);
2202 if(hr>=0) emit_zeroreg(hr);
2203 if(lr>=0) emit_zeroreg(lr);
2204 }
2205}
2206#define multdiv_assemble multdiv_assemble_arm
2207
d1e4ebd9 2208static void do_jump_vaddr(int rs)
2209{
2a014d73 2210 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2211}
2212
// Intentionally emits nothing on ARM.
// On x86 this step puts the value 0xf8 into the register; on ARM the
// hash can be done with a single instruction (see do_rhash).
static void do_preload_rhash(int r)
{
  (void)r;
}
2217
e2b5e7aa 2218static void do_preload_rhtbl(int ht) {
57871462 2219 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2220}
2221
// Emit the mini hash table hash: rh = rs & 0xf8.
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
2225
e2b5e7aa 2226static void do_miniht_load(int ht,int rh) {
57871462 2227 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2228 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2229}
2230
// Emit the mini hash table dispatch: compare rh with rs and, on a match,
// load host register 15 (pc) from [ht+4]. Otherwise fall through to the
// generic jump_vaddr path.
static void do_miniht_jump(int rs, int rh, int ht)
{
  int vaddr_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the slow path always goes through host register 7
  if (vaddr_reg != 7) {
    emit_mov(vaddr_reg, 7);
    vaddr_reg = 7;
  }
#endif
  do_jump_vaddr(vaddr_reg);
}
2241
e2b5e7aa 2242static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2243 #ifndef HAVE_ARMV7
57871462 2244 emit_movimm(return_address,rt); // PC into link register
643aeae3 2245 add_to_linker(out,return_address,1);
57871462 2246 emit_pcreladdr(temp);
643aeae3 2247 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2248 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2249 #else
2250 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2251 add_to_linker(out,return_address,1);
57871462 2252 emit_pcreladdr(temp);
643aeae3 2253 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2254 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2255 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2256 #endif
2257}
2258
57871462 2259// CPU-architecture-specific initialization
2a014d73 2260static void arch_init(void)
2261{
2262 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2263 struct tramp_insns *ops = ndrc->tramp.ops;
2264 size_t i;
2265 assert(!(diff & 3));
2266 assert(diff < 0x1000);
2267 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2268 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2269 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2270 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2271}
b9b61529 2272
2273// vim:shiftwidth=2:expandtab