drc: rework block tracking
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
e2b5e7aa 30#define unused __attribute__((unused))
31
dd114d7d 32#ifdef DRC_DBG
33#pragma GCC diagnostic ignored "-Wunused-function"
34#pragma GCC diagnostic ignored "-Wunused-variable"
35#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
36#endif
37
/* Routines declared here have no definition in this part of the file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

/* One jump_vaddr_rN routine per host register number N (r11 has none). */
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
53
b14b6a8f 54void * const jump_vaddr_reg[16] = {
55 jump_vaddr_r0,
56 jump_vaddr_r1,
57 jump_vaddr_r2,
58 jump_vaddr_r3,
59 jump_vaddr_r4,
60 jump_vaddr_r5,
61 jump_vaddr_r6,
62 jump_vaddr_r7,
63 jump_vaddr_r8,
64 jump_vaddr_r9,
65 jump_vaddr_r10,
57871462 66 0,
b14b6a8f 67 jump_vaddr_r12,
57871462 68 0,
69 0,
b14b6a8f 70 0
71};
57871462 72
/* One invalidate_addr_rN routine per host register number N (r11 has none). */
void invalidate_addr_r0();
void invalidate_addr_r1();
void invalidate_addr_r2();
void invalidate_addr_r3();
void invalidate_addr_r4();
void invalidate_addr_r5();
void invalidate_addr_r6();
void invalidate_addr_r7();
void invalidate_addr_r8();
void invalidate_addr_r9();
void invalidate_addr_r10();
void invalidate_addr_r12();
85
86const u_int invalidate_addr_reg[16] = {
87 (int)invalidate_addr_r0,
88 (int)invalidate_addr_r1,
89 (int)invalidate_addr_r2,
90 (int)invalidate_addr_r3,
91 (int)invalidate_addr_r4,
92 (int)invalidate_addr_r5,
93 (int)invalidate_addr_r6,
94 (int)invalidate_addr_r7,
95 (int)invalidate_addr_r8,
96 (int)invalidate_addr_r9,
97 (int)invalidate_addr_r10,
98 0,
99 (int)invalidate_addr_r12,
100 0,
101 0,
102 0};
103
57871462 104/* Linker */
105
/*
 * Patch the 32-bit instruction at addr so that it refers to target_.
 *
 * The form of the patch is chosen from the instruction's top byte:
 *   0xe2       - 8-bit immediate with rotate field 0xF00 (word offset),
 *                the offset must be below 1024
 *   0x72       - same immediate form when the offset allows it (rotate
 *                0xF00 or 0xE00), otherwise rewritten as a 0x7A branch
 *   otherwise  - must be a branch (top byte & 0x0e == 0x0a); its 24-bit
 *                offset field is replaced
 * Offsets are taken relative to the instruction address plus 8.
 */
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = addr;
  const u_char top = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  const u_int disp = target - (u_int)insn - 8;

  if (top == 0xe2) {
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && (disp & 15) == 0) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
}
137
// Disabled variant of set_jump_target: optionally copies the instruction
// found at the branch target into the slot just before the branch and
// retargets the branch past it. Works, but the difference in speed is
// usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top = ((u_char *)addr)[3];

  assert(!copy || insn[-1] == 0xe28dd000);
  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0               // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000   // Load
      || (target_insn & 0x08000000))
    copy = 0;
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 172
173/* Literal pool */
e2b5e7aa 174static void add_literal(int addr,int val)
57871462 175{
15776b68 176 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 177 literals[literalcount][0]=addr;
178 literals[literalcount][1]=val;
9f51b4b9 179 literalcount++;
180}
57871462 181
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the pointer stored in the literal that the stub's second word
// (a pc-relative ldr) refers to, i.e. where the jumping insn is.
static void *find_extjump_insn(void *stub)
{
  u_char *ldr_addr = (u_char *)stub + 4;
  int ldr_insn = *(int *)ldr_addr;

  assert((ldr_insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]

  // the literal sits at the ldr's address + 8 + its 12-bit offset
  u_int pool_ofs = ldr_insn & 0xfff;
  return *(void **)(ldr_addr + pool_ofs + 8);
}
192
// Disabled: find where an external branch is linked to, using the address
// of its stub:
// get the address that the insn one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
#if 0
static void *get_pointer(void *stub)
{
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000); // b
  int disp = (*branch << 8) >> 6; // sign-extended 24-bit field, times 4
  return (u_char *)branch + disp + 8;
}
#endif
57871462 206
57871462 207// Allocate a specific ARM register.
e2b5e7aa 208static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 209{
210 int n;
f776eb14 211 int dirty=0;
9f51b4b9 212
57871462 213 // see if it's already allocated (and dealloc it)
214 for(n=0;n<HOST_REGS;n++)
215 {
f776eb14 216 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
217 dirty=(cur->dirty>>n)&1;
218 cur->regmap[n]=-1;
219 }
57871462 220 }
9f51b4b9 221
57871462 222 cur->regmap[hr]=reg;
223 cur->dirty&=~(1<<hr);
f776eb14 224 cur->dirty|=dirty<<hr;
57871462 225 cur->isconst&=~(1<<hr);
226}
227
228// Alloc cycle count into dedicated register
e2b5e7aa 229static void alloc_cc(struct regstat *cur,int i)
57871462 230{
231 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
232}
233
57871462 234/* Assembler */
235
e2b5e7aa 236static unused char regname[16][4] = {
57871462 237 "r0",
238 "r1",
239 "r2",
240 "r3",
241 "r4",
242 "r5",
243 "r6",
244 "r7",
245 "r8",
246 "r9",
247 "r10",
248 "fp",
249 "r12",
250 "sp",
251 "lr",
252 "pc"};
253
e2b5e7aa 254static void output_w32(u_int word)
57871462 255{
256 *((u_int *)out)=word;
257 out+=4;
258}
e2b5e7aa 259
/*
 * Pack three register numbers into instruction fields:
 * rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
 * Each register number must be below 16.
 */
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;

  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  fields  = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 267
/*
 * Pack rd, rn and an 8-bit immediate shifted left by `shift` bits.
 * `shift` must be even; it is stored as the rotate value (32-shift)&30
 * at bits 8-11.
 */
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate_field;

  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
e2b5e7aa 276
/*
 * Try to express imm as an 8-bit value with an even rotation.
 * On success stores the 12-bit operand field (rotate at bits 8-11, value
 * at bits 0-7) in *encoded and returns 1. Otherwise returns 0 and leaves
 * *encoded at 0.
 */
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30); // rotate right by two bits
  }
  return 0;
}
e2b5e7aa 292
293static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 294{
295 u_int ret=genimm(imm,encoded);
296 assert(ret);
581335b0 297 (void)ret;
cfbd3c6e 298}
e2b5e7aa 299
300static u_int genjmp(u_int addr)
57871462 301{
7c3a5182 302 if (addr < 3) return 0; // a branch that will be patched later
303 int offset = addr-(int)out-8;
304 if (offset < -33554432 || offset >= 33554432) {
305 SysPrintf("genjmp: out of range: %08x\n", offset);
306 abort();
e80343e2 307 return 0;
308 }
57871462 309 return ((u_int)offset>>2)&0xffffff;
310}
311
d1e4ebd9 312static unused void emit_breakpoint(void)
313{
314 assem_debug("bkpt #0\n");
315 //output_w32(0xe1200070);
316 output_w32(0xe7f001f0);
317}
318
e2b5e7aa 319static void emit_mov(int rs,int rt)
57871462 320{
321 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
322 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
323}
324
e2b5e7aa 325static void emit_movs(int rs,int rt)
57871462 326{
327 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
329}
330
e2b5e7aa 331static void emit_add(int rs1,int rs2,int rt)
57871462 332{
333 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
334 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
335}
336
39b71d9a 337static void emit_adds(int rs1,int rs2,int rt)
338{
339 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
340 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
341}
342#define emit_adds_ptr emit_adds
343
e2b5e7aa 344static void emit_adcs(int rs1,int rs2,int rt)
57871462 345{
346 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
347 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
348}
349
e2b5e7aa 350static void emit_neg(int rs, int rt)
57871462 351{
352 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
353 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
354}
355
e2b5e7aa 356static void emit_sub(int rs1,int rs2,int rt)
57871462 357{
358 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
359 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
360}
361
e2b5e7aa 362static void emit_zeroreg(int rt)
57871462 363{
364 assem_debug("mov %s,#0\n",regname[rt]);
365 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
366}
367
e2b5e7aa 368static void emit_loadlp(u_int imm,u_int rt)
790ee18e 369{
370 add_literal((int)out,imm);
371 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
372 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
373}
e2b5e7aa 374
33788798 375#ifdef HAVE_ARMV7
e2b5e7aa 376static void emit_movw(u_int imm,u_int rt)
790ee18e 377{
378 assert(imm<65536);
379 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
380 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
381}
e2b5e7aa 382
383static void emit_movt(u_int imm,u_int rt)
790ee18e 384{
385 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
386 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
387}
33788798 388#endif
e2b5e7aa 389
390static void emit_movimm(u_int imm,u_int rt)
790ee18e 391{
392 u_int armval;
393 if(genimm(imm,&armval)) {
394 assem_debug("mov %s,#%d\n",regname[rt],imm);
395 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
396 }else if(genimm(~imm,&armval)) {
397 assem_debug("mvn %s,#%d\n",regname[rt],imm);
398 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
399 }else if(imm<65536) {
665f33e1 400 #ifndef HAVE_ARMV7
790ee18e 401 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
402 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
403 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
404 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
405 #else
406 emit_movw(imm,rt);
407 #endif
408 }else{
665f33e1 409 #ifndef HAVE_ARMV7
790ee18e 410 emit_loadlp(imm,rt);
411 #else
412 emit_movw(imm&0x0000FFFF,rt);
413 emit_movt(imm&0xFFFF0000,rt);
414 #endif
415 }
416}
e2b5e7aa 417
418static void emit_pcreladdr(u_int rt)
790ee18e 419{
420 assem_debug("add %s,pc,#?\n",regname[rt]);
421 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
422}
423
e2b5e7aa 424static void emit_loadreg(int r, int hr)
57871462 425{
53358c1d 426 assert(hr != EXCLUDE_REG);
427 if (r == 0)
57871462 428 emit_zeroreg(hr);
429 else {
33788798 430 void *addr;
7c3a5182 431 switch (r) {
432 //case HIREG: addr = &hi; break;
433 //case LOREG: addr = &lo; break;
33788798 434 case CCREG: addr = &cycle_count; break;
435 case CSREG: addr = &Status; break;
436 case INVCP: addr = &invc_ptr; break;
437 case ROREG: addr = &ram_offset; break;
438 default:
439 assert(r < 34);
440 addr = &psxRegs.GPR.r[r];
441 break;
7c3a5182 442 }
33788798 443 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 444 assert(offset<4096);
6cc8d23c 445 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 446 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
447 }
448}
e2b5e7aa 449
450static void emit_storereg(int r, int hr)
57871462 451{
53358c1d 452 assert(hr != EXCLUDE_REG);
7c3a5182 453 int addr = (int)&psxRegs.GPR.r[r];
454 switch (r) {
455 //case HIREG: addr = &hi; break;
456 //case LOREG: addr = &lo; break;
457 case CCREG: addr = (int)&cycle_count; break;
458 default: assert(r < 34); break;
459 }
57871462 460 u_int offset = addr-(u_int)&dynarec_local;
461 assert(offset<4096);
6cc8d23c 462 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 463 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
464}
465
e2b5e7aa 466static void emit_test(int rs, int rt)
57871462 467{
468 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
469 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
470}
471
e2b5e7aa 472static void emit_testimm(int rs,int imm)
57871462 473{
474 u_int armval;
5a05d80c 475 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 476 genimm_checked(imm,&armval);
57871462 477 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
478}
479
e2b5e7aa 480static void emit_testeqimm(int rs,int imm)
b9b61529 481{
482 u_int armval;
483 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 484 genimm_checked(imm,&armval);
b9b61529 485 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
486}
487
e2b5e7aa 488static void emit_not(int rs,int rt)
57871462 489{
490 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
491 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
492}
493
e2b5e7aa 494static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 495{
496 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
497 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
498}
499
e2b5e7aa 500static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 501{
502 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
503 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
504}
e2b5e7aa 505
e2b5e7aa 506static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 507{
508 assert(rs<16);
509 assert(rt<16);
510 assert(imm<32);
511 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
512 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
513}
514
e2b5e7aa 515static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 516{
517 assert(rs<16);
518 assert(rt<16);
519 assert(imm<32);
520 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
521 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
522}
523
e2b5e7aa 524static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 525{
526 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
527 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
528}
529
3968e69e 530static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
531{
532 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
533 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
534}
535
e2b5e7aa 536static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 537{
538 assert(rs<16);
539 assert(rt<16);
540 if(imm!=0) {
57871462 541 u_int armval;
542 if(genimm(imm,&armval)) {
543 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
544 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
545 }else if(genimm(-imm,&armval)) {
8a0a8423 546 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 547 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 548 #ifdef HAVE_ARMV7
549 }else if(rt!=rs&&(u_int)imm<65536) {
550 emit_movw(imm&0x0000ffff,rt);
551 emit_add(rs,rt,rt);
552 }else if(rt!=rs&&(u_int)-imm<65536) {
553 emit_movw(-imm&0x0000ffff,rt);
554 emit_sub(rs,rt,rt);
555 #endif
556 }else if((u_int)-imm<65536) {
57871462 557 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
558 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
559 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
560 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 561 }else {
562 do {
563 int shift = (ffs(imm) - 1) & ~1;
564 int imm8 = imm & (0xff << shift);
565 genimm_checked(imm8,&armval);
566 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
567 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
568 rs = rt;
569 imm &= ~imm8;
570 }
571 while (imm != 0);
57871462 572 }
573 }
574 else if(rs!=rt) emit_mov(rs,rt);
575}
576
e2b5e7aa 577static void emit_addimm_and_set_flags(int imm,int rt)
57871462 578{
579 assert(imm>-65536&&imm<65536);
580 u_int armval;
581 if(genimm(imm,&armval)) {
582 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
583 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
584 }else if(genimm(-imm,&armval)) {
585 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
586 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
587 }else if(imm<0) {
588 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
589 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
590 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
591 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
592 }else{
593 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
594 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
595 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
596 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
597 }
598}
e2b5e7aa 599
e2b5e7aa 600static void emit_addnop(u_int r)
57871462 601{
602 assert(r<16);
603 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
604 output_w32(0xe2800000|rd_rn_rm(r,r,0));
605}
606
e2b5e7aa 607static void emit_andimm(int rs,int imm,int rt)
57871462 608{
609 u_int armval;
790ee18e 610 if(imm==0) {
611 emit_zeroreg(rt);
612 }else if(genimm(imm,&armval)) {
57871462 613 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
614 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
615 }else if(genimm(~imm,&armval)) {
616 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
617 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
618 }else if(imm==65535) {
332a4533 619 #ifndef HAVE_ARMV6
57871462 620 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
621 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
622 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
623 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
624 #else
625 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
626 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
627 #endif
628 }else{
629 assert(imm>0&&imm<65535);
665f33e1 630 #ifndef HAVE_ARMV7
57871462 631 assem_debug("mov r14,#%d\n",imm&0xFF00);
632 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
633 assem_debug("add r14,r14,#%d\n",imm&0xFF);
634 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
635 #else
636 emit_movw(imm,HOST_TEMPREG);
637 #endif
638 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
639 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
640 }
641}
642
e2b5e7aa 643static void emit_orimm(int rs,int imm,int rt)
57871462 644{
645 u_int armval;
790ee18e 646 if(imm==0) {
647 if(rs!=rt) emit_mov(rs,rt);
648 }else if(genimm(imm,&armval)) {
57871462 649 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
650 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
651 }else{
652 assert(imm>0&&imm<65536);
653 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
654 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
655 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
656 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
657 }
658}
659
e2b5e7aa 660static void emit_xorimm(int rs,int imm,int rt)
57871462 661{
57871462 662 u_int armval;
790ee18e 663 if(imm==0) {
664 if(rs!=rt) emit_mov(rs,rt);
665 }else if(genimm(imm,&armval)) {
57871462 666 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
667 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
668 }else{
514ed0d9 669 assert(imm>0&&imm<65536);
57871462 670 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
671 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
672 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
673 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
674 }
675}
676
e2b5e7aa 677static void emit_shlimm(int rs,u_int imm,int rt)
57871462 678{
679 assert(imm>0);
680 assert(imm<32);
681 //if(imm==1) ...
682 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
683 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
684}
685
e2b5e7aa 686static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 687{
688 assert(imm>0);
689 assert(imm<32);
690 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
691 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
692}
693
e2b5e7aa 694static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 695{
696 assert(imm>0);
697 assert(imm<32);
698 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
699 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
700}
701
e2b5e7aa 702static void emit_shrimm(int rs,u_int imm,int rt)
57871462 703{
704 assert(imm>0);
705 assert(imm<32);
706 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
707 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
708}
709
e2b5e7aa 710static void emit_sarimm(int rs,u_int imm,int rt)
57871462 711{
712 assert(imm>0);
713 assert(imm<32);
714 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
715 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
716}
717
e2b5e7aa 718static void emit_rorimm(int rs,u_int imm,int rt)
57871462 719{
720 assert(imm>0);
721 assert(imm<32);
722 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
723 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
724}
725
/*
 * Sign-extend the low 16 bits of rs into rt: "sxth" with HAVE_ARMV6,
 * otherwise a shift left by 16 followed by an arithmetic shift right.
 */
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
#endif
}
736
/*
 * Sign-extend the low 8 bits of rs into rt: "sxtb" with HAVE_ARMV6,
 * otherwise a shift left by 24 followed by an arithmetic shift right.
 */
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
#endif
}
747
e2b5e7aa 748static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 749{
750 assert(rs<16);
751 assert(rt<16);
752 assert(shift<16);
753 //if(imm==1) ...
754 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
755 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
756}
e2b5e7aa 757
758static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 759{
760 assert(rs<16);
761 assert(rt<16);
762 assert(shift<16);
763 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
764 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
765}
e2b5e7aa 766
767static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 768{
769 assert(rs<16);
770 assert(rt<16);
771 assert(shift<16);
772 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
773 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
774}
57871462 775
3968e69e 776static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 777{
778 assert(rs<16);
779 assert(rt<16);
780 assert(shift<16);
781 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
782 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
783}
e2b5e7aa 784
3968e69e 785static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 786{
787 assert(rs<16);
788 assert(rt<16);
789 assert(shift<16);
790 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
791 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
792}
793
e2b5e7aa 794static void emit_cmpimm(int rs,int imm)
57871462 795{
796 u_int armval;
797 if(genimm(imm,&armval)) {
5a05d80c 798 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 799 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
800 }else if(genimm(-imm,&armval)) {
5a05d80c 801 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 802 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
803 }else if(imm>0) {
804 assert(imm<65536);
57871462 805 emit_movimm(imm,HOST_TEMPREG);
57871462 806 assem_debug("cmp %s,r14\n",regname[rs]);
807 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
808 }else{
809 assert(imm>-65536);
57871462 810 emit_movimm(-imm,HOST_TEMPREG);
57871462 811 assem_debug("cmn %s,r14\n",regname[rs]);
812 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
813 }
814}
815
e2b5e7aa 816static void emit_cmovne_imm(int imm,int rt)
57871462 817{
818 assem_debug("movne %s,#%d\n",regname[rt],imm);
819 u_int armval;
cfbd3c6e 820 genimm_checked(imm,&armval);
57871462 821 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
822}
e2b5e7aa 823
824static void emit_cmovl_imm(int imm,int rt)
57871462 825{
826 assem_debug("movlt %s,#%d\n",regname[rt],imm);
827 u_int armval;
cfbd3c6e 828 genimm_checked(imm,&armval);
57871462 829 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
830}
e2b5e7aa 831
832static void emit_cmovb_imm(int imm,int rt)
57871462 833{
834 assem_debug("movcc %s,#%d\n",regname[rt],imm);
835 u_int armval;
cfbd3c6e 836 genimm_checked(imm,&armval);
57871462 837 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
838}
e2b5e7aa 839
3968e69e 840static void emit_cmovae_imm(int imm,int rt)
841{
842 assem_debug("movcs %s,#%d\n",regname[rt],imm);
843 u_int armval;
844 genimm_checked(imm,&armval);
845 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
846}
847
9c997d19 848static void emit_cmovs_imm(int imm,int rt)
849{
850 assem_debug("movmi %s,#%d\n",regname[rt],imm);
851 u_int armval;
852 genimm_checked(imm,&armval);
853 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
854}
855
e2b5e7aa 856static void emit_cmovne_reg(int rs,int rt)
57871462 857{
858 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
859 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
860}
e2b5e7aa 861
862static void emit_cmovl_reg(int rs,int rt)
57871462 863{
864 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
865 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
866}
e2b5e7aa 867
e3c6bdb5 868static void emit_cmovb_reg(int rs,int rt)
869{
870 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
871 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
872}
873
e2b5e7aa 874static void emit_cmovs_reg(int rs,int rt)
57871462 875{
876 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
877 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
878}
879
/*
 * Emit rt = (rs < imm) using a signed compare (movlt).
 * rt is zeroed before the compare when it is a different register, and
 * after it (with a mov immediate) when rt is rs itself.
 */
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 887
/*
 * Emit rt = (rs < imm) using an unsigned compare (movcc).
 * rt is zeroed before the compare when it is a different register, and
 * after it (with a mov immediate) when rt is rs itself.
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 895
e2b5e7aa 896static void emit_cmp(int rs,int rt)
57871462 897{
898 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
899 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
900}
e2b5e7aa 901
/*
 * Emit rt = (rs > 0): compare rs with 1, preset rt to 1,
 * then clear it with movlt when rs < 1.
 */
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 909
/*
 * Emit rt = (rs != 0): set the flags from rs (movs into rt, or tst when
 * rt is rs), then movne rt,#1.
 */
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 917
/*
 * Emit rt = (rs1 < rs2) using a signed compare (movlt).
 * rt is zeroed before the compare unless it aliases one of the sources,
 * in which case it is zeroed afterwards with a mov immediate.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 926
/*
 * Emit rt = (rs1 < rs2) using an unsigned compare (movcc).
 * rt is zeroed before the compare unless it aliases one of the sources,
 * in which case it is zeroed afterwards with a mov immediate.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 935
2a014d73 936static int can_jump_or_call(const void *a)
937{
938 intptr_t offset = (u_char *)a - out - 8;
939 return (-33554432 <= offset && offset < 33554432);
940}
941
643aeae3 942static void emit_call(const void *a_)
57871462 943{
643aeae3 944 int a = (int)a_;
d1e4ebd9 945 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 946 u_int offset=genjmp(a);
947 output_w32(0xeb000000|offset);
948}
e2b5e7aa 949
b14b6a8f 950static void emit_jmp(const void *a_)
57871462 951{
b14b6a8f 952 int a = (int)a_;
d1e4ebd9 953 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 954 u_int offset=genjmp(a);
955 output_w32(0xea000000|offset);
956}
e2b5e7aa 957
/* Emit "bne" to address a_. */
static void emit_jne(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bne %x\n",a);
  output_w32(0x1a000000 | genjmp(a));
}
e2b5e7aa 965
/* Emit "beq" to address a_. */
static void emit_jeq(const void *a_)
{
  const int a = (int)a_;

  assem_debug("beq %x\n",a);
  output_w32(0x0a000000 | genjmp(a));
}
e2b5e7aa 973
/* Emit "bmi" to address a_. */
static void emit_js(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000 | genjmp(a));
}
e2b5e7aa 981
/* Emit "bpl" to address a_. */
static void emit_jns(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000 | genjmp(a));
}
e2b5e7aa 989
/* Emit "blt" to address a_. */
static void emit_jl(const void *a_)
{
  const int a = (int)a_;

  assem_debug("blt %x\n",a);
  output_w32(0xba000000 | genjmp(a));
}
e2b5e7aa 997
/* Emit "bge" to address a_. */
static void emit_jge(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bge %x\n",a);
  output_w32(0xaa000000 | genjmp(a));
}
e2b5e7aa 1005
/* Emit "bvc" to address a_. */
static void emit_jno(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000 | genjmp(a));
}
e2b5e7aa 1013
/* Emit "bcs" to address a_. */
static void emit_jc(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000 | genjmp(a));
}
e2b5e7aa 1021
/* Emit "bcc" to address a_. */
static void emit_jcc(const void *a_)
{
  const int a = (int)a_;

  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000 | genjmp(a));
}
1029
3968e69e 1030static unused void emit_callreg(u_int r)
57871462 1031{
c6c3b1b3 1032 assert(r<15);
1033 assem_debug("blx %s\n",regname[r]);
1034 output_w32(0xe12fff30|r);
57871462 1035}
e2b5e7aa 1036
1037static void emit_jmpreg(u_int r)
57871462 1038{
1039 assem_debug("mov pc,%s\n",regname[r]);
1040 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1041}
1042
// Emit a return: jump through register 14 (the ARM link register).
static void emit_ret(void)
{
  enum { LINK_REG = 14 };

  emit_jmpreg(LINK_REG);
}
1047
e2b5e7aa 1048static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1049{
1050 assert(offset>-4096&&offset<4096);
1051 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1052 if(offset>=0) {
1053 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1054 }else{
1055 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1056 }
1057}
e2b5e7aa 1058
1059static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1060{
1061 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1062 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1063}
39b71d9a 1064#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1065
1066static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1067{
1068 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1069 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1070}
e2b5e7aa 1071
1072static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1073{
1074 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1075 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1076}
e2b5e7aa 1077
37387d8b 1078static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1079{
1080 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1081 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1082}
1083
e2b5e7aa 1084static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1085{
1086 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1087 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1088}
e2b5e7aa 1089
37387d8b 1090static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1091{
1092 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1093 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1094}
1095
e2b5e7aa 1096static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1097{
1098 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1099 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1100}
e2b5e7aa 1101
37387d8b 1102static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1103{
1104 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1105 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1106}
1107
e2b5e7aa 1108static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1109{
1110 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1111 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1112}
e2b5e7aa 1113
37387d8b 1114static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1115{
1116 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1117 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1118}
1119
e2b5e7aa 1120static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1121{
1122 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1124}
1125
1126static void emit_str_dualindexed(int rs1, int rs2, int rt)
1127{
1128 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1129 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1130}
1131
1132static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1133{
1134 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1135 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1136}
1137
1138static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1139{
1140 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1141 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1142}
e2b5e7aa 1143
e2b5e7aa 1144static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1145{
1146 assert(offset>-256&&offset<256);
1147 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1148 if(offset>=0) {
1149 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1150 }else{
1151 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1152 }
1153}
e2b5e7aa 1154
e2b5e7aa 1155static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1156{
1157 assert(offset>-256&&offset<256);
1158 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1159 if(offset>=0) {
1160 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1161 }else{
1162 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1163 }
1164}
e2b5e7aa 1165
1166static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1167{
1168 assert(offset>-4096&&offset<4096);
1169 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1170 if(offset>=0) {
1171 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1172 }else{
1173 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1174 }
1175}
e2b5e7aa 1176
e2b5e7aa 1177static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1178{
1179 assert(offset>-256&&offset<256);
1180 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1181 if(offset>=0) {
1182 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1183 }else{
1184 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1185 }
1186}
e2b5e7aa 1187
054175e9 1188static void emit_ldrd(int offset, int rs, int rt)
1189{
1190 assert(offset>-256&&offset<256);
1191 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1192 if(offset>=0) {
1193 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1194 }else{
1195 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1196 }
1197}
e2b5e7aa 1198
643aeae3 1199static void emit_readword(void *addr, int rt)
57871462 1200{
643aeae3 1201 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1202 assert(offset<4096);
1203 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1204 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1205}
39b71d9a 1206#define emit_readptr emit_readword
e2b5e7aa 1207
e2b5e7aa 1208static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1209{
1210 assert(offset>-4096&&offset<4096);
1211 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1212 if(offset>=0) {
1213 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1214 }else{
1215 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1216 }
1217}
e2b5e7aa 1218
e2b5e7aa 1219static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1220{
1221 assert(offset>-256&&offset<256);
1222 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1223 if(offset>=0) {
1224 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1225 }else{
1226 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1227 }
1228}
e2b5e7aa 1229
1230static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1231{
1232 assert(offset>-4096&&offset<4096);
1233 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1234 if(offset>=0) {
1235 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1236 }else{
1237 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1238 }
1239}
e2b5e7aa 1240
e2b5e7aa 1241static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1242{
1243 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1244 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1245}
e2b5e7aa 1246
1247static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1248{
1249 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1250 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1251}
e2b5e7aa 1252
1253static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1254{
1255 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1256 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1257}
e2b5e7aa 1258
643aeae3 1259static void emit_writeword(int rt, void *addr)
57871462 1260{
643aeae3 1261 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1262 assert(offset<4096);
1263 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1264 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1265}
e2b5e7aa 1266
e2b5e7aa 1267static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1268{
1269 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1270 assert(rs1<16);
1271 assert(rs2<16);
1272 assert(hi<16);
1273 assert(lo<16);
1274 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1275}
e2b5e7aa 1276
1277static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1278{
1279 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1280 assert(rs1<16);
1281 assert(rs2<16);
1282 assert(hi<16);
1283 assert(lo<16);
1284 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1285}
1286
e2b5e7aa 1287static void emit_clz(int rs,int rt)
57871462 1288{
1289 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1290 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1291}
1292
e2b5e7aa 1293static void emit_subcs(int rs1,int rs2,int rt)
57871462 1294{
1295 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1296 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1297}
1298
e2b5e7aa 1299static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1300{
1301 assert(imm>0);
1302 assert(imm<32);
1303 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1305}
1306
e2b5e7aa 1307static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1308{
1309 assert(imm>0);
1310 assert(imm<32);
1311 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1312 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1313}
1314
e2b5e7aa 1315static void emit_negmi(int rs, int rt)
57871462 1316{
1317 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1318 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1319}
1320
e2b5e7aa 1321static void emit_negsmi(int rs, int rt)
57871462 1322{
1323 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1324 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1325}
1326
e2b5e7aa 1327static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1328{
1329 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1330 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1331}
1332
e2b5e7aa 1333static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1334{
1335 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1336 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1337}
1338
e2b5e7aa 1339static void emit_teq(int rs, int rt)
57871462 1340{
1341 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1342 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1343}
1344
3968e69e 1345static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1346{
1347 u_int armval;
cfbd3c6e 1348 genimm_checked(imm,&armval);
57871462 1349 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1351}
1352
57871462 1353// Conditionally select one of two immediates, optimizing for small code size
1354// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1355static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1356{
1357 u_int armval;
1358 if(genimm(imm2-imm1,&armval)) {
1359 emit_movimm(imm1,rt);
1360 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1361 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1362 }else if(genimm(imm1-imm2,&armval)) {
1363 emit_movimm(imm1,rt);
1364 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1365 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1366 }
1367 else {
665f33e1 1368 #ifndef HAVE_ARMV7
57871462 1369 emit_movimm(imm1,rt);
1370 add_literal((int)out,imm2);
1371 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1372 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1373 #else
1374 emit_movw(imm1&0x0000FFFF,rt);
1375 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1376 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1377 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1378 }
1379 emit_movt(imm1&0xFFFF0000,rt);
1380 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1381 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1382 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1383 }
1384 #endif
1385 }
1386}
1387
57871462 1388// special case for checking invalid_code
e2b5e7aa 1389static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1390{
1391 assert(imm<128&&imm>=0);
1392 assert(r>=0&&r<16);
1393 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1394 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1395 emit_cmpimm(HOST_TEMPREG,imm);
1396}
1397
e2b5e7aa 1398static void emit_callne(int a)
0bbd1454 1399{
1400 assem_debug("blne %x\n",a);
1401 u_int offset=genjmp(a);
1402 output_w32(0x1b000000|offset);
1403}
1404
57871462 1405// Used to preload hash table entries
e2b5e7aa 1406static unused void emit_prefetchreg(int r)
57871462 1407{
1408 assem_debug("pld %s\n",regname[r]);
1409 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1410}
1411
1412// Special case for mini_ht
e2b5e7aa 1413static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1414{
1415 assert(offset<4096);
1416 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1417 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1418}
1419
e2b5e7aa 1420static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1421{
1422 u_int armval;
cfbd3c6e 1423 genimm_checked(imm,&armval);
b9b61529 1424 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1425 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1426}
1427
e2b5e7aa 1428static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1429{
1430 u_int armval;
1431 genimm_checked(imm,&armval);
1432 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1433 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1434}
1435
e2b5e7aa 1436static void emit_jno_unlikely(int a)
57871462 1437{
1438 //emit_jno(a);
1439 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1440 output_w32(0x72800000|rd_rn_rm(15,15,0));
1441}
1442
054175e9 1443static void save_regs_all(u_int reglist)
57871462 1444{
054175e9 1445 int i;
57871462 1446 if(!reglist) return;
1447 assem_debug("stmia fp,{");
054175e9 1448 for(i=0;i<16;i++)
1449 if(reglist&(1<<i))
1450 assem_debug("r%d,",i);
57871462 1451 assem_debug("}\n");
1452 output_w32(0xe88b0000|reglist);
1453}
e2b5e7aa 1454
054175e9 1455static void restore_regs_all(u_int reglist)
57871462 1456{
054175e9 1457 int i;
57871462 1458 if(!reglist) return;
1459 assem_debug("ldmia fp,{");
054175e9 1460 for(i=0;i<16;i++)
1461 if(reglist&(1<<i))
1462 assem_debug("r%d,",i);
57871462 1463 assem_debug("}\n");
1464 output_w32(0xe89b0000|reglist);
1465}
e2b5e7aa 1466
054175e9 1467// Save registers before function call
1468static void save_regs(u_int reglist)
1469{
4d646738 1470 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1471 save_regs_all(reglist);
1472}
e2b5e7aa 1473
054175e9 1474// Restore registers after function call
1475static void restore_regs(u_int reglist)
1476{
4d646738 1477 reglist&=CALLER_SAVE_REGS;
054175e9 1478 restore_regs_all(reglist);
1479}
57871462 1480
57871462 1481/* Stubs/epilogue */
1482
e2b5e7aa 1483static void literal_pool(int n)
57871462 1484{
1485 if(!literalcount) return;
1486 if(n) {
1487 if((int)out-literals[0][0]<4096-n) return;
1488 }
1489 u_int *ptr;
1490 int i;
1491 for(i=0;i<literalcount;i++)
1492 {
77750690 1493 u_int l_addr=(u_int)out;
1494 int j;
1495 for(j=0;j<i;j++) {
1496 if(literals[j][1]==literals[i][1]) {
1497 //printf("dup %08x\n",literals[i][1]);
1498 l_addr=literals[j][0];
1499 break;
1500 }
1501 }
57871462 1502 ptr=(u_int *)literals[i][0];
77750690 1503 u_int offset=l_addr-(u_int)ptr-8;
57871462 1504 assert(offset<4096);
1505 assert(!(offset&3));
1506 *ptr|=offset;
77750690 1507 if(l_addr==(u_int)out) {
1508 literals[i][0]=l_addr; // remember for dupes
1509 output_w32(literals[i][1]);
1510 }
57871462 1511 }
1512 literalcount=0;
1513}
1514
e2b5e7aa 1515static void literal_pool_jumpover(int n)
57871462 1516{
1517 if(!literalcount) return;
1518 if(n) {
1519 if((int)out-literals[0][0]<4096-n) return;
1520 }
df4dc2b1 1521 void *jaddr = out;
57871462 1522 emit_jmp(0);
1523 literal_pool(0);
df4dc2b1 1524 set_jump_target(jaddr, out);
57871462 1525}
1526
7c3a5182 1527// parsed by get_pointer, find_extjump_insn
104df9d3 1528static void emit_extjump(u_char *addr, u_int target)
57871462 1529{
1530 u_char *ptr=(u_char *)addr;
1531 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1532 (void)ptr;
1533
57871462 1534 emit_loadlp(target,0);
643aeae3 1535 emit_loadlp((u_int)addr,1);
66ea165f 1536 assert(ndrc->translation_cache <= addr &&
1537 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
104df9d3 1538 emit_far_jump(dyna_linker);
57871462 1539}
1540
d1e4ebd9 1541static void check_extjump2(void *src)
1542{
1543 u_int *ptr = src;
1544 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1545 (void)ptr;
1546}
1547
13e35c04 1548// put rt_val into rt, potentially making use of rs with value rs_val
1549static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1550{
8575a877 1551 u_int armval;
1552 int diff;
1553 if(genimm(rt_val,&armval)) {
1554 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1555 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1556 return;
1557 }
1558 if(genimm(~rt_val,&armval)) {
1559 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1560 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1561 return;
1562 }
1563 diff=rt_val-rs_val;
1564 if(genimm(diff,&armval)) {
1565 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1566 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1567 return;
1568 }else if(genimm(-diff,&armval)) {
1569 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1570 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1571 return;
1572 }
1573 emit_movimm(rt_val,rt);
1574}
1575
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. v2 equals v1
// or the difference in either direction, after dropping trailing zero bit
// pairs, fits in 8 bits; return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  int delta;
  u_int bits;

  if(v1==v2)
    return 1;
  delta=v2-v1;

  // difference usable as an add immediate?
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // difference usable as a sub immediate?
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
cbbab9cd 1591
b14b6a8f 1592static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1593{
1594 switch(type) {
1595 case LOADB_STUB: emit_signextend8(rs,rt); break;
1596 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1597 case LOADH_STUB: emit_signextend16(rs,rt); break;
1598 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1599 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1600 default: assert(0);
1601 }
1602}
1603
b1be1eee 1604#include "pcsxmem.h"
1605#include "pcsxmem_inline.c"
b1be1eee 1606
e2b5e7aa 1607static void do_readstub(int n)
57871462 1608{
b14b6a8f 1609 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1610 literal_pool(256);
b14b6a8f 1611 set_jump_target(stubs[n].addr, out);
1612 enum stub_type type=stubs[n].type;
1613 int i=stubs[n].a;
1614 int rs=stubs[n].b;
81dbbf4c 1615 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1616 u_int reglist=stubs[n].e;
81dbbf4c 1617 const signed char *i_regmap=i_regs->regmap;
581335b0 1618 int rt;
cf95b4f0 1619 if(dops[i].itype==C1LS||dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1620 rt=get_reg(i_regmap,FTEMP);
1621 }else{
cf95b4f0 1622 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1623 }
1624 assert(rs>=0);
df4dc2b1 1625 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1626 void *restore_jump = NULL;
c6c3b1b3 1627 reglist|=(1<<rs);
1628 for(r=0;r<=12;r++) {
1629 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1630 temp=r; break;
1631 }
1632 }
cf95b4f0 1633 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1634 reglist&=~(1<<rt);
1635 if(temp==-1) {
1636 save_regs(reglist);
1637 regs_saved=1;
1638 temp=(rs==0)?2:0;
1639 }
1640 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1641 temp2=1;
643aeae3 1642 emit_readword(&mem_rtab,temp);
c6c3b1b3 1643 emit_shrimm(rs,12,temp2);
1644 emit_readword_dualindexedx4(temp,temp2,temp2);
1645 emit_lsls_imm(temp2,1,temp2);
cf95b4f0 1646 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1647 switch(type) {
1648 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1649 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1650 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1651 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1652 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1653 default: assert(0);
c6c3b1b3 1654 }
1655 }
1656 if(regs_saved) {
df4dc2b1 1657 restore_jump=out;
c6c3b1b3 1658 emit_jcc(0); // jump to reg restore
1659 }
1660 else
b14b6a8f 1661 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1662
1663 if(!regs_saved)
1664 save_regs(reglist);
643aeae3 1665 void *handler=NULL;
c6c3b1b3 1666 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1667 handler=jump_handler_read8;
c6c3b1b3 1668 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1669 handler=jump_handler_read16;
c6c3b1b3 1670 if(type==LOADW_STUB)
643aeae3 1671 handler=jump_handler_read32;
1672 assert(handler);
b96d3df7 1673 pass_args(rs,temp2);
c6c3b1b3 1674 int cc=get_reg(i_regmap,CCREG);
1675 if(cc<0)
1676 emit_loadreg(CCREG,2);
2330734f 1677 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1678 emit_far_call(handler);
cf95b4f0 1679 if(dops[i].itype==C1LS||dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1680 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1681 }
1682 if(restore_jump)
df4dc2b1 1683 set_jump_target(restore_jump, out);
c6c3b1b3 1684 restore_regs(reglist);
b14b6a8f 1685 emit_jmp(stubs[n].retaddr); // return address
57871462 1686}
1687
81dbbf4c 1688static void inline_readstub(enum stub_type type, int i, u_int addr,
1689 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1690{
1691 int rs=get_reg(regmap,target);
57871462 1692 int rt=get_reg(regmap,target);
9de8a0c3 1693 if(rs<0) rs=get_reg_temp(regmap);
57871462 1694 assert(rs>=0);
2a014d73 1695 u_int is_dynamic;
687b4580 1696 uintptr_t host_addr = 0;
643aeae3 1697 void *handler;
b1be1eee 1698 int cc=get_reg(regmap,CCREG);
2330734f 1699 if(pcsx_direct_read(type,addr,adj,cc,target?rs:-1,rt))
b1be1eee 1700 return;
643aeae3 1701 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1702 if (handler == NULL) {
cf95b4f0 1703 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1704 return;
13e35c04 1705 if(addr!=host_addr)
1706 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 1707 switch(type) {
1708 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
1709 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
1710 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
1711 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
1712 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
1713 default: assert(0);
1714 }
1715 return;
1716 }
b1be1eee 1717 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1718 if(is_dynamic) {
1719 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1720 handler=jump_handler_read8;
b1be1eee 1721 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1722 handler=jump_handler_read16;
b1be1eee 1723 if(type==LOADW_STUB)
643aeae3 1724 handler=jump_handler_read32;
b1be1eee 1725 }
c6c3b1b3 1726
1727 // call a memhandler
cf95b4f0 1728 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1729 reglist&=~(1<<rt);
1730 save_regs(reglist);
1731 if(target==0)
1732 emit_movimm(addr,0);
1733 else if(rs!=0)
1734 emit_mov(rs,0);
b1be1eee 1735 if(cc<0)
1736 emit_loadreg(CCREG,2);
1737 if(is_dynamic) {
1738 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1739 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1740 }
b1be1eee 1741 else {
643aeae3 1742 emit_readword(&last_count,3);
2330734f 1743 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1744 emit_add(2,3,2);
643aeae3 1745 emit_writeword(2,&Count);
b1be1eee 1746 }
1747
2a014d73 1748 emit_far_call(handler);
b1be1eee 1749
cf95b4f0 1750 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1751 switch(type) {
1752 case LOADB_STUB: emit_signextend8(0,rt); break;
1753 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1754 case LOADH_STUB: emit_signextend16(0,rt); break;
1755 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1756 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1757 default: assert(0);
1758 }
1759 }
1760 restore_regs(reglist);
57871462 1761}
1762
e2b5e7aa 1763static void do_writestub(int n)
57871462 1764{
b14b6a8f 1765 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1766 literal_pool(256);
b14b6a8f 1767 set_jump_target(stubs[n].addr, out);
1768 enum stub_type type=stubs[n].type;
1769 int i=stubs[n].a;
1770 int rs=stubs[n].b;
81dbbf4c 1771 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1772 u_int reglist=stubs[n].e;
81dbbf4c 1773 const signed char *i_regmap=i_regs->regmap;
581335b0 1774 int rt,r;
cf95b4f0 1775 if(dops[i].itype==C1LS||dops[i].itype==C2LS) {
57871462 1776 rt=get_reg(i_regmap,r=FTEMP);
1777 }else{
cf95b4f0 1778 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1779 }
1780 assert(rs>=0);
1781 assert(rt>=0);
b14b6a8f 1782 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1783 void *restore_jump = NULL;
b96d3df7 1784 int reglist2=reglist|(1<<rs)|(1<<rt);
1785 for(rtmp=0;rtmp<=12;rtmp++) {
1786 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1787 temp=rtmp; break;
1788 }
1789 }
1790 if(temp==-1) {
1791 save_regs(reglist);
1792 regs_saved=1;
1793 for(rtmp=0;rtmp<=3;rtmp++)
1794 if(rtmp!=rs&&rtmp!=rt)
1795 {temp=rtmp;break;}
1796 }
1797 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1798 temp2=3;
643aeae3 1799 emit_readword(&mem_wtab,temp);
b96d3df7 1800 emit_shrimm(rs,12,temp2);
1801 emit_readword_dualindexedx4(temp,temp2,temp2);
1802 emit_lsls_imm(temp2,1,temp2);
1803 switch(type) {
1804 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1805 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1806 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1807 default: assert(0);
1808 }
1809 if(regs_saved) {
df4dc2b1 1810 restore_jump=out;
b96d3df7 1811 emit_jcc(0); // jump to reg restore
1812 }
1813 else
b14b6a8f 1814 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1815
1816 if(!regs_saved)
1817 save_regs(reglist);
643aeae3 1818 void *handler=NULL;
b96d3df7 1819 switch(type) {
643aeae3 1820 case STOREB_STUB: handler=jump_handler_write8; break;
1821 case STOREH_STUB: handler=jump_handler_write16; break;
1822 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1823 default: assert(0);
b96d3df7 1824 }
643aeae3 1825 assert(handler);
b96d3df7 1826 pass_args(rs,rt);
1827 if(temp2!=3)
1828 emit_mov(temp2,3);
1829 int cc=get_reg(i_regmap,CCREG);
1830 if(cc<0)
1831 emit_loadreg(CCREG,2);
2330734f 1832 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
b96d3df7 1833 // returns new cycle_count
2a014d73 1834 emit_far_call(handler);
2330734f 1835 emit_addimm(0,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1836 if(cc<0)
1837 emit_storereg(CCREG,2);
1838 if(restore_jump)
df4dc2b1 1839 set_jump_target(restore_jump, out);
b96d3df7 1840 restore_regs(reglist);
b14b6a8f 1841 emit_jmp(stubs[n].retaddr);
57871462 1842}
1843
81dbbf4c 1844static void inline_writestub(enum stub_type type, int i, u_int addr,
1845 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1846{
9de8a0c3 1847 int rs=get_reg_temp(regmap);
57871462 1848 int rt=get_reg(regmap,target);
1849 assert(rs>=0);
1850 assert(rt>=0);
687b4580 1851 uintptr_t host_addr = 0;
643aeae3 1852 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1853 if (handler == NULL) {
13e35c04 1854 if(addr!=host_addr)
1855 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 1856 switch(type) {
1857 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
1858 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
1859 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
1860 default: assert(0);
1861 }
1862 return;
1863 }
1864
1865 // call a memhandler
1866 save_regs(reglist);
13e35c04 1867 pass_args(rs,rt);
b96d3df7 1868 int cc=get_reg(regmap,CCREG);
1869 if(cc<0)
1870 emit_loadreg(CCREG,2);
2330734f 1871 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1872 emit_movimm((u_int)handler,3);
b96d3df7 1873 // returns new cycle_count
2a014d73 1874 emit_far_call(jump_handler_write_h);
2330734f 1875 emit_addimm(0,-adj,cc<0?2:cc);
b96d3df7 1876 if(cc<0)
1877 emit_storereg(CCREG,2);
1878 restore_regs(reglist);
57871462 1879}
1880
57871462 1881/* Special assem */
1882
81dbbf4c 1883static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1884{
1885 save_regs_all(reglist);
32631e6a 1886 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 1887#ifdef PCNT
81dbbf4c 1888 emit_movimm(op, 0);
2a014d73 1889 emit_far_call(pcnt_gte_start);
82ed88eb 1890#endif
81dbbf4c 1891 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1892}
1893
1894static void c2op_epilogue(u_int op,u_int reglist)
1895{
82ed88eb 1896#ifdef PCNT
1897 emit_movimm(op,0);
2a014d73 1898 emit_far_call(pcnt_gte_end);
82ed88eb 1899#endif
054175e9 1900 restore_regs_all(reglist);
1901}
1902
6c0eefaf 1903static void c2op_call_MACtoIR(int lm,int need_flags)
1904{
1905 if(need_flags)
2a014d73 1906 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 1907 else
2a014d73 1908 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 1909}
1910
1911static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1912{
2a014d73 1913 emit_far_call(func);
6c0eefaf 1914 // func is C code and trashes r0
1915 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1916 if(need_flags||need_ir)
1917 c2op_call_MACtoIR(lm,need_flags);
2a014d73 1918 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 1919}
1920
/*
 * Emit host code for the GTE (COP2) operation at instruction index i.
 *
 *  i      - index into source[] / gte_unneeded[] of the instruction
 *  i_regs - register state; its regmap yields the host registers in use
 *
 * The operation number is the low 6 bits of the opcode. Nothing is emitted
 * when gte_handlers[] has no entry for it. Otherwise every path runs
 * c2op_prologue(), emits one or more far calls, and ends with
 * c2op_epilogue(). The specialised cases are compiled out under DRC_DBG, in
 * which case only the generic table-handler path in "default" remains.
 */
81dbbf4c 1921static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 1922{
81dbbf4c 1923 u_int c2op = source[i] & 0x3f;
1924 u_int reglist_full = get_host_reglist(i_regs->regmap);
1925 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1926 int need_flags, need_ir;
b9b61529 1927
1928 if (gte_handlers[c2op]!=NULL) {
// flags are needed unless bit 63 of the "unneeded" mask is set;
// IR is needed unless all three bits of 0xe00 are set
bedfea38 1929 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 1930 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 1931 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
1932 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 1933 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 1934 need_flags=0;
// opcode bit 19 -> shift, opcode bit 10 -> lm
6c0eefaf 1935 int shift = (source[i] >> 19) & 1;
1936 int lm = (source[i] >> 10) & 1;
054175e9 1937 switch(c2op) {
19776aef 1938#ifndef DRC_DBG
054175e9 1939 case GTE_MVMVA: {
82336ba3 1940#ifdef HAVE_ARMV5
// 2-bit selector fields taken from the opcode
054175e9 1941 int v = (source[i] >> 15) & 3;
1942 int cv = (source[i] >> 13) & 3;
1943 int mx = (source[i] >> 17) & 3;
4d646738 1944 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 1945 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 1946 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
1947 if(v<3)
1948 emit_ldrd(v*8,0,4);
1949 else {
1950 emit_movzwl_indexed(9*4,0,4); // gteIR
1951 emit_movzwl_indexed(10*4,0,6);
1952 emit_movzwl_indexed(11*4,0,5);
1953 emit_orrshl_imm(6,16,4);
1954 }
// selector value 3 uses the word read from zeromem_ptr instead of an
// address derived from r0
1955 if(mx<3)
1956 emit_addimm(0,32*4+mx*8*4,6);
1957 else
643aeae3 1958 emit_readword(&zeromem_ptr,6);
054175e9 1959 if(cv<3)
1960 emit_addimm(0,32*4+(cv*8+5)*4,7);
1961 else
643aeae3 1962 emit_readword(&zeromem_ptr,7);
054175e9 1963#ifdef __ARM_NEON__
1964 emit_movimm(source[i],1); // opcode
2a014d73 1965 emit_far_call(gteMVMVA_part_neon);
054175e9 1966 if(need_flags) {
1967 emit_movimm(lm,1);
2a014d73 1968 emit_far_call(gteMACtoIR_flags_neon);
054175e9 1969 }
1970#else
1971 if(cv==3&&shift)
33788798 1972 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 1973 else {
1974 emit_movimm(shift,1);
33788798 1975 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 1976 }
6c0eefaf 1977 if(need_flags||need_ir)
1978 c2op_call_MACtoIR(lm,need_flags);
82336ba3 1979#endif
1980#else /* if not HAVE_ARMV5 */
// generic path: store the opcode to psxRegs.code and call the table handler
81dbbf4c 1981 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 1982 emit_movimm(source[i],1); // opcode
643aeae3 1983 emit_writeword(1,&psxRegs.code);
2a014d73 1984 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 1985#endif
1986 break;
1987 }
6c0eefaf 1988 case GTE_OP:
81dbbf4c 1989 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 1990 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 1991 if(need_flags||need_ir) {
// r0 = FP + offset of psxRegs.CP2D.r[0] before the MAC->IR helper
1992 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1993 c2op_call_MACtoIR(lm,need_flags);
1994 }
1995 break;
1996 case GTE_DPCS:
81dbbf4c 1997 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 1998 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
1999 break;
2000 case GTE_INTPL:
81dbbf4c 2001 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2002 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2003 break;
2004 case GTE_SQR:
81dbbf4c 2005 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2006 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2007 if(need_flags||need_ir) {
2008 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2009 c2op_call_MACtoIR(lm,need_flags);
2010 }
2011 break;
2012 case GTE_DCPL:
81dbbf4c 2013 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2014 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2015 break;
2016 case GTE_GPF:
81dbbf4c 2017 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2018 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2019 break;
2020 case GTE_GPL:
81dbbf4c 2021 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2022 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2023 break;
19776aef 2024#endif
// every other operation (and all operations under DRC_DBG): call the handler
// from the table; under DRC_DBG the opcode is first stored to psxRegs.code
054175e9 2025 default:
81dbbf4c 2026 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2027#ifdef DRC_DBG
2028 emit_movimm(source[i],1); // opcode
643aeae3 2029 emit_writeword(1,&psxRegs.code);
19776aef 2030#endif
2a014d73 2031 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2032 break;
2033 }
2034 c2op_epilogue(c2op,reglist);
2035 }
b9b61529 2036}
2037
/*
 * Emit code that computes the value to store on a write to GTE control
 * register 31, i.e. the equivalent of:
 *
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 *  sl   - host register holding the value being written
 *  temp - host register that receives the result
 *
 * The masking is done with shifts: lsl #1 drops bit 31, lsr #13 drops the
 * low 12 bits of the original value, and lsl #12 moves bits 30..12 back into
 * place. A plain shr-12/shl-12 pair would leave a stale bit 31 from the
 * input in the result whenever none of the 0x7f87e000 bits are set, which
 * does not match the reference expression above.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // temp = sl & 0x7ffff000
  emit_shlimm(sl,1,temp);
  emit_shrimm(temp,13,temp);
  emit_shlimm(temp,12,temp);
  // chained tests: each later test only runs while the previous result was
  // zero, so "ne" at the end means some bit of 0x7f87e000 is set
  emit_testimm(temp,0x7f000000);
  emit_testeqimm(temp,0x00870000);
  emit_testeqimm(temp,0x0000e000);
  emit_orrne_imm(temp,0x80000000,temp);
}
2049
2050static void do_mfc2_31_one(u_int copr,signed char temp)
2051{
2052 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2053 emit_lsls_imm(temp,16,temp);
2054 emit_cmovs_imm(0,temp);
2055 emit_cmpimm(temp,0xf80<<16);
2056 emit_andimm(temp,0xf80<<16,temp);
2057 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2058}
2059
2060static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2061{
2062 if (temp < 0) {
2063 host_tempreg_acquire();
2064 temp = HOST_TEMPREG;
2065 }
2066 do_mfc2_31_one(9,temp);
9c997d19 2067 emit_shrimm(temp,7+16,tl);
3968e69e 2068 do_mfc2_31_one(10,temp);
9c997d19 2069 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2070 do_mfc2_31_one(11,temp);
9c997d19 2071 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2072 emit_writeword(tl,&reg_cop2d[29]);
2073 if (temp == HOST_TEMPREG)
2074 host_tempreg_release();
2075}
2076
/*
 * Emit host code for the MULT / MULTU / DIV / DIVU instruction at index i.
 *
 *  i      - instruction index into dops[]
 *  i_regs - register state; its regmap yields the host registers for the
 *           operands and for HIREG / LOREG
 *
 * When both dops[i].rs1 and dops[i].rs2 are non-zero the operation is
 * selected by dops[i].opcode2 (0x18..0x1B). Any opcode2 with bit 2 set
 * (the 64-bit forms) hits assert(0). When either source is 0, HI and LO are
 * simply zeroed if they have host registers.
 *
 * The divide sequences are open-coded shift-and-subtract loops that branch
 * to fixed byte offsets relative to `out` (presumably the current output
 * pointer), so the number and order of emit_* calls must not change.
 *
 * NOTE(review): the "division by zero" targets (out+52 for DIV, out+40 for
 * DIVU) are the same for both HAVE_ARMV5 branches, but the #else branch has
 * two more emit_* calls than the HAVE_ARMV5 one in each case. Confirm the
 * offsets are correct for non-ARMv5 builds.
 */
2330734f 2077static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2078{
2079 // case 0x18: MULT
2080 // case 0x19: MULTU
2081 // case 0x1A: DIV
2082 // case 0x1B: DIVU
2083 // case 0x1C: DMULT
2084 // case 0x1D: DMULTU
2085 // case 0x1E: DDIV
2086 // case 0x1F: DDIVU
cf95b4f0 2087 if(dops[i].rs1&&dops[i].rs2)
57871462 2088 {
cf95b4f0 2089 if((dops[i].opcode2&4)==0) // 32-bit
57871462 2090 {
cf95b4f0 2091 if(dops[i].opcode2==0x18) // MULT
57871462 2092 {
cf95b4f0 2093 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2094 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2095 signed char hi=get_reg(i_regs->regmap,HIREG);
2096 signed char lo=get_reg(i_regs->regmap,LOREG);
2097 assert(m1>=0);
2098 assert(m2>=0);
2099 assert(hi>=0);
2100 assert(lo>=0);
2101 emit_smull(m1,m2,hi,lo);
2102 }
cf95b4f0 2103 if(dops[i].opcode2==0x19) // MULTU
57871462 2104 {
cf95b4f0 2105 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2106 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2107 signed char hi=get_reg(i_regs->regmap,HIREG);
2108 signed char lo=get_reg(i_regs->regmap,LOREG);
2109 assert(m1>=0);
2110 assert(m2>=0);
2111 assert(hi>=0);
2112 assert(lo>=0);
2113 emit_umull(m1,m2,hi,lo);
2114 }
cf95b4f0 2115 if(dops[i].opcode2==0x1A) // DIV
57871462 2116 {
cf95b4f0 2117 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2118 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2119 assert(d1>=0);
2120 assert(d2>=0);
// quotient is produced in LO, remainder in HI
2121 signed char quotient=get_reg(i_regs->regmap,LOREG);
2122 signed char remainder=get_reg(i_regs->regmap,HIREG);
2123 assert(quotient>=0);
2124 assert(remainder>=0);
2125 emit_movs(d1,remainder);
44a80f6a 2126 emit_movimm(0xffffffff,quotient);
2127 emit_negmi(quotient,quotient); // .. quotient and ..
2128 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2129 emit_movs(d2,HOST_TEMPREG);
// fixed forward offset: depends on the number of instructions emitted below
7c3a5182 2130 emit_jeq(out+52); // Division by zero
82336ba3 2131 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2132#ifdef HAVE_ARMV5
57871462 2133 emit_clz(HOST_TEMPREG,quotient);
2134 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 2135#else
// without clz: loop, counting in quotient while shifting HOST_TEMPREG left
2136 emit_movimm(0,quotient);
2137 emit_addpl_imm(quotient,1,quotient);
2138 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2139 emit_jns(out-2*4);
665f33e1 2140#endif
57871462 2141 emit_orimm(quotient,1<<31,quotient);
2142 emit_shr(quotient,quotient,quotient);
// subtract-and-shift loop; emit_jcc below branches back to the emit_cmp
2143 emit_cmp(remainder,HOST_TEMPREG);
2144 emit_subcs(remainder,HOST_TEMPREG,remainder);
2145 emit_adcs(quotient,quotient,quotient);
2146 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2147 emit_jcc(out-16); // -4
// sign fix-up: negate quotient when teq(d1,d2) is negative, remainder when d1 is
57871462 2148 emit_teq(d1,d2);
2149 emit_negmi(quotient,quotient);
2150 emit_test(d1,d1);
2151 emit_negmi(remainder,remainder);
2152 }
cf95b4f0 2153 if(dops[i].opcode2==0x1B) // DIVU
57871462 2154 {
cf95b4f0 2155 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2156 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2157 assert(d1>=0);
2158 assert(d2>=0);
2159 signed char quotient=get_reg(i_regs->regmap,LOREG);
2160 signed char remainder=get_reg(i_regs->regmap,HIREG);
2161 assert(quotient>=0);
2162 assert(remainder>=0);
44a80f6a 2163 emit_mov(d1,remainder);
2164 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2165 emit_test(d2,d2);
// fixed forward offset: depends on the number of instructions emitted below
7c3a5182 2166 emit_jeq(out+40); // Division by zero
665f33e1 2167#ifdef HAVE_ARMV5
57871462 2168 emit_clz(d2,HOST_TEMPREG);
2169 emit_movimm(1<<31,quotient);
2170 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2171#else
2172 emit_movimm(0,HOST_TEMPREG);
82336ba3 2173 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2174 emit_lslpls_imm(d2,1,d2);
7c3a5182 2175 emit_jns(out-2*4);
665f33e1 2176 emit_movimm(1<<31,quotient);
2177#endif
57871462 2178 emit_shr(quotient,HOST_TEMPREG,quotient);
// subtract-and-shift loop; note that d2 itself is shifted here
2179 emit_cmp(remainder,d2);
2180 emit_subcs(remainder,d2,remainder);
2181 emit_adcs(quotient,quotient,quotient);
2182 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2183 emit_jcc(out-16); // -4
57871462 2184 }
2185 }
2186 else // 64-bit
71e490c5 2187 assert(0);
57871462 2188 }
2189 else
2190 {
2191 // Multiply by zero is zero.
2192 // MIPS does not have a divide by zero exception.
2193 // The result is undefined, we return zero.
2194 signed char hr=get_reg(i_regs->regmap,HIREG);
2195 signed char lr=get_reg(i_regs->regmap,LOREG);
2196 if(hr>=0) emit_zeroreg(hr);
2197 if(lr>=0) emit_zeroreg(lr);
2198 }
2199}
// name used for this routine by code outside this block
2200#define multdiv_assemble multdiv_assemble_arm
2201
d1e4ebd9 2202static void do_jump_vaddr(int rs)
2203{
2a014d73 2204 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2205}
2206
/*
 * Intentionally emits nothing on ARM. On x86 this step puts the value 0xf8
 * into the register; on ARM the hash is computed with a single instruction
 * in do_rhash().
 *
 *  r - host register (unused here)
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2211
e2b5e7aa 2212static void do_preload_rhtbl(int ht) {
57871462 2213 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2214}
2215
/*
 * Emit code computing the mini hash table hash: rh = rs & 0xf8.
 *
 *  rs - host register holding the value to hash
 *  rh - host register receiving the hash
 */
static void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;

  emit_andimm(rs,hash_mask,rh);
}
2219
e2b5e7aa 2220static void do_miniht_load(int ht,int rh) {
57871462 2221 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2222 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2223}
2224
/*
 * Emit the mini hash table dispatch: compare rh against rs and, on equality,
 * load register 15 (pc) from offset 4 of ht. The far jump to the
 * jump_vaddr_reg[] entry for rs is emitted after it. With
 * CORTEX_A8_BRANCH_PREDICTION_HACK defined, the value is first moved to
 * register 7 so that the same jump target is always used.
 *
 *  rs - host register holding the value compared against rh
 *  rh - host register loaded by do_miniht_load()
 *  ht - host register pointing at the hash table entry
 */
static void do_miniht_jump(int rs,int rh,int ht)
{
  emit_cmp(rh,rs);
  emit_ldreq_indexed(ht,4,15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  if(rs!=7) {
    emit_mov(rs,7);
    rs=7;
  }
#endif
  do_jump_vaddr(rs);
}
2235
e2b5e7aa 2236static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2237 #ifndef HAVE_ARMV7
57871462 2238 emit_movimm(return_address,rt); // PC into link register
643aeae3 2239 add_to_linker(out,return_address,1);
57871462 2240 emit_pcreladdr(temp);
643aeae3 2241 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2242 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2243 #else
2244 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2245 add_to_linker(out,return_address,1);
57871462 2246 emit_pcreladdr(temp);
643aeae3 2247 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2248 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2249 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2250 #endif
2251}
2252
57871462 2253// CPU-architecture-specific initialization
2a014d73 2254static void arch_init(void)
2255{
2256 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2257 struct tramp_insns *ops = ndrc->tramp.ops;
2258 size_t i;
2259 assert(!(diff & 3));
2260 assert(diff < 0x1000);
2261 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2262 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2263 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2264 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2265}
b9b61529 2266
2267// vim:shiftwidth=2:expandtab