cdrom: change pause timing again
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
2a014d73 4 * Copyright (C) 2010-2021 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#define FLAGLESS
23#include "../gte.h"
24#undef FLAGLESS
054175e9 25#include "../gte_arm.h"
26#include "../gte_neon.h"
27#include "pcnt.h"
665f33e1 28#include "arm_features.h"
054175e9 29
dd114d7d 30#ifdef DRC_DBG
31#pragma GCC diagnostic ignored "-Wunused-function"
32#pragma GCC diagnostic ignored "-Wunused-variable"
33#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
34#endif
35
/* Forward declarations of entry points that are not defined in this part of the file. */
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

/* jump_vaddr entry point indexed by register number (0-10 and 12);
 * every slot not listed below is left at 0. */
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 70
0bbd1454 71void invalidate_addr_r0();
72void invalidate_addr_r1();
73void invalidate_addr_r2();
74void invalidate_addr_r3();
75void invalidate_addr_r4();
76void invalidate_addr_r5();
77void invalidate_addr_r6();
78void invalidate_addr_r7();
79void invalidate_addr_r8();
80void invalidate_addr_r9();
81void invalidate_addr_r10();
82void invalidate_addr_r12();
83
84const u_int invalidate_addr_reg[16] = {
85 (int)invalidate_addr_r0,
86 (int)invalidate_addr_r1,
87 (int)invalidate_addr_r2,
88 (int)invalidate_addr_r3,
89 (int)invalidate_addr_r4,
90 (int)invalidate_addr_r5,
91 (int)invalidate_addr_r6,
92 (int)invalidate_addr_r7,
93 (int)invalidate_addr_r8,
94 (int)invalidate_addr_r9,
95 (int)invalidate_addr_r10,
96 0,
97 (int)invalidate_addr_r12,
98 0,
99 0,
100 0};
101
57871462 102/* Linker */
103
/*
 * Patch the already-emitted instruction at addr so that it refers to target_.
 * The kind of instruction is recognised from its most significant byte
 * (byte 3 of the little-endian word):
 *   0xe2  - immediate-operand insn (see emit_pcreladdr): low 12 bits patched
 *   0x72  - insn generated by emit_jno_unlikely
 *   other - must look like a branch ((byte & 0x0e) == 0x0a): low 24 bits patched
 */
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  const u_char top_byte = ((u_char *)addr)[3];
  const u_int target = (u_int)target_;
  // distance from (insn address + 8) to the target
  const u_int disp = target - (u_int)insn - 8;

  switch (top_byte) {
  case 0xe2:
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    break;

  case 0x72:
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    } else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    } else {
      // does not fit an immediate: rewrite the whole word as 0x7A + 24-bit offset
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    break;

  default:
    assert((top_byte & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
    break;
  }
}
135
// Optional variant of set_jump_target: may copy the instruction found at the
// branch target into the slot just before the branch. Works, but the
// difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  const u_char top_byte = ((u_char *)addr)[3];

  assert(!copy || insn[-1] == 0xe28dd000);
  if (top_byte == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top_byte & 0x0e) == 0xa);
  const u_int target_insn = *(u_int *)target;
  if ((target_insn & 0x0e100000) == 0             // ALU, no immediate, no flags
      || (target_insn & 0x0c100000) == 0x04100000 // Load
      || (target_insn & 0x08000000)) {
    copy = 0;
  }
  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 170
171/* Literal pool */
e2b5e7aa 172static void add_literal(int addr,int val)
57871462 173{
15776b68 174 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 175 literals[literalcount][0]=addr;
176 literals[literalcount][1]=val;
9f51b4b9 177 literalcount++;
178}
57871462 179
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping insn. The stub's second word must be
// "ldr rx, [pc, #ofs]"; the pointer stored in the literal it loads is returned.
static void *find_extjump_insn(void *stub)
{
  int *ldr_insn = (int *)((u_char *)stub + 4);
  assert((*ldr_insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  const u_int pool_ofs = *ldr_insn & 0xfff;
  // the literal sits at insn address + 8 + offset
  void **literal = (void *)((u_char *)ldr_insn + pool_ofs + 8);
  return *literal;
}
190
// Find where an external branch is linked to, using the address of its stub:
// take the address loaded by the insn one after the stub start
// (dyna_linker arg1), treat it as a pointer to a branch insn,
// and return the address that branch jumps to.
#if 0
static void *get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *branch = find_extjump_insn(stub);
  assert((*branch & 0x0f000000) == 0x0a000000); // b
  // sign-extend the 24-bit word offset into a byte offset
  const int byte_ofs = (*branch << 8) >> 6;
  return (u_char *)branch + byte_ofs + 8;
}
#endif
57871462 204
57871462 205// Allocate a specific ARM register.
e2b5e7aa 206static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 207{
208 int n;
f776eb14 209 int dirty=0;
9f51b4b9 210
57871462 211 // see if it's already allocated (and dealloc it)
212 for(n=0;n<HOST_REGS;n++)
213 {
f776eb14 214 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
215 dirty=(cur->dirty>>n)&1;
216 cur->regmap[n]=-1;
217 }
57871462 218 }
9f51b4b9 219
90f98e7c 220 assert(n == hr || cur->regmap[hr] < 0 || !((cur->noevict >> hr) & 1));
221 cur->regmap[hr] = reg;
222 cur->dirty &= ~(1 << hr);
223 cur->dirty |= dirty << hr;
224 cur->isconst &= ~(1u << hr);
225 cur->noevict |= 1u << hr;
57871462 226}
227
228// Alloc cycle count into dedicated register
90f98e7c 229static void alloc_cc(struct regstat *cur, int i)
57871462 230{
90f98e7c 231 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
232}
233
234static void alloc_cc_optional(struct regstat *cur, int i)
235{
236 if (cur->regmap[HOST_CCREG] < 0) {
237 alloc_arm_reg(cur, i, CCREG, HOST_CCREG);
238 cur->noevict &= ~(1u << HOST_CCREG);
239 }
57871462 240}
241
57871462 242/* Assembler */
243
e2b5e7aa 244static unused char regname[16][4] = {
57871462 245 "r0",
246 "r1",
247 "r2",
248 "r3",
249 "r4",
250 "r5",
251 "r6",
252 "r7",
253 "r8",
254 "r9",
255 "r10",
256 "fp",
257 "r12",
258 "sp",
259 "lr",
260 "pc"};
261
e2b5e7aa 262static void output_w32(u_int word)
57871462 263{
264 *((u_int *)out)=word;
265 out+=4;
266}
e2b5e7aa 267
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3. Each must be < 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  const u_int rn_field = rn << 16;
  const u_int rd_field = rd << 12;
  return rn_field | rd_field | rm;
}
e2b5e7aa 275
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// (which must be even). The shift is stored as (32 - shift) & 30, placed at bit 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int rotate_field = ((32 - shift) & 30) << 7;
  return (rn << 16) | (rd << 12) | rotate_field | imm;
}
e2b5e7aa 284
// Try to express imm as an 8-bit value with an even rotation.
// On success store the 12-bit operand field in *encoded and return 1;
// otherwise return 0 with *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;

  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 300
301static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 302{
303 u_int ret=genimm(imm,encoded);
304 assert(ret);
581335b0 305 (void)ret;
cfbd3c6e 306}
e2b5e7aa 307
308static u_int genjmp(u_int addr)
57871462 309{
7c3a5182 310 if (addr < 3) return 0; // a branch that will be patched later
311 int offset = addr-(int)out-8;
312 if (offset < -33554432 || offset >= 33554432) {
313 SysPrintf("genjmp: out of range: %08x\n", offset);
314 abort();
e80343e2 315 return 0;
316 }
57871462 317 return ((u_int)offset>>2)&0xffffff;
318}
319
d1e4ebd9 320static unused void emit_breakpoint(void)
321{
322 assem_debug("bkpt #0\n");
323 //output_w32(0xe1200070);
324 output_w32(0xe7f001f0);
325}
326
e2b5e7aa 327static void emit_mov(int rs,int rt)
57871462 328{
329 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
330 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
331}
332
e2b5e7aa 333static void emit_movs(int rs,int rt)
57871462 334{
335 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
336 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
337}
338
e2b5e7aa 339static void emit_add(int rs1,int rs2,int rt)
57871462 340{
341 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
342 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
343}
344
39b71d9a 345static void emit_adds(int rs1,int rs2,int rt)
346{
347 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
348 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
349}
350#define emit_adds_ptr emit_adds
351
e2b5e7aa 352static void emit_adcs(int rs1,int rs2,int rt)
57871462 353{
354 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
355 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
356}
357
e2b5e7aa 358static void emit_neg(int rs, int rt)
57871462 359{
360 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
361 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
362}
363
a5cd72d0 364static void emit_negs(int rs, int rt)
365{
366 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
367 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
368}
369
e2b5e7aa 370static void emit_sub(int rs1,int rs2,int rt)
57871462 371{
372 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
373 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
374}
375
a5cd72d0 376static void emit_subs(int rs1,int rs2,int rt)
377{
378 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
379 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
380}
381
e2b5e7aa 382static void emit_zeroreg(int rt)
57871462 383{
384 assem_debug("mov %s,#0\n",regname[rt]);
385 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
386}
387
e2b5e7aa 388static void emit_loadlp(u_int imm,u_int rt)
790ee18e 389{
390 add_literal((int)out,imm);
391 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
392 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
393}
e2b5e7aa 394
33788798 395#ifdef HAVE_ARMV7
e2b5e7aa 396static void emit_movw(u_int imm,u_int rt)
790ee18e 397{
398 assert(imm<65536);
399 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
400 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
401}
e2b5e7aa 402
403static void emit_movt(u_int imm,u_int rt)
790ee18e 404{
405 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
406 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
407}
33788798 408#endif
e2b5e7aa 409
410static void emit_movimm(u_int imm,u_int rt)
790ee18e 411{
412 u_int armval;
413 if(genimm(imm,&armval)) {
414 assem_debug("mov %s,#%d\n",regname[rt],imm);
415 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
416 }else if(genimm(~imm,&armval)) {
417 assem_debug("mvn %s,#%d\n",regname[rt],imm);
418 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
419 }else if(imm<65536) {
665f33e1 420 #ifndef HAVE_ARMV7
790ee18e 421 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
422 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
423 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
424 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
425 #else
426 emit_movw(imm,rt);
427 #endif
428 }else{
665f33e1 429 #ifndef HAVE_ARMV7
790ee18e 430 emit_loadlp(imm,rt);
431 #else
432 emit_movw(imm&0x0000FFFF,rt);
433 emit_movt(imm&0xFFFF0000,rt);
434 #endif
435 }
436}
e2b5e7aa 437
438static void emit_pcreladdr(u_int rt)
790ee18e 439{
440 assem_debug("add %s,pc,#?\n",regname[rt]);
441 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
442}
443
e2b5e7aa 444static void emit_loadreg(int r, int hr)
57871462 445{
53358c1d 446 assert(hr != EXCLUDE_REG);
447 if (r == 0)
57871462 448 emit_zeroreg(hr);
449 else {
33788798 450 void *addr;
7c3a5182 451 switch (r) {
452 //case HIREG: addr = &hi; break;
453 //case LOREG: addr = &lo; break;
33788798 454 case CCREG: addr = &cycle_count; break;
33788798 455 case INVCP: addr = &invc_ptr; break;
456 case ROREG: addr = &ram_offset; break;
457 default:
458 assert(r < 34);
459 addr = &psxRegs.GPR.r[r];
460 break;
7c3a5182 461 }
33788798 462 u_int offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 463 assert(offset<4096);
6cc8d23c 464 assem_debug("ldr %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 465 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
466 }
467}
e2b5e7aa 468
469static void emit_storereg(int r, int hr)
57871462 470{
53358c1d 471 assert(hr != EXCLUDE_REG);
7c3a5182 472 int addr = (int)&psxRegs.GPR.r[r];
473 switch (r) {
474 //case HIREG: addr = &hi; break;
475 //case LOREG: addr = &lo; break;
476 case CCREG: addr = (int)&cycle_count; break;
477 default: assert(r < 34); break;
478 }
57871462 479 u_int offset = addr-(u_int)&dynarec_local;
480 assert(offset<4096);
6cc8d23c 481 assem_debug("str %s,fp+%d # r%d\n",regname[hr],offset,r);
57871462 482 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
483}
484
e2b5e7aa 485static void emit_test(int rs, int rt)
57871462 486{
487 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
488 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
489}
490
e2b5e7aa 491static void emit_testimm(int rs,int imm)
57871462 492{
493 u_int armval;
5a05d80c 494 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 495 genimm_checked(imm,&armval);
57871462 496 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
497}
498
e2b5e7aa 499static void emit_testeqimm(int rs,int imm)
b9b61529 500{
501 u_int armval;
502 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 503 genimm_checked(imm,&armval);
b9b61529 504 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
505}
506
e2b5e7aa 507static void emit_not(int rs,int rt)
57871462 508{
509 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
510 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
511}
512
a5cd72d0 513static void emit_mvneq(int rs,int rt)
514{
515 assem_debug("mvneq %s,%s\n",regname[rt],regname[rs]);
516 output_w32(0x01e00000|rd_rn_rm(rt,0,rs));
517}
518
e2b5e7aa 519static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 520{
521 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
522 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
523}
524
e2b5e7aa 525static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 526{
527 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
528 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
529}
e2b5e7aa 530
e2b5e7aa 531static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 532{
533 assert(rs<16);
534 assert(rt<16);
535 assert(imm<32);
536 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
537 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
538}
539
e2b5e7aa 540static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 541{
542 assert(rs<16);
543 assert(rt<16);
544 assert(imm<32);
545 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
546 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
547}
548
e2b5e7aa 549static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 550{
551 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
552 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
553}
554
3968e69e 555static void emit_xorsar_imm(u_int rs1,u_int rs2,u_int imm,u_int rt)
556{
557 assem_debug("eor %s,%s,%s,asr #%d\n",regname[rt],regname[rs1],regname[rs2],imm);
558 output_w32(0xe0200040|rd_rn_rm(rt,rs1,rs2)|(imm<<7));
559}
560
e2b5e7aa 561static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 562{
563 assert(rs<16);
564 assert(rt<16);
565 if(imm!=0) {
57871462 566 u_int armval;
567 if(genimm(imm,&armval)) {
568 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
569 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
570 }else if(genimm(-imm,&armval)) {
8a0a8423 571 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 572 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 573 #ifdef HAVE_ARMV7
574 }else if(rt!=rs&&(u_int)imm<65536) {
575 emit_movw(imm&0x0000ffff,rt);
576 emit_add(rs,rt,rt);
577 }else if(rt!=rs&&(u_int)-imm<65536) {
578 emit_movw(-imm&0x0000ffff,rt);
579 emit_sub(rs,rt,rt);
580 #endif
581 }else if((u_int)-imm<65536) {
57871462 582 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
583 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
584 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
585 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 586 }else {
587 do {
588 int shift = (ffs(imm) - 1) & ~1;
589 int imm8 = imm & (0xff << shift);
590 genimm_checked(imm8,&armval);
591 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
592 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
593 rs = rt;
594 imm &= ~imm8;
595 }
596 while (imm != 0);
57871462 597 }
598 }
599 else if(rs!=rt) emit_mov(rs,rt);
600}
601
bc7c5acb 602static void emit_addimm_ptr(u_int rs, uintptr_t imm, u_int rt)
603{
604 emit_addimm(rs, imm, rt);
605}
606
a5cd72d0 607static void emit_addimm_and_set_flags3(u_int rs, int imm, u_int rt)
57871462 608{
609 assert(imm>-65536&&imm<65536);
610 u_int armval;
a5cd72d0 611 if (genimm(imm, &armval)) {
612 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rs],imm);
613 output_w32(0xe2900000|rd_rn_rm(rt,rs,0)|armval);
614 } else if (genimm(-imm, &armval)) {
615 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rs],imm);
616 output_w32(0xe2500000|rd_rn_rm(rt,rs,0)|armval);
617 } else if (rs != rt) {
618 emit_movimm(imm, rt);
619 emit_adds(rs, rt, rt);
620 } else if (imm < 0) {
621 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
57871462 622 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
a5cd72d0 623 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
57871462 624 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
a5cd72d0 625 } else {
626 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
57871462 627 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
a5cd72d0 628 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
57871462 629 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
630 }
631}
e2b5e7aa 632
a5cd72d0 633static void emit_addimm_and_set_flags(int imm, u_int rt)
634{
635 emit_addimm_and_set_flags3(rt, imm, rt);
636}
637
e2b5e7aa 638static void emit_addnop(u_int r)
57871462 639{
640 assert(r<16);
641 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
642 output_w32(0xe2800000|rd_rn_rm(r,r,0));
643}
644
e2b5e7aa 645static void emit_andimm(int rs,int imm,int rt)
57871462 646{
647 u_int armval;
790ee18e 648 if(imm==0) {
649 emit_zeroreg(rt);
650 }else if(genimm(imm,&armval)) {
57871462 651 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
652 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
653 }else if(genimm(~imm,&armval)) {
654 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
655 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
656 }else if(imm==65535) {
332a4533 657 #ifndef HAVE_ARMV6
57871462 658 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
659 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
660 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
661 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
662 #else
663 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
664 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
665 #endif
666 }else{
667 assert(imm>0&&imm<65535);
665f33e1 668 #ifndef HAVE_ARMV7
57871462 669 assem_debug("mov r14,#%d\n",imm&0xFF00);
670 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
671 assem_debug("add r14,r14,#%d\n",imm&0xFF);
672 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
673 #else
674 emit_movw(imm,HOST_TEMPREG);
675 #endif
676 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
677 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
678 }
679}
680
e2b5e7aa 681static void emit_orimm(int rs,int imm,int rt)
57871462 682{
683 u_int armval;
790ee18e 684 if(imm==0) {
685 if(rs!=rt) emit_mov(rs,rt);
686 }else if(genimm(imm,&armval)) {
57871462 687 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
688 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
689 }else{
690 assert(imm>0&&imm<65536);
691 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
692 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
693 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
694 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
695 }
696}
697
e2b5e7aa 698static void emit_xorimm(int rs,int imm,int rt)
57871462 699{
57871462 700 u_int armval;
790ee18e 701 if(imm==0) {
702 if(rs!=rt) emit_mov(rs,rt);
703 }else if(genimm(imm,&armval)) {
57871462 704 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
705 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
706 }else{
514ed0d9 707 assert(imm>0&&imm<65536);
57871462 708 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
709 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
710 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
711 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
712 }
713}
714
e2b5e7aa 715static void emit_shlimm(int rs,u_int imm,int rt)
57871462 716{
717 assert(imm>0);
718 assert(imm<32);
719 //if(imm==1) ...
720 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
721 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
722}
723
e2b5e7aa 724static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 725{
726 assert(imm>0);
727 assert(imm<32);
728 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
729 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
730}
731
e2b5e7aa 732static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 733{
734 assert(imm>0);
735 assert(imm<32);
736 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
737 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
738}
739
e2b5e7aa 740static void emit_shrimm(int rs,u_int imm,int rt)
57871462 741{
742 assert(imm>0);
743 assert(imm<32);
744 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
745 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
746}
747
e2b5e7aa 748static void emit_sarimm(int rs,u_int imm,int rt)
57871462 749{
750 assert(imm>0);
751 assert(imm<32);
752 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
753 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
754}
755
e2b5e7aa 756static void emit_rorimm(int rs,u_int imm,int rt)
57871462 757{
758 assert(imm>0);
759 assert(imm<32);
760 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
761 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
762}
763
// Sign-extend the low 16 bits of rs into rt: sxth when HAVE_ARMV6 is
// defined, otherwise a shift-left / arithmetic-shift-right pair.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
774
// Sign-extend the low 8 bits of rs into rt: sxtb when HAVE_ARMV6 is
// defined, otherwise a shift-left / arithmetic-shift-right pair.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
785
e2b5e7aa 786static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 787{
788 assert(rs<16);
789 assert(rt<16);
790 assert(shift<16);
791 //if(imm==1) ...
792 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
793 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
794}
e2b5e7aa 795
796static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 797{
798 assert(rs<16);
799 assert(rt<16);
800 assert(shift<16);
801 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
802 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
803}
e2b5e7aa 804
805static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 806{
807 assert(rs<16);
808 assert(rt<16);
809 assert(shift<16);
810 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
811 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
812}
57871462 813
3968e69e 814static unused void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 815{
816 assert(rs<16);
817 assert(rt<16);
818 assert(shift<16);
819 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
820 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
821}
e2b5e7aa 822
3968e69e 823static unused void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 824{
825 assert(rs<16);
826 assert(rt<16);
827 assert(shift<16);
828 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
829 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
830}
831
e2b5e7aa 832static void emit_cmpimm(int rs,int imm)
57871462 833{
834 u_int armval;
835 if(genimm(imm,&armval)) {
5a05d80c 836 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 837 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
838 }else if(genimm(-imm,&armval)) {
5a05d80c 839 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 840 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
841 }else if(imm>0) {
842 assert(imm<65536);
57871462 843 emit_movimm(imm,HOST_TEMPREG);
57871462 844 assem_debug("cmp %s,r14\n",regname[rs]);
845 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
846 }else{
847 assert(imm>-65536);
57871462 848 emit_movimm(-imm,HOST_TEMPREG);
57871462 849 assem_debug("cmn %s,r14\n",regname[rs]);
850 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
851 }
852}
853
e2b5e7aa 854static void emit_cmovne_imm(int imm,int rt)
57871462 855{
856 assem_debug("movne %s,#%d\n",regname[rt],imm);
857 u_int armval;
cfbd3c6e 858 genimm_checked(imm,&armval);
57871462 859 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
860}
e2b5e7aa 861
862static void emit_cmovl_imm(int imm,int rt)
57871462 863{
864 assem_debug("movlt %s,#%d\n",regname[rt],imm);
865 u_int armval;
cfbd3c6e 866 genimm_checked(imm,&armval);
57871462 867 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
868}
e2b5e7aa 869
870static void emit_cmovb_imm(int imm,int rt)
57871462 871{
872 assem_debug("movcc %s,#%d\n",regname[rt],imm);
873 u_int armval;
cfbd3c6e 874 genimm_checked(imm,&armval);
57871462 875 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
876}
e2b5e7aa 877
3968e69e 878static void emit_cmovae_imm(int imm,int rt)
879{
880 assem_debug("movcs %s,#%d\n",regname[rt],imm);
881 u_int armval;
882 genimm_checked(imm,&armval);
883 output_w32(0x23a00000|rd_rn_rm(rt,0,0)|armval);
884}
885
9c997d19 886static void emit_cmovs_imm(int imm,int rt)
887{
888 assem_debug("movmi %s,#%d\n",regname[rt],imm);
889 u_int armval;
890 genimm_checked(imm,&armval);
891 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
892}
893
de6dbc52 894static unused void emit_cmovne_reg(int rs,int rt)
57871462 895{
896 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
897 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
898}
e2b5e7aa 899
900static void emit_cmovl_reg(int rs,int rt)
57871462 901{
902 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
903 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
904}
e2b5e7aa 905
e3c6bdb5 906static void emit_cmovb_reg(int rs,int rt)
907{
908 assem_debug("movcc %s,%s\n",regname[rt],regname[rs]);
909 output_w32(0x31a00000|rd_rn_rm(rt,0,rs));
910}
911
e2b5e7aa 912static void emit_cmovs_reg(int rs,int rt)
57871462 913{
914 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
915 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
916}
917
// rt = (rs < imm) ? 1 : 0, using a compare followed by movlt.
// When rt aliases rs, rt is only zeroed after the compare has read it.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 925
// rt = 1 if the compare of rs with imm leaves carry clear (movcc), else 0.
// When rt aliases rs, rt is only zeroed after the compare has read it.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 933
e2b5e7aa 934static void emit_cmp(int rs,int rt)
57871462 935{
936 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
937 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
938}
e2b5e7aa 939
882a08fc 940static void emit_cmpcs(int rs,int rt)
941{
942 assem_debug("cmpcs %s,%s\n",regname[rs],regname[rt]);
943 output_w32(0x21500000|rd_rn_rm(0,rs,rt));
944}
945
// rt = (rs > 0) ? 1 : 0: compare rs with 1, preset rt to 1,
// then clear it with movlt when rs < 1.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  const int one = 1;
  emit_cmpimm(rs, one);
  emit_movimm(one, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 953
// rt = (rs != 0) ? 1 : 0: a flag-setting move (or tst when rs == rt),
// followed by movne rt,#1.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 961
// rt = (rs1 < rs2) ? 1 : 0, using cmp followed by movlt.
// If rt aliases a source, it is only zeroed after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 970
// rt = 1 if "cmp rs1, rs2" leaves carry clear (movcc), else 0.
// If rt aliases a source, it is only zeroed after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 979
2a014d73 980static int can_jump_or_call(const void *a)
981{
982 intptr_t offset = (u_char *)a - out - 8;
983 return (-33554432 <= offset && offset < 33554432);
984}
985
643aeae3 986static void emit_call(const void *a_)
57871462 987{
643aeae3 988 int a = (int)a_;
d1e4ebd9 989 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 990 u_int offset=genjmp(a);
991 output_w32(0xeb000000|offset);
992}
e2b5e7aa 993
b14b6a8f 994static void emit_jmp(const void *a_)
57871462 995{
b14b6a8f 996 int a = (int)a_;
d1e4ebd9 997 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a_));
57871462 998 u_int offset=genjmp(a);
999 output_w32(0xea000000|offset);
1000}
e2b5e7aa 1001
// bne <a_>
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n", target);
  output_w32(0x1a000000 | genjmp(target));
}
e2b5e7aa 1009
// beq <a_>
static void emit_jeq(const void *a_)
{
  const int target = (int)a_;
  assem_debug("beq %x\n", target);
  output_w32(0x0a000000 | genjmp(target));
}
e2b5e7aa 1017
// Emit "bmi" (branch if negative) to address a_.
static void emit_js(const void *a_)
{
  int target = (int)a_;

  assem_debug("bmi %x\n",target);
  output_w32(0x4a000000 | genjmp(target));
}
e2b5e7aa 1025
// Emit "bpl" (branch if not negative) to address a_.
static void emit_jns(const void *a_)
{
  int target = (int)a_;

  assem_debug("bpl %x\n",target);
  output_w32(0x5a000000 | genjmp(target));
}
e2b5e7aa 1033
// Emit "blt" (signed less-than) to address a_.
static void emit_jl(const void *a_)
{
  int target = (int)a_;

  assem_debug("blt %x\n",target);
  output_w32(0xba000000 | genjmp(target));
}
e2b5e7aa 1041
// Emit "bge" (signed greater-or-equal) to address a_.
static void emit_jge(const void *a_)
{
  int target = (int)a_;

  assem_debug("bge %x\n",target);
  output_w32(0xaa000000 | genjmp(target));
}
e2b5e7aa 1049
// Emit "bvs" (branch if overflow set) to address a_.
static void emit_jo(const void *a_)
{
  int target = (int)a_;

  assem_debug("bvs %x\n",target);
  output_w32(0x6a000000 | genjmp(target));
}
1057
// Emit "bvc" (branch if overflow clear) to address a_.
static void emit_jno(const void *a_)
{
  int target = (int)a_;

  assem_debug("bvc %x\n",target);
  output_w32(0x7a000000 | genjmp(target));
}
e2b5e7aa 1065
// Emit "bcs" (branch if carry set) to address a_.
static void emit_jc(const void *a_)
{
  int target = (int)a_;

  assem_debug("bcs %x\n",target);
  output_w32(0x2a000000 | genjmp(target));
}
e2b5e7aa 1073
// Emit "bcc" (branch if carry clear) to address a_.
static void emit_jcc(const void *a_)
{
  int target = (int)a_;

  assem_debug("bcc %x\n",target);
  output_w32(0x3a000000 | genjmp(target));
}
1081
9b495f6e 1082static void *emit_cbz(int rs, const void *a)
1083{
1084 void *ret;
1085 emit_test(rs, rs);
1086 ret = out;
1087 emit_jeq(a);
1088 return ret;
1089}
1090
3968e69e 1091static unused void emit_callreg(u_int r)
57871462 1092{
c6c3b1b3 1093 assert(r<15);
1094 assem_debug("blx %s\n",regname[r]);
1095 output_w32(0xe12fff30|r);
57871462 1096}
e2b5e7aa 1097
1098static void emit_jmpreg(u_int r)
57871462 1099{
1100 assem_debug("mov pc,%s\n",regname[r]);
1101 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1102}
1103
be516ebe 1104static void emit_ret(void)
1105{
1106 emit_jmpreg(14);
1107}
1108
e2b5e7aa 1109static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1110{
1111 assert(offset>-4096&&offset<4096);
1112 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1113 if(offset>=0) {
1114 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1115 }else{
1116 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1117 }
1118}
e2b5e7aa 1119
1120static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1121{
1122 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1123 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1124}
39b71d9a 1125#define emit_readptr_dualindexedx_ptrlen emit_readword_dualindexedx4
1126
1127static void emit_ldr_dualindexed(int rs1, int rs2, int rt)
1128{
1129 assem_debug("ldr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1130 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2));
1131}
e2b5e7aa 1132
1133static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1134{
1135 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1136 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1137}
e2b5e7aa 1138
37387d8b 1139static void emit_ldrb_dualindexed(int rs1, int rs2, int rt)
1140{
1141 assem_debug("ldrb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1142 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2));
1143}
1144
e2b5e7aa 1145static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1146{
1147 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1148 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1149}
e2b5e7aa 1150
37387d8b 1151static void emit_ldrsb_dualindexed(int rs1, int rs2, int rt)
1152{
1153 assem_debug("ldrsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1154 output_w32(0xe19000d0|rd_rn_rm(rt,rs1,rs2));
1155}
1156
e2b5e7aa 1157static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1158{
1159 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1160 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1161}
e2b5e7aa 1162
37387d8b 1163static void emit_ldrh_dualindexed(int rs1, int rs2, int rt)
1164{
1165 assem_debug("ldrh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1166 output_w32(0xe19000b0|rd_rn_rm(rt,rs1,rs2));
1167}
1168
e2b5e7aa 1169static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1170{
1171 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1172 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1173}
e2b5e7aa 1174
37387d8b 1175static void emit_ldrsh_dualindexed(int rs1, int rs2, int rt)
1176{
1177 assem_debug("ldrsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1178 output_w32(0xe19000f0|rd_rn_rm(rt,rs1,rs2));
1179}
1180
e2b5e7aa 1181static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1182{
1183 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1184 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
37387d8b 1185}
1186
1187static void emit_str_dualindexed(int rs1, int rs2, int rt)
1188{
1189 assem_debug("str %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1190 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2));
1191}
1192
1193static void emit_strb_dualindexed(int rs1, int rs2, int rt)
1194{
1195 assem_debug("strb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1196 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2));
1197}
1198
1199static void emit_strh_dualindexed(int rs1, int rs2, int rt)
1200{
1201 assem_debug("strh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1202 output_w32(0xe18000b0|rd_rn_rm(rt,rs1,rs2));
c6c3b1b3 1203}
e2b5e7aa 1204
e2b5e7aa 1205static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1206{
1207 assert(offset>-256&&offset<256);
1208 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1209 if(offset>=0) {
1210 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1211 }else{
1212 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1213 }
1214}
e2b5e7aa 1215
e2b5e7aa 1216static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1217{
1218 assert(offset>-256&&offset<256);
1219 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1220 if(offset>=0) {
1221 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1222 }else{
1223 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1224 }
1225}
e2b5e7aa 1226
1227static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1228{
1229 assert(offset>-4096&&offset<4096);
1230 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1231 if(offset>=0) {
1232 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1233 }else{
1234 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1235 }
1236}
e2b5e7aa 1237
e2b5e7aa 1238static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1239{
1240 assert(offset>-256&&offset<256);
1241 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1242 if(offset>=0) {
1243 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1244 }else{
1245 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1246 }
1247}
e2b5e7aa 1248
054175e9 1249static void emit_ldrd(int offset, int rs, int rt)
1250{
1251 assert(offset>-256&&offset<256);
1252 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1253 if(offset>=0) {
1254 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1255 }else{
1256 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1257 }
1258}
e2b5e7aa 1259
643aeae3 1260static void emit_readword(void *addr, int rt)
57871462 1261{
643aeae3 1262 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1263 assert(offset<4096);
a5cd72d0 1264 assem_debug("ldr %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
57871462 1265 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1266}
39b71d9a 1267#define emit_readptr emit_readword
e2b5e7aa 1268
e2b5e7aa 1269static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1270{
1271 assert(offset>-4096&&offset<4096);
1272 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1273 if(offset>=0) {
1274 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1275 }else{
1276 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1277 }
1278}
e2b5e7aa 1279
e2b5e7aa 1280static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1281{
1282 assert(offset>-256&&offset<256);
1283 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1284 if(offset>=0) {
1285 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1286 }else{
1287 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1288 }
1289}
e2b5e7aa 1290
1291static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1292{
1293 assert(offset>-4096&&offset<4096);
1294 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1295 if(offset>=0) {
1296 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1297 }else{
1298 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1299 }
1300}
e2b5e7aa 1301
e2b5e7aa 1302static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1303{
1304 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1305 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1306}
e2b5e7aa 1307
1308static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1309{
1310 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1311 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1312}
e2b5e7aa 1313
1314static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1315{
1316 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1317 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1318}
e2b5e7aa 1319
643aeae3 1320static void emit_writeword(int rt, void *addr)
57871462 1321{
643aeae3 1322 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1323 assert(offset<4096);
a5cd72d0 1324 assem_debug("str %s,fp+%#x%s\n", regname[rt], offset, fpofs_name(offset));
57871462 1325 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1326}
e2b5e7aa 1327
e2b5e7aa 1328static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1329{
1330 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1331 assert(rs1<16);
1332 assert(rs2<16);
1333 assert(hi<16);
1334 assert(lo<16);
1335 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1336}
e2b5e7aa 1337
1338static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1339{
1340 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1341 assert(rs1<16);
1342 assert(rs2<16);
1343 assert(hi<16);
1344 assert(lo<16);
1345 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1346}
1347
e2b5e7aa 1348static void emit_clz(int rs,int rt)
57871462 1349{
1350 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1351 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1352}
1353
e2b5e7aa 1354static void emit_subcs(int rs1,int rs2,int rt)
57871462 1355{
1356 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1357 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1358}
1359
e2b5e7aa 1360static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1361{
1362 assert(imm>0);
1363 assert(imm<32);
1364 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1365 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1366}
1367
e2b5e7aa 1368static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1369{
1370 assert(imm>0);
1371 assert(imm<32);
1372 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1373 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1374}
1375
e2b5e7aa 1376static void emit_negmi(int rs, int rt)
57871462 1377{
1378 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1379 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1380}
1381
e2b5e7aa 1382static void emit_negsmi(int rs, int rt)
57871462 1383{
1384 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1385 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1386}
1387
e2b5e7aa 1388static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1389{
1390 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1391 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1392}
1393
e2b5e7aa 1394static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1395{
1396 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1397 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1398}
1399
e2b5e7aa 1400static void emit_teq(int rs, int rt)
57871462 1401{
1402 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1403 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1404}
1405
3968e69e 1406static unused void emit_rsbimm(int rs, int imm, int rt)
57871462 1407{
1408 u_int armval;
cfbd3c6e 1409 genimm_checked(imm,&armval);
57871462 1410 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1411 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1412}
1413
57871462 1414// Conditionally select one of two immediates, optimizing for small code size
1415// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1416static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1417{
1418 u_int armval;
1419 if(genimm(imm2-imm1,&armval)) {
1420 emit_movimm(imm1,rt);
1421 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1422 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1423 }else if(genimm(imm1-imm2,&armval)) {
1424 emit_movimm(imm1,rt);
1425 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1426 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1427 }
1428 else {
665f33e1 1429 #ifndef HAVE_ARMV7
57871462 1430 emit_movimm(imm1,rt);
1431 add_literal((int)out,imm2);
1432 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1433 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1434 #else
1435 emit_movw(imm1&0x0000FFFF,rt);
1436 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1437 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1438 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1439 }
1440 emit_movt(imm1&0xFFFF0000,rt);
1441 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1442 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1443 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1444 }
1445 #endif
1446 }
1447}
1448
57871462 1449// special case for checking invalid_code
9b495f6e 1450static void emit_ldrb_indexedsr12_reg(int base, int r, int rt)
57871462 1451{
9b495f6e 1452 assem_debug("ldrb %s,%s,%s lsr #12\n",regname[rt],regname[base],regname[r]);
1453 output_w32(0xe7d00000|rd_rn_rm(rt,base,r)|0x620);
57871462 1454}
1455
// Emit "blne": call address a only when the "ne" condition holds.
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000 | genjmp(a));
}
1462
57871462 1463// Used to preload hash table entries
e2b5e7aa 1464static unused void emit_prefetchreg(int r)
57871462 1465{
1466 assem_debug("pld %s\n",regname[r]);
1467 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1468}
1469
1470// Special case for mini_ht
e2b5e7aa 1471static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1472{
1473 assert(offset<4096);
1474 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1475 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1476}
1477
e2b5e7aa 1478static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1479{
1480 u_int armval;
cfbd3c6e 1481 genimm_checked(imm,&armval);
b9b61529 1482 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1483 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1484}
1485
e2b5e7aa 1486static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1487{
1488 u_int armval;
1489 genimm_checked(imm,&armval);
1490 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1491 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1492}
1493
e2b5e7aa 1494static void emit_jno_unlikely(int a)
57871462 1495{
1496 //emit_jno(a);
1497 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1498 output_w32(0x72800000|rd_rn_rm(15,15,0));
1499}
1500
054175e9 1501static void save_regs_all(u_int reglist)
57871462 1502{
054175e9 1503 int i;
57871462 1504 if(!reglist) return;
1505 assem_debug("stmia fp,{");
054175e9 1506 for(i=0;i<16;i++)
1507 if(reglist&(1<<i))
1508 assem_debug("r%d,",i);
57871462 1509 assem_debug("}\n");
1510 output_w32(0xe88b0000|reglist);
1511}
e2b5e7aa 1512
054175e9 1513static void restore_regs_all(u_int reglist)
57871462 1514{
054175e9 1515 int i;
57871462 1516 if(!reglist) return;
1517 assem_debug("ldmia fp,{");
054175e9 1518 for(i=0;i<16;i++)
1519 if(reglist&(1<<i))
1520 assem_debug("r%d,",i);
57871462 1521 assem_debug("}\n");
1522 output_w32(0xe89b0000|reglist);
1523}
e2b5e7aa 1524
054175e9 1525// Save registers before function call
1526static void save_regs(u_int reglist)
1527{
4d646738 1528 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1529 save_regs_all(reglist);
1530}
e2b5e7aa 1531
054175e9 1532// Restore registers after function call
1533static void restore_regs(u_int reglist)
1534{
4d646738 1535 reglist&=CALLER_SAVE_REGS;
054175e9 1536 restore_regs_all(reglist);
1537}
57871462 1538
57871462 1539/* Stubs/epilogue */
1540
e2b5e7aa 1541static void literal_pool(int n)
57871462 1542{
1543 if(!literalcount) return;
1544 if(n) {
1545 if((int)out-literals[0][0]<4096-n) return;
1546 }
1547 u_int *ptr;
1548 int i;
1549 for(i=0;i<literalcount;i++)
1550 {
77750690 1551 u_int l_addr=(u_int)out;
1552 int j;
1553 for(j=0;j<i;j++) {
1554 if(literals[j][1]==literals[i][1]) {
1555 //printf("dup %08x\n",literals[i][1]);
1556 l_addr=literals[j][0];
1557 break;
1558 }
1559 }
57871462 1560 ptr=(u_int *)literals[i][0];
77750690 1561 u_int offset=l_addr-(u_int)ptr-8;
57871462 1562 assert(offset<4096);
1563 assert(!(offset&3));
1564 *ptr|=offset;
77750690 1565 if(l_addr==(u_int)out) {
1566 literals[i][0]=l_addr; // remember for dupes
1567 output_w32(literals[i][1]);
1568 }
57871462 1569 }
1570 literalcount=0;
1571}
1572
e2b5e7aa 1573static void literal_pool_jumpover(int n)
57871462 1574{
1575 if(!literalcount) return;
1576 if(n) {
1577 if((int)out-literals[0][0]<4096-n) return;
1578 }
df4dc2b1 1579 void *jaddr = out;
57871462 1580 emit_jmp(0);
1581 literal_pool(0);
df4dc2b1 1582 set_jump_target(jaddr, out);
57871462 1583}
1584
7c3a5182 1585// parsed by get_pointer, find_extjump_insn
104df9d3 1586static void emit_extjump(u_char *addr, u_int target)
57871462 1587{
1588 u_char *ptr=(u_char *)addr;
1589 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1590 (void)ptr;
1591
57871462 1592 emit_loadlp(target,0);
643aeae3 1593 emit_loadlp((u_int)addr,1);
66ea165f 1594 assert(ndrc->translation_cache <= addr &&
1595 addr < ndrc->translation_cache + sizeof(ndrc->translation_cache));
104df9d3 1596 emit_far_jump(dyna_linker);
57871462 1597}
1598
d1e4ebd9 1599static void check_extjump2(void *src)
1600{
1601 u_int *ptr = src;
1602 assert((ptr[1] & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
1603 (void)ptr;
1604}
1605
13e35c04 1606// put rt_val into rt, potentially making use of rs with value rs_val
1607static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1608{
8575a877 1609 u_int armval;
1610 int diff;
1611 if(genimm(rt_val,&armval)) {
1612 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1613 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1614 return;
1615 }
1616 if(genimm(~rt_val,&armval)) {
1617 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1618 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1619 return;
1620 }
1621 diff=rt_val-rs_val;
1622 if(genimm(diff,&armval)) {
1623 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1624 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1625 return;
1626 }else if(genimm(-diff,&armval)) {
1627 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1628 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1629 return;
1630 }
1631 emit_movimm(rt_val,rt);
1632}
1633
// return 1 if emit_movimm_from can derive v2 from v1 cheaply: the values are
// equal, or their difference (in either direction), once trailing pairs of
// zero bits are stripped, fits in 8 bits
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  return 0;
}
cbbab9cd 1649
b14b6a8f 1650static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 1651{
1652 switch(type) {
1653 case LOADB_STUB: emit_signextend8(rs,rt); break;
1654 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
1655 case LOADH_STUB: emit_signextend16(rs,rt); break;
1656 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
1657 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
1658 default: assert(0);
1659 }
1660}
1661
b1be1eee 1662#include "pcsxmem.h"
1663#include "pcsxmem_inline.c"
b1be1eee 1664
e2b5e7aa 1665static void do_readstub(int n)
57871462 1666{
b14b6a8f 1667 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 1668 literal_pool(256);
b14b6a8f 1669 set_jump_target(stubs[n].addr, out);
1670 enum stub_type type=stubs[n].type;
1671 int i=stubs[n].a;
1672 int rs=stubs[n].b;
81dbbf4c 1673 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1674 u_int reglist=stubs[n].e;
81dbbf4c 1675 const signed char *i_regmap=i_regs->regmap;
581335b0 1676 int rt;
a5cd72d0 1677 if(dops[i].itype==C2LS||dops[i].itype==LOADLR) {
57871462 1678 rt=get_reg(i_regmap,FTEMP);
1679 }else{
cf95b4f0 1680 rt=get_reg(i_regmap,dops[i].rt1);
57871462 1681 }
1682 assert(rs>=0);
df4dc2b1 1683 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
1684 void *restore_jump = NULL;
c6c3b1b3 1685 reglist|=(1<<rs);
1686 for(r=0;r<=12;r++) {
1687 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
1688 temp=r; break;
1689 }
1690 }
cf95b4f0 1691 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1692 reglist&=~(1<<rt);
1693 if(temp==-1) {
1694 save_regs(reglist);
1695 regs_saved=1;
1696 temp=(rs==0)?2:0;
1697 }
1698 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
1699 temp2=1;
643aeae3 1700 emit_readword(&mem_rtab,temp);
c6c3b1b3 1701 emit_shrimm(rs,12,temp2);
1702 emit_readword_dualindexedx4(temp,temp2,temp2);
1703 emit_lsls_imm(temp2,1,temp2);
a5cd72d0 1704 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
c6c3b1b3 1705 switch(type) {
1706 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
1707 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
1708 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
1709 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
1710 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 1711 default: assert(0);
c6c3b1b3 1712 }
1713 }
1714 if(regs_saved) {
df4dc2b1 1715 restore_jump=out;
c6c3b1b3 1716 emit_jcc(0); // jump to reg restore
1717 }
1718 else
b14b6a8f 1719 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 1720
1721 if(!regs_saved)
1722 save_regs(reglist);
643aeae3 1723 void *handler=NULL;
c6c3b1b3 1724 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1725 handler=jump_handler_read8;
c6c3b1b3 1726 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1727 handler=jump_handler_read16;
c6c3b1b3 1728 if(type==LOADW_STUB)
643aeae3 1729 handler=jump_handler_read32;
1730 assert(handler);
b96d3df7 1731 pass_args(rs,temp2);
c6c3b1b3 1732 int cc=get_reg(i_regmap,CCREG);
1733 if(cc<0)
1734 emit_loadreg(CCREG,2);
2330734f 1735 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1736 emit_far_call(handler);
7da5c7ad 1737#if 0
1738 if (type == LOADW_STUB) {
1739 // new cycle_count returned in r2
1740 emit_addimm(2, -(int)stubs[n].d, cc<0?2:cc);
1741 if (cc < 0)
1742 emit_storereg(CCREG, 2);
1743 }
1744#endif
a5cd72d0 1745 if(dops[i].itype==C2LS||(rt>=0&&dops[i].rt1!=0)) {
b1be1eee 1746 mov_loadtype_adj(type,0,rt);
c6c3b1b3 1747 }
1748 if(restore_jump)
df4dc2b1 1749 set_jump_target(restore_jump, out);
c6c3b1b3 1750 restore_regs(reglist);
b14b6a8f 1751 emit_jmp(stubs[n].retaddr); // return address
57871462 1752}
1753
81dbbf4c 1754static void inline_readstub(enum stub_type type, int i, u_int addr,
1755 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1756{
277718fa 1757 int ra = cinfo[i].addr;
1758 int rt = get_reg(regmap,target);
1759 assert(ra >= 0);
2a014d73 1760 u_int is_dynamic;
687b4580 1761 uintptr_t host_addr = 0;
643aeae3 1762 void *handler;
b1be1eee 1763 int cc=get_reg(regmap,CCREG);
277718fa 1764 if(pcsx_direct_read(type,addr,adj,cc,target?ra:-1,rt))
b1be1eee 1765 return;
643aeae3 1766 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
1767 if (handler == NULL) {
cf95b4f0 1768 if(rt<0||dops[i].rt1==0)
c6c3b1b3 1769 return;
13e35c04 1770 if(addr!=host_addr)
277718fa 1771 emit_movimm_from(addr,ra,host_addr,ra);
c6c3b1b3 1772 switch(type) {
277718fa 1773 case LOADB_STUB: emit_movsbl_indexed(0,ra,rt); break;
1774 case LOADBU_STUB: emit_movzbl_indexed(0,ra,rt); break;
1775 case LOADH_STUB: emit_movswl_indexed(0,ra,rt); break;
1776 case LOADHU_STUB: emit_movzwl_indexed(0,ra,rt); break;
1777 case LOADW_STUB: emit_readword_indexed(0,ra,rt); break;
c6c3b1b3 1778 default: assert(0);
1779 }
1780 return;
1781 }
b1be1eee 1782 is_dynamic=pcsxmem_is_handler_dynamic(addr);
1783 if(is_dynamic) {
1784 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 1785 handler=jump_handler_read8;
b1be1eee 1786 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 1787 handler=jump_handler_read16;
b1be1eee 1788 if(type==LOADW_STUB)
643aeae3 1789 handler=jump_handler_read32;
b1be1eee 1790 }
c6c3b1b3 1791
1792 // call a memhandler
cf95b4f0 1793 if(rt>=0&&dops[i].rt1!=0)
c6c3b1b3 1794 reglist&=~(1<<rt);
1795 save_regs(reglist);
1796 if(target==0)
1797 emit_movimm(addr,0);
277718fa 1798 else if(ra!=0)
1799 emit_mov(ra,0);
b1be1eee 1800 if(cc<0)
1801 emit_loadreg(CCREG,2);
1802 if(is_dynamic) {
1803 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2330734f 1804 emit_addimm(cc<0?2:cc,adj,2);
c6c3b1b3 1805 }
b1be1eee 1806 else {
643aeae3 1807 emit_readword(&last_count,3);
2330734f 1808 emit_addimm(cc<0?2:cc,adj,2);
b1be1eee 1809 emit_add(2,3,2);
d7546062 1810 emit_writeword(2,&psxRegs.cycle);
b1be1eee 1811 }
1812
2a014d73 1813 emit_far_call(handler);
b1be1eee 1814
7da5c7ad 1815#if 0
1816 if (type == LOADW_STUB) {
1817 // new cycle_count returned in r2
1818 emit_addimm(2, -adj, cc<0?2:cc);
1819 if (cc < 0)
1820 emit_storereg(CCREG, 2);
1821 }
1822#endif
cf95b4f0 1823 if(rt>=0&&dops[i].rt1!=0) {
c6c3b1b3 1824 switch(type) {
1825 case LOADB_STUB: emit_signextend8(0,rt); break;
1826 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
1827 case LOADH_STUB: emit_signextend16(0,rt); break;
1828 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
1829 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
1830 default: assert(0);
1831 }
1832 }
1833 restore_regs(reglist);
57871462 1834}
1835
e2b5e7aa 1836static void do_writestub(int n)
57871462 1837{
b14b6a8f 1838 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 1839 literal_pool(256);
b14b6a8f 1840 set_jump_target(stubs[n].addr, out);
1841 enum stub_type type=stubs[n].type;
1842 int i=stubs[n].a;
1843 int rs=stubs[n].b;
81dbbf4c 1844 const struct regstat *i_regs=(struct regstat *)stubs[n].c;
b14b6a8f 1845 u_int reglist=stubs[n].e;
81dbbf4c 1846 const signed char *i_regmap=i_regs->regmap;
581335b0 1847 int rt,r;
a5cd72d0 1848 if(dops[i].itype==C2LS) {
57871462 1849 rt=get_reg(i_regmap,r=FTEMP);
1850 }else{
cf95b4f0 1851 rt=get_reg(i_regmap,r=dops[i].rs2);
57871462 1852 }
1853 assert(rs>=0);
1854 assert(rt>=0);
b14b6a8f 1855 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 1856 void *restore_jump = NULL;
b96d3df7 1857 int reglist2=reglist|(1<<rs)|(1<<rt);
1858 for(rtmp=0;rtmp<=12;rtmp++) {
1859 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
1860 temp=rtmp; break;
1861 }
1862 }
1863 if(temp==-1) {
1864 save_regs(reglist);
1865 regs_saved=1;
1866 for(rtmp=0;rtmp<=3;rtmp++)
1867 if(rtmp!=rs&&rtmp!=rt)
1868 {temp=rtmp;break;}
1869 }
1870 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
1871 temp2=3;
643aeae3 1872 emit_readword(&mem_wtab,temp);
b96d3df7 1873 emit_shrimm(rs,12,temp2);
1874 emit_readword_dualindexedx4(temp,temp2,temp2);
1875 emit_lsls_imm(temp2,1,temp2);
1876 switch(type) {
1877 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
1878 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
1879 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
1880 default: assert(0);
1881 }
1882 if(regs_saved) {
df4dc2b1 1883 restore_jump=out;
b96d3df7 1884 emit_jcc(0); // jump to reg restore
1885 }
1886 else
b14b6a8f 1887 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 1888
1889 if(!regs_saved)
1890 save_regs(reglist);
643aeae3 1891 void *handler=NULL;
b96d3df7 1892 switch(type) {
643aeae3 1893 case STOREB_STUB: handler=jump_handler_write8; break;
1894 case STOREH_STUB: handler=jump_handler_write16; break;
1895 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 1896 default: assert(0);
b96d3df7 1897 }
643aeae3 1898 assert(handler);
b96d3df7 1899 pass_args(rs,rt);
1900 if(temp2!=3)
1901 emit_mov(temp2,3);
1902 int cc=get_reg(i_regmap,CCREG);
1903 if(cc<0)
1904 emit_loadreg(CCREG,2);
2330734f 1905 emit_addimm(cc<0?2:cc,(int)stubs[n].d,2);
2a014d73 1906 emit_far_call(handler);
7da5c7ad 1907 // new cycle_count returned in r2
1908 emit_addimm(2,-(int)stubs[n].d,cc<0?2:cc);
b96d3df7 1909 if(cc<0)
1910 emit_storereg(CCREG,2);
1911 if(restore_jump)
df4dc2b1 1912 set_jump_target(restore_jump, out);
b96d3df7 1913 restore_regs(reglist);
b14b6a8f 1914 emit_jmp(stubs[n].retaddr);
57871462 1915}
1916
81dbbf4c 1917static void inline_writestub(enum stub_type type, int i, u_int addr,
1918 const signed char regmap[], int target, int adj, u_int reglist)
57871462 1919{
277718fa 1920 int ra = cinfo[i].addr;
1921 int rt = get_reg(regmap, target);
1922 assert(ra>=0);
57871462 1923 assert(rt>=0);
687b4580 1924 uintptr_t host_addr = 0;
643aeae3 1925 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
1926 if (handler == NULL) {
13e35c04 1927 if(addr!=host_addr)
277718fa 1928 emit_movimm_from(addr,ra,host_addr,ra);
b96d3df7 1929 switch(type) {
277718fa 1930 case STOREB_STUB: emit_writebyte_indexed(rt,0,ra); break;
1931 case STOREH_STUB: emit_writehword_indexed(rt,0,ra); break;
1932 case STOREW_STUB: emit_writeword_indexed(rt,0,ra); break;
b96d3df7 1933 default: assert(0);
1934 }
1935 return;
1936 }
1937
1938 // call a memhandler
1939 save_regs(reglist);
277718fa 1940 pass_args(ra,rt);
b96d3df7 1941 int cc=get_reg(regmap,CCREG);
1942 if(cc<0)
1943 emit_loadreg(CCREG,2);
2330734f 1944 emit_addimm(cc<0?2:cc,adj,2);
643aeae3 1945 emit_movimm((u_int)handler,3);
2a014d73 1946 emit_far_call(jump_handler_write_h);
7da5c7ad 1947 // new cycle_count returned in r2
1948 emit_addimm(2,-adj,cc<0?2:cc);
b96d3df7 1949 if(cc<0)
1950 emit_storereg(CCREG,2);
1951 restore_regs(reglist);
57871462 1952}
1953
57871462 1954/* Special assem */
1955
81dbbf4c 1956static void c2op_prologue(u_int op, int i, const struct regstat *i_regs, u_int reglist)
054175e9 1957{
1958 save_regs_all(reglist);
32631e6a 1959 cop2_do_stall_check(op, i, i_regs, 0);
82ed88eb 1960#ifdef PCNT
81dbbf4c 1961 emit_movimm(op, 0);
2a014d73 1962 emit_far_call(pcnt_gte_start);
82ed88eb 1963#endif
81dbbf4c 1964 emit_addimm(FP, (u_char *)&psxRegs.CP2D.r[0] - (u_char *)&dynarec_local, 0); // cop2 regs
054175e9 1965}
1966
1967static void c2op_epilogue(u_int op,u_int reglist)
1968{
82ed88eb 1969#ifdef PCNT
1970 emit_movimm(op,0);
2a014d73 1971 emit_far_call(pcnt_gte_end);
82ed88eb 1972#endif
054175e9 1973 restore_regs_all(reglist);
1974}
1975
6c0eefaf 1976static void c2op_call_MACtoIR(int lm,int need_flags)
1977{
1978 if(need_flags)
2a014d73 1979 emit_far_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 1980 else
2a014d73 1981 emit_far_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 1982}
1983
1984static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
1985{
2a014d73 1986 emit_far_call(func);
6c0eefaf 1987 // func is C code and trashes r0
1988 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
1989 if(need_flags||need_ir)
1990 c2op_call_MACtoIR(lm,need_flags);
2a014d73 1991 emit_far_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 1992}
1993
81dbbf4c 1994static void c2op_assemble(int i, const struct regstat *i_regs)
b9b61529 1995{
81dbbf4c 1996 u_int c2op = source[i] & 0x3f;
1997 u_int reglist_full = get_host_reglist(i_regs->regmap);
1998 u_int reglist = reglist_full & CALLER_SAVE_REGS;
1999 int need_flags, need_ir;
b9b61529 2000
2001 if (gte_handlers[c2op]!=NULL) {
bedfea38 2002 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 2003 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 2004 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
2005 source[i],gte_unneeded[i+1],need_flags,need_ir);
81dbbf4c 2006 if(HACK_ENABLED(NDHACK_GTE_NO_FLAGS))
0ff8c62c 2007 need_flags=0;
6c0eefaf 2008 int shift = (source[i] >> 19) & 1;
2009 int lm = (source[i] >> 10) & 1;
054175e9 2010 switch(c2op) {
19776aef 2011#ifndef DRC_DBG
054175e9 2012 case GTE_MVMVA: {
82336ba3 2013#ifdef HAVE_ARMV5
054175e9 2014 int v = (source[i] >> 15) & 3;
2015 int cv = (source[i] >> 13) & 3;
2016 int mx = (source[i] >> 17) & 3;
4d646738 2017 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
81dbbf4c 2018 c2op_prologue(c2op,i,i_regs,reglist);
054175e9 2019 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
2020 if(v<3)
2021 emit_ldrd(v*8,0,4);
2022 else {
2023 emit_movzwl_indexed(9*4,0,4); // gteIR
2024 emit_movzwl_indexed(10*4,0,6);
2025 emit_movzwl_indexed(11*4,0,5);
2026 emit_orrshl_imm(6,16,4);
2027 }
2028 if(mx<3)
2029 emit_addimm(0,32*4+mx*8*4,6);
2030 else
643aeae3 2031 emit_readword(&zeromem_ptr,6);
054175e9 2032 if(cv<3)
2033 emit_addimm(0,32*4+(cv*8+5)*4,7);
2034 else
643aeae3 2035 emit_readword(&zeromem_ptr,7);
054175e9 2036#ifdef __ARM_NEON__
2037 emit_movimm(source[i],1); // opcode
2a014d73 2038 emit_far_call(gteMVMVA_part_neon);
054175e9 2039 if(need_flags) {
2040 emit_movimm(lm,1);
2a014d73 2041 emit_far_call(gteMACtoIR_flags_neon);
054175e9 2042 }
2043#else
2044 if(cv==3&&shift)
33788798 2045 emit_far_call(gteMVMVA_part_cv3sh12_arm);
054175e9 2046 else {
2047 emit_movimm(shift,1);
33788798 2048 emit_far_call(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm);
054175e9 2049 }
6c0eefaf 2050 if(need_flags||need_ir)
2051 c2op_call_MACtoIR(lm,need_flags);
82336ba3 2052#endif
2053#else /* if not HAVE_ARMV5 */
81dbbf4c 2054 c2op_prologue(c2op,i,i_regs,reglist);
82336ba3 2055 emit_movimm(source[i],1); // opcode
643aeae3 2056 emit_writeword(1,&psxRegs.code);
2a014d73 2057 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2058#endif
2059 break;
2060 }
6c0eefaf 2061 case GTE_OP:
81dbbf4c 2062 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2063 emit_far_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 2064 if(need_flags||need_ir) {
2065 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2066 c2op_call_MACtoIR(lm,need_flags);
2067 }
2068 break;
2069 case GTE_DPCS:
81dbbf4c 2070 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2071 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
2072 break;
2073 case GTE_INTPL:
81dbbf4c 2074 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2075 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
2076 break;
2077 case GTE_SQR:
81dbbf4c 2078 c2op_prologue(c2op,i,i_regs,reglist);
2a014d73 2079 emit_far_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 2080 if(need_flags||need_ir) {
2081 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
2082 c2op_call_MACtoIR(lm,need_flags);
2083 }
2084 break;
2085 case GTE_DCPL:
81dbbf4c 2086 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2087 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
2088 break;
2089 case GTE_GPF:
81dbbf4c 2090 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2091 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
2092 break;
2093 case GTE_GPL:
81dbbf4c 2094 c2op_prologue(c2op,i,i_regs,reglist);
6c0eefaf 2095 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
2096 break;
19776aef 2097#endif
054175e9 2098 default:
81dbbf4c 2099 c2op_prologue(c2op,i,i_regs,reglist);
19776aef 2100#ifdef DRC_DBG
2101 emit_movimm(source[i],1); // opcode
643aeae3 2102 emit_writeword(1,&psxRegs.code);
19776aef 2103#endif
2a014d73 2104 emit_far_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 2105 break;
2106 }
2107 c2op_epilogue(c2op,reglist);
2108 }
b9b61529 2109}
2110
/*
 * Emit the value fix-up for a write to cop2 control register 31.
 * Reference behaviour being implemented:
 *   value = value & 0x7ffff000;
 *   if (value & 0x7f87e000) value |= 0x80000000;
 *
 * sl   - host register holding the source value
 * temp - host register receiving the adjusted value
 *
 * NOTE(review): the shift-right/shift-left pair clears only the low 12 bits,
 * so bit 31 of the source appears to survive when none of the tested bits
 * are set, unlike the mask in the reference formula - confirm.
 */
static void c2op_ctc2_31_assemble(signed char sl, signed char temp)
{
  // temp = sl with the low 12 bits dropped
  emit_shrimm(sl, 12, temp);
  emit_shlimm(temp, 12, temp);

  // test 0x7f87e000 in three pieces; presumably each later test only
  // runs while the previous one found no bit set
  emit_testimm(temp, 0x7f000000);
  emit_testeqimm(temp, 0x00870000);
  emit_testeqimm(temp, 0x0000e000);

  // set bit 31 when any tested bit was set
  emit_orrne_imm(temp, 0x80000000, temp);
}
2122
2123static void do_mfc2_31_one(u_int copr,signed char temp)
2124{
2125 emit_readword(&reg_cop2d[copr],temp);
9c997d19 2126 emit_lsls_imm(temp,16,temp);
2127 emit_cmovs_imm(0,temp);
2128 emit_cmpimm(temp,0xf80<<16);
2129 emit_andimm(temp,0xf80<<16,temp);
2130 emit_cmovae_imm(0xf80<<16,temp);
3968e69e 2131}
2132
2133static void c2op_mfc2_29_assemble(signed char tl, signed char temp)
2134{
2135 if (temp < 0) {
2136 host_tempreg_acquire();
2137 temp = HOST_TEMPREG;
2138 }
2139 do_mfc2_31_one(9,temp);
9c997d19 2140 emit_shrimm(temp,7+16,tl);
3968e69e 2141 do_mfc2_31_one(10,temp);
9c997d19 2142 emit_orrshr_imm(temp,2+16,tl);
3968e69e 2143 do_mfc2_31_one(11,temp);
9c997d19 2144 emit_orrshr_imm(temp,-3+16,tl);
3968e69e 2145 emit_writeword(tl,&reg_cop2d[29]);
2146 if (temp == HOST_TEMPREG)
2147 host_tempreg_release();
2148}
2149
2330734f 2150static void multdiv_assemble_arm(int i, const struct regstat *i_regs)
57871462 2151{
2152 // case 0x18: MULT
2153 // case 0x19: MULTU
2154 // case 0x1A: DIV
2155 // case 0x1B: DIVU
cf95b4f0 2156 if(dops[i].rs1&&dops[i].rs2)
57871462 2157 {
a5cd72d0 2158 switch (dops[i].opcode2)
57871462 2159 {
a5cd72d0 2160 case 0x18: // MULT
57871462 2161 {
cf95b4f0 2162 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2163 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2164 signed char hi=get_reg(i_regs->regmap,HIREG);
2165 signed char lo=get_reg(i_regs->regmap,LOREG);
2166 assert(m1>=0);
2167 assert(m2>=0);
2168 assert(hi>=0);
2169 assert(lo>=0);
2170 emit_smull(m1,m2,hi,lo);
2171 }
a5cd72d0 2172 break;
2173 case 0x19: // MULTU
57871462 2174 {
cf95b4f0 2175 signed char m1=get_reg(i_regs->regmap,dops[i].rs1);
2176 signed char m2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2177 signed char hi=get_reg(i_regs->regmap,HIREG);
2178 signed char lo=get_reg(i_regs->regmap,LOREG);
2179 assert(m1>=0);
2180 assert(m2>=0);
2181 assert(hi>=0);
2182 assert(lo>=0);
2183 emit_umull(m1,m2,hi,lo);
2184 }
a5cd72d0 2185 break;
2186 case 0x1A: // DIV
57871462 2187 {
cf95b4f0 2188 signed char d1=get_reg(i_regs->regmap,dops[i].rs1);
2189 signed char d2=get_reg(i_regs->regmap,dops[i].rs2);
57871462 2190 signed char quotient=get_reg(i_regs->regmap,LOREG);
2191 signed char remainder=get_reg(i_regs->regmap,HIREG);
a5cd72d0 2192 void *jaddr_div0;
2193 assert(d1>=0);
2194 assert(d2>=0);
57871462 2195 assert(quotient>=0);
2196 assert(remainder>=0);
2197 emit_movs(d1,remainder);
44a80f6a 2198 emit_movimm(0xffffffff,quotient);
2199 emit_negmi(quotient,quotient); // .. quotient and ..
2200 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 2201 emit_movs(d2,HOST_TEMPREG);
a5cd72d0 2202 jaddr_div0 = out;
2203 emit_jeq(0); // Division by zero
82336ba3 2204 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 2205#ifdef HAVE_ARMV5
57871462 2206 emit_clz(HOST_TEMPREG,quotient);
a5cd72d0 2207 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG); // shifted divisor
665f33e1 2208#else
2209 emit_movimm(0,quotient);
2210 emit_addpl_imm(quotient,1,quotient);
2211 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
7c3a5182 2212 emit_jns(out-2*4);
665f33e1 2213#endif
57871462 2214 emit_orimm(quotient,1<<31,quotient);
2215 emit_shr(quotient,quotient,quotient);
2216 emit_cmp(remainder,HOST_TEMPREG);
2217 emit_subcs(remainder,HOST_TEMPREG,remainder);
2218 emit_adcs(quotient,quotient,quotient);
2219 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 2220 emit_jcc(out-16); // -4
57871462 2221 emit_teq(d1,d2);
2222 emit_negmi(quotient,quotient);
a5cd72d0 2223 set_jump_target(jaddr_div0, out);
57871462 2224 emit_test(d1,d1);
2225 emit_negmi(remainder,remainder);
2226 }
a5cd72d0 2227 break;
2228 case 0x1B: // DIVU
57871462 2229 {
cf95b4f0 2230 signed char d1=get_reg(i_regs->regmap,dops[i].rs1); // dividend
2231 signed char d2=get_reg(i_regs->regmap,dops[i].rs2); // divisor
57871462 2232 signed char quotient=get_reg(i_regs->regmap,LOREG);
2233 signed char remainder=get_reg(i_regs->regmap,HIREG);
a5cd72d0 2234 void *jaddr_div0;
2235 assert(d1>=0);
2236 assert(d2>=0);
57871462 2237 assert(quotient>=0);
2238 assert(remainder>=0);
44a80f6a 2239 emit_mov(d1,remainder);
2240 emit_movimm(0xffffffff,quotient); // div0 case
57871462 2241 emit_test(d2,d2);
a5cd72d0 2242 jaddr_div0 = out;
2243 emit_jeq(0); // Division by zero
665f33e1 2244#ifdef HAVE_ARMV5
57871462 2245 emit_clz(d2,HOST_TEMPREG);
2246 emit_movimm(1<<31,quotient);
2247 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 2248#else
2249 emit_movimm(0,HOST_TEMPREG);
82336ba3 2250 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
2251 emit_lslpls_imm(d2,1,d2);
7c3a5182 2252 emit_jns(out-2*4);
665f33e1 2253 emit_movimm(1<<31,quotient);
2254#endif
57871462 2255 emit_shr(quotient,HOST_TEMPREG,quotient);
2256 emit_cmp(remainder,d2);
2257 emit_subcs(remainder,d2,remainder);
2258 emit_adcs(quotient,quotient,quotient);
2259 emit_shrcc_imm(d2,1,d2);
b14b6a8f 2260 emit_jcc(out-16); // -4
a5cd72d0 2261 set_jump_target(jaddr_div0, out);
57871462 2262 }
a5cd72d0 2263 break;
57871462 2264 }
57871462 2265 }
2266 else
2267 {
57871462 2268 signed char hr=get_reg(i_regs->regmap,HIREG);
2269 signed char lr=get_reg(i_regs->regmap,LOREG);
a5cd72d0 2270 if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs2==0) // div 0
2271 {
2272 if (dops[i].rs1) {
2273 signed char numerator = get_reg(i_regs->regmap, dops[i].rs1);
2274 assert(numerator >= 0);
2275 if (hr < 0)
2276 hr = HOST_TEMPREG;
2277 emit_movs(numerator, hr);
2278 if (lr >= 0) {
2279 if (dops[i].opcode2 == 0x1A) { // DIV
2280 emit_movimm(0xffffffff, lr);
2281 emit_negmi(lr, lr);
2282 }
2283 else
2284 emit_movimm(~0, lr);
2285 }
2286 }
2287 else {
2288 if (hr >= 0) emit_zeroreg(hr);
2289 if (lr >= 0) emit_movimm(~0,lr);
2290 }
2291 }
2292 else if ((dops[i].opcode2==0x1A || dops[i].opcode2==0x1B) && dops[i].rs1==0)
2293 {
2294 signed char denominator = get_reg(i_regs->regmap, dops[i].rs2);
2295 assert(denominator >= 0);
2296 if (hr >= 0) emit_zeroreg(hr);
2297 if (lr >= 0) {
2298 emit_zeroreg(lr);
2299 emit_test(denominator, denominator);
2300 emit_mvneq(lr, lr);
2301 }
2302 }
2303 else
2304 {
2305 // Multiply by zero is zero.
2306 if (hr >= 0) emit_zeroreg(hr);
2307 if (lr >= 0) emit_zeroreg(lr);
2308 }
57871462 2309 }
2310}
2311#define multdiv_assemble multdiv_assemble_arm
2312
d1e4ebd9 2313static void do_jump_vaddr(int rs)
2314{
2a014d73 2315 emit_far_jump(jump_vaddr_reg[rs]);
d1e4ebd9 2316}
2317
/*
 * Intentionally emits nothing on ARM. On x86 this step loads the value 0xf8
 * into register `r`; on ARM the hash can be done with a single instruction
 * (see do_rhash), so no preload is needed.
 */
static void do_preload_rhash(int r)
{
  (void)r;
}
2322
e2b5e7aa 2323static void do_preload_rhtbl(int ht) {
57871462 2324 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
2325}
2326
/*
 * Emit the mini hash: rh = rs & 0xf8.
 */
static void do_rhash(int rs, int rh)
{
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
2330
e2b5e7aa 2331static void do_miniht_load(int ht,int rh) {
57871462 2332 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
2333 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
2334}
2335
/*
 * Emit the mini hash table dispatch: compare the value loaded into `rh`
 * with the target address in `rs`; on a match, load pc from [ht + 4].
 * Otherwise fall through to the generic jump_vaddr path.
 */
static void do_miniht_jump(int rs, int rh, int ht)
{
  int jump_reg = rs;

  emit_cmp(rh, rs);
  emit_ldreq_indexed(ht, 4, 15);
#ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
  // with this hack the fallback jump always goes through r7
  if (jump_reg != 7)
    emit_mov(jump_reg, 7);
  jump_reg = 7;
#endif
  do_jump_vaddr(jump_reg);
}
2346
e2b5e7aa 2347static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 2348 #ifndef HAVE_ARMV7
57871462 2349 emit_movimm(return_address,rt); // PC into link register
643aeae3 2350 add_to_linker(out,return_address,1);
57871462 2351 emit_pcreladdr(temp);
643aeae3 2352 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
2353 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2354 #else
2355 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 2356 add_to_linker(out,return_address,1);
57871462 2357 emit_pcreladdr(temp);
643aeae3 2358 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 2359 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 2360 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 2361 #endif
2362}
2363
57871462 2364// CPU-architecture-specific initialization
2a014d73 2365static void arch_init(void)
2366{
2367 uintptr_t diff = (u_char *)&ndrc->tramp.f - (u_char *)&ndrc->tramp.ops - 8;
2368 struct tramp_insns *ops = ndrc->tramp.ops;
2369 size_t i;
2370 assert(!(diff & 3));
2371 assert(diff < 0x1000);
2372 start_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
2373 for (i = 0; i < ARRAY_SIZE(ndrc->tramp.ops); i++)
2374 ops[i].ldrpc = 0xe5900000 | rd_rn_rm(15,15,0) | diff; // ldr pc, [=val]
2375 end_tcache_write(ops, (u_char *)ops + sizeof(ndrc->tramp.ops));
57871462 2376}
b9b61529 2377
2378// vim:shiftwidth=2:expandtab