/* pcsx_rearmed.git: libpcsxcore/new_dynarec/assem_arm.c
 * (blame view captured at commit "drc: remove yet yet more n64 stuff") */
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus/PCSX - assem_arm.c                                        *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *   Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */

#include "../gte.h"
#define FLAGLESS
#include "../gte.h"
#undef FLAGLESS
#include "../gte_arm.h"
#include "../gte_neon.h"
#include "pcnt.h"
#include "arm_features.h"

1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
643aeae3 33u_char *translation_cache;
1e212a25 34#else
643aeae3 35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
bdeade46 36#endif
37
// Register mask of caller-saved registers; the __MACH__ variant has one
// extra bit (bit 9) set.
// NOTE(review): presumably bit n stands for ARM register rn - confirm at
// the users of this mask.
#ifndef __MACH__
#define CALLER_SAVE_REGS 0x100f
#else
#define CALLER_SAVE_REGS 0x120f
#endif

// Marks a definition as intentionally unused to silence compiler warnings.
#define unused __attribute__((unused))

// Debug builds leave helpers and variables unreferenced; silence the noise.
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif

57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
// Entry points implemented outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN handler indexed by host register number N.
// Slots 11 and 13-15 have no handler and are left null.
void * const jump_vaddr_reg[16] = {
  jump_vaddr_r0,
  jump_vaddr_r1,
  jump_vaddr_r2,
  jump_vaddr_r3,
  jump_vaddr_r4,
  jump_vaddr_r5,
  jump_vaddr_r6,
  jump_vaddr_r7,
  jump_vaddr_r8,
  jump_vaddr_r9,
  jump_vaddr_r10,
  0,
  jump_vaddr_r12,
  0,
  0,
  0
};

0bbd1454 96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
d148d265 127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];

/* Linker */

df4dc2b1 131static void set_jump_target(void *addr, void *target_)
57871462 132{
df4dc2b1 133 u_int target = (u_int)target_;
134 u_char *ptr = addr;
57871462 135 u_int *ptr2=(u_int *)ptr;
136 if(ptr[3]==0xe2) {
137 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 138 assert(((uintptr_t)addr&3)==0);
57871462 139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 142 }
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 146 assert(((uintptr_t)addr&3)==0);
57871462 147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
149 }
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 151 assert(((uintptr_t)addr&3)==0);
57871462 152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
154 }
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
160 }
161}
162
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant.  Kept disabled.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
  }
  else {
    assert((ptr[3]&0x0e)==0xa);
    u_int target_insn=*(u_int *)target;
    if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
      copy=0;
    }
    if((target_insn&0x0c100000)==0x04100000) { // Load
      copy=0;
    }
    if(target_insn&0x08000000) {
      copy=0;
    }
    if(copy) {
      ptr2[-1]=target_insn;
      target+=4;
    }
    *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
  }
}
#endif

198/* Literal pool */
e2b5e7aa 199static void add_literal(int addr,int val)
57871462 200{
15776b68 201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
9f51b4b9 204 literalcount++;
205}
57871462 206
d148d265 207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
57871462 210{
211 int *ptr=(int *)(stub+4);
d148d265 212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 213 u_int offset=*ptr&0xfff;
d148d265 214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
57871462 216}
217
f968d35d 218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
643aeae3 222static void *get_pointer(void *stub)
57871462 223{
224 //printf("get_pointer(%x)\n",(int)stub);
d148d265 225 int *i_ptr=find_extjump_insn(stub);
57871462 226 assert((*i_ptr&0x0f000000)==0x0a000000);
643aeae3 227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 228}
229
230// Find the "clean" entry point from a "dirty" entry point
231// by skipping past the call to verify_code
df4dc2b1 232static void *get_clean_addr(void *addr)
57871462 233{
df4dc2b1 234 signed int *ptr = addr;
665f33e1 235 #ifndef HAVE_ARMV7
57871462 236 ptr+=4;
237 #else
238 ptr+=6;
239 #endif
240 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
241 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
242 ptr++;
243 if((*ptr&0xFF000000)==0xea000000) {
df4dc2b1 244 return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
57871462 245 }
df4dc2b1 246 return ptr;
57871462 247}
248
e2b5e7aa 249static int verify_dirty(u_int *ptr)
57871462 250{
665f33e1 251 #ifndef HAVE_ARMV7
16c8be17 252 u_int offset;
57871462 253 // get from literal pool
15776b68 254 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
57871462 267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
df4dc2b1 283static int isclean(void *addr)
57871462 284{
665f33e1 285 #ifndef HAVE_ARMV7
581335b0 286 u_int *ptr=((u_int *)addr)+4;
57871462 287 #else
581335b0 288 u_int *ptr=((u_int *)addr)+6;
57871462 289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
4a35de07 298// get source that block at addr was compiled from (host pointers)
01d26796 299static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 300{
643aeae3 301 u_int *ptr = addr;
665f33e1 302 #ifndef HAVE_ARMV7
16c8be17 303 u_int offset;
57871462 304 // get from literal pool
15776b68 305 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
57871462 318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 328 *start=(u_char *)source;
329 *end=(u_char *)source+len;
57871462 330}

/* Register allocation */

334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 336static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 342
57871462 343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
9f51b4b9 345
57871462 346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
9f51b4b9 351
57871462 352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 354
57871462 355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
00fa9369 363 assert(r < 64);
364 if((cur->u>>r)&1) {
57871462 365 cur->regmap[preferred_reg]=reg;
366 cur->dirty&=~(1<<preferred_reg);
367 cur->isconst&=~(1<<preferred_reg);
368 return;
369 }
9f51b4b9 370
57871462 371 // Clear any unneeded registers
372 // We try to keep the mapping consistent, if possible, because it
373 // makes branches easier (especially loops). So we try to allocate
374 // first (see above) before removing old mappings. If this is not
375 // possible then go ahead and clear out the registers that are no
376 // longer needed.
377 for(hr=0;hr<HOST_REGS;hr++)
378 {
379 r=cur->regmap[hr];
380 if(r>=0) {
00fa9369 381 assert(r < 64);
382 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
57871462 383 }
384 }
385 // Try to allocate any available register, but prefer
386 // registers that have not been used recently.
387 if(i>0) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 }
399 // Try to allocate any available register
400 for(hr=0;hr<HOST_REGS;hr++) {
401 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
9f51b4b9 408
57871462 409 // Ok, now we have to evict someone
410 // Pick a register we hopefully won't need soon
411 u_char hsn[MAXREG+1];
412 memset(hsn,10,sizeof(hsn));
413 int j;
414 lsn(hsn,i,&preferred_reg);
415 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
416 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
417 if(i>0) {
418 // Don't evict the cycle count at entry points, otherwise the entry
419 // stub will have to write it.
420 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
ad49de89 421 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
57871462 422 for(j=10;j>=3;j--)
423 {
424 // Alloc preferred register if available
425 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 // Evict both parts of a 64-bit register
428 if((cur->regmap[hr]&63)==r) {
429 cur->regmap[hr]=-1;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 }
433 }
434 cur->regmap[preferred_reg]=reg;
435 return;
436 }
437 for(r=1;r<=MAXREG;r++)
438 {
439 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
440 for(hr=0;hr<HOST_REGS;hr++) {
441 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
442 if(cur->regmap[hr]==r+64) {
443 cur->regmap[hr]=reg;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 return;
447 }
448 }
449 }
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 }
461 }
462 }
463 }
464 for(j=10;j>=0;j--)
465 {
466 for(r=1;r<=MAXREG;r++)
467 {
468 if(hsn[r]==j) {
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r+64) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 }
486 }
487 }
c43b5311 488 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 489}
490
57871462 491// Allocate a temporary register. This is done without regard to
492// dirty status or whether the register we request is on the unneeded list
493// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 494static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 495{
496 int r,hr;
497 int preferred_reg = -1;
9f51b4b9 498
57871462 499 // see if it's already allocated
500 for(hr=0;hr<HOST_REGS;hr++)
501 {
502 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
503 }
9f51b4b9 504
57871462 505 // Try to allocate any available register
506 for(hr=HOST_REGS-1;hr>=0;hr--) {
507 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
508 cur->regmap[hr]=reg;
509 cur->dirty&=~(1<<hr);
510 cur->isconst&=~(1<<hr);
511 return;
512 }
513 }
9f51b4b9 514
57871462 515 // Find an unneeded register
516 for(hr=HOST_REGS-1;hr>=0;hr--)
517 {
518 r=cur->regmap[hr];
519 if(r>=0) {
00fa9369 520 assert(r < 64);
521 if((cur->u>>r)&1) {
522 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
523 cur->regmap[hr]=reg;
524 cur->dirty&=~(1<<hr);
525 cur->isconst&=~(1<<hr);
526 return;
57871462 527 }
528 }
529 }
530 }
9f51b4b9 531
57871462 532 // Ok, now we have to evict someone
533 // Pick a register we hopefully won't need soon
534 // TODO: we might want to follow unconditional jumps here
535 // TODO: get rid of dupe code and make this into a function
536 u_char hsn[MAXREG+1];
537 memset(hsn,10,sizeof(hsn));
538 int j;
539 lsn(hsn,i,&preferred_reg);
540 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
541 if(i>0) {
542 // Don't evict the cycle count at entry points, otherwise the entry
543 // stub will have to write it.
544 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
ad49de89 545 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP)) hsn[CCREG]=2;
57871462 546 for(j=10;j>=3;j--)
547 {
548 for(r=1;r<=MAXREG;r++)
549 {
550 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
551 for(hr=0;hr<HOST_REGS;hr++) {
552 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
553 if(cur->regmap[hr]==r+64) {
554 cur->regmap[hr]=reg;
555 cur->dirty&=~(1<<hr);
556 cur->isconst&=~(1<<hr);
557 return;
558 }
559 }
560 }
561 for(hr=0;hr<HOST_REGS;hr++) {
562 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
563 if(cur->regmap[hr]==r) {
564 cur->regmap[hr]=reg;
565 cur->dirty&=~(1<<hr);
566 cur->isconst&=~(1<<hr);
567 return;
568 }
569 }
570 }
571 }
572 }
573 }
574 }
575 for(j=10;j>=0;j--)
576 {
577 for(r=1;r<=MAXREG;r++)
578 {
579 if(hsn[r]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 if(cur->regmap[hr]==r+64) {
582 cur->regmap[hr]=reg;
583 cur->dirty&=~(1<<hr);
584 cur->isconst&=~(1<<hr);
585 return;
586 }
587 }
588 for(hr=0;hr<HOST_REGS;hr++) {
589 if(cur->regmap[hr]==r) {
590 cur->regmap[hr]=reg;
591 cur->dirty&=~(1<<hr);
592 cur->isconst&=~(1<<hr);
593 return;
594 }
595 }
596 }
597 }
598 }
c43b5311 599 SysPrintf("This shouldn't happen");exit(1);
57871462 600}
e2b5e7aa 601
57871462 602// Allocate a specific ARM register.
e2b5e7aa 603static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 604{
605 int n;
f776eb14 606 int dirty=0;
9f51b4b9 607
57871462 608 // see if it's already allocated (and dealloc it)
609 for(n=0;n<HOST_REGS;n++)
610 {
f776eb14 611 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
612 dirty=(cur->dirty>>n)&1;
613 cur->regmap[n]=-1;
614 }
57871462 615 }
9f51b4b9 616
57871462 617 cur->regmap[hr]=reg;
618 cur->dirty&=~(1<<hr);
f776eb14 619 cur->dirty|=dirty<<hr;
57871462 620 cur->isconst&=~(1<<hr);
621}
622
623// Alloc cycle count into dedicated register
e2b5e7aa 624static void alloc_cc(struct regstat *cur,int i)
57871462 625{
626 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
627}

/* Special alloc */


/* Assembler */

e2b5e7aa 634static unused char regname[16][4] = {
57871462 635 "r0",
636 "r1",
637 "r2",
638 "r3",
639 "r4",
640 "r5",
641 "r6",
642 "r7",
643 "r8",
644 "r9",
645 "r10",
646 "fp",
647 "r12",
648 "sp",
649 "lr",
650 "pc"};
651
e2b5e7aa 652static void output_w32(u_int word)
57871462 653{
654 *((u_int *)out)=word;
655 out+=4;
656}
e2b5e7aa 657
658static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 659{
660 assert(rd<16);
661 assert(rn<16);
662 assert(rm<16);
663 return((rn<<16)|(rd<<12)|rm);
664}
e2b5e7aa 665
666static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 667{
668 assert(rd<16);
669 assert(rn<16);
670 assert(imm<256);
671 assert((shift&1)==0);
672 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
673}
e2b5e7aa 674
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success returns 1 and stores the 12-bit rotate/imm8 field in
// *encoded; otherwise returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  // i counts down in steps of 2 while imm is rotated right in step;
  // (i&30)>>1 is the value placed in the rotate field
  for(int i=32;i>0;i-=2)
  {
    if(imm<256) {
      *encoded=((i&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}

691static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 692{
693 u_int ret=genimm(imm,encoded);
694 assert(ret);
581335b0 695 (void)ret;
cfbd3c6e 696}
e2b5e7aa 697
698static u_int genjmp(u_int addr)
57871462 699{
700 int offset=addr-(int)out-8;
e80343e2 701 if(offset<-33554432||offset>=33554432) {
702 if (addr>2) {
c43b5311 703 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 704 exit(1);
705 }
706 return 0;
707 }
57871462 708 return ((u_int)offset>>2)&0xffffff;
709}
710
e2b5e7aa 711static void emit_mov(int rs,int rt)
57871462 712{
713 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
714 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
715}
716
e2b5e7aa 717static void emit_movs(int rs,int rt)
57871462 718{
719 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
720 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
721}
722
e2b5e7aa 723static void emit_add(int rs1,int rs2,int rt)
57871462 724{
725 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
726 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
727}
728
e2b5e7aa 729static void emit_adds(int rs1,int rs2,int rt)
57871462 730{
731 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
732 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
733}
734
e2b5e7aa 735static void emit_adcs(int rs1,int rs2,int rt)
57871462 736{
737 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
738 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
739}
740
e2b5e7aa 741static void emit_neg(int rs, int rt)
57871462 742{
743 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
744 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
745}
746
e2b5e7aa 747static void emit_sub(int rs1,int rs2,int rt)
57871462 748{
749 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
750 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
751}
752
e2b5e7aa 753static void emit_zeroreg(int rt)
57871462 754{
755 assem_debug("mov %s,#0\n",regname[rt]);
756 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
757}
758
e2b5e7aa 759static void emit_loadlp(u_int imm,u_int rt)
790ee18e 760{
761 add_literal((int)out,imm);
762 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
763 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
764}
e2b5e7aa 765
766static void emit_movw(u_int imm,u_int rt)
790ee18e 767{
768 assert(imm<65536);
769 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
770 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
771}
e2b5e7aa 772
773static void emit_movt(u_int imm,u_int rt)
790ee18e 774{
775 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
776 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
777}
e2b5e7aa 778
779static void emit_movimm(u_int imm,u_int rt)
790ee18e 780{
781 u_int armval;
782 if(genimm(imm,&armval)) {
783 assem_debug("mov %s,#%d\n",regname[rt],imm);
784 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
785 }else if(genimm(~imm,&armval)) {
786 assem_debug("mvn %s,#%d\n",regname[rt],imm);
787 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
788 }else if(imm<65536) {
665f33e1 789 #ifndef HAVE_ARMV7
790ee18e 790 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
791 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
792 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
793 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
794 #else
795 emit_movw(imm,rt);
796 #endif
797 }else{
665f33e1 798 #ifndef HAVE_ARMV7
790ee18e 799 emit_loadlp(imm,rt);
800 #else
801 emit_movw(imm&0x0000FFFF,rt);
802 emit_movt(imm&0xFFFF0000,rt);
803 #endif
804 }
805}
e2b5e7aa 806
807static void emit_pcreladdr(u_int rt)
790ee18e 808{
809 assem_debug("add %s,pc,#?\n",regname[rt]);
810 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
811}
812
e2b5e7aa 813static void emit_loadreg(int r, int hr)
57871462 814{
3d624f89 815 if(r&64) {
c43b5311 816 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 817 assert(0);
818 return;
3d624f89 819 }
57871462 820 if((r&63)==0)
821 emit_zeroreg(hr);
822 else {
3d624f89 823 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 824 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
825 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
826 if(r==CCREG) addr=(int)&cycle_count;
827 if(r==CSREG) addr=(int)&Status;
57871462 828 if(r==INVCP) addr=(int)&invc_ptr;
829 u_int offset = addr-(u_int)&dynarec_local;
830 assert(offset<4096);
831 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
832 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
833 }
834}
e2b5e7aa 835
836static void emit_storereg(int r, int hr)
57871462 837{
3d624f89 838 if(r&64) {
c43b5311 839 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 840 assert(0);
841 return;
3d624f89 842 }
3d624f89 843 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 844 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
845 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
846 if(r==CCREG) addr=(int)&cycle_count;
57871462 847 u_int offset = addr-(u_int)&dynarec_local;
848 assert(offset<4096);
849 assem_debug("str %s,fp+%d\n",regname[hr],offset);
850 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
851}
852
e2b5e7aa 853static void emit_test(int rs, int rt)
57871462 854{
855 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
856 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
857}
858
e2b5e7aa 859static void emit_testimm(int rs,int imm)
57871462 860{
861 u_int armval;
5a05d80c 862 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 863 genimm_checked(imm,&armval);
57871462 864 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
865}
866
e2b5e7aa 867static void emit_testeqimm(int rs,int imm)
b9b61529 868{
869 u_int armval;
870 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 871 genimm_checked(imm,&armval);
b9b61529 872 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
873}
874
e2b5e7aa 875static void emit_not(int rs,int rt)
57871462 876{
877 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
878 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
879}
880
e2b5e7aa 881static void emit_mvnmi(int rs,int rt)
b9b61529 882{
883 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
884 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
885}
886
e2b5e7aa 887static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 888{
889 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
890 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
891}
892
e2b5e7aa 893static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 894{
895 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
896 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
897}
e2b5e7aa 898
e2b5e7aa 899static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 900{
901 assert(rs<16);
902 assert(rt<16);
903 assert(imm<32);
904 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
905 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
906}
907
e2b5e7aa 908static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 909{
910 assert(rs<16);
911 assert(rt<16);
912 assert(imm<32);
913 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
914 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
915}
916
e2b5e7aa 917static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 918{
919 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
921}
922
e2b5e7aa 923static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 924{
925 assert(rs<16);
926 assert(rt<16);
927 if(imm!=0) {
57871462 928 u_int armval;
929 if(genimm(imm,&armval)) {
930 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
931 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
932 }else if(genimm(-imm,&armval)) {
8a0a8423 933 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 934 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 935 #ifdef HAVE_ARMV7
936 }else if(rt!=rs&&(u_int)imm<65536) {
937 emit_movw(imm&0x0000ffff,rt);
938 emit_add(rs,rt,rt);
939 }else if(rt!=rs&&(u_int)-imm<65536) {
940 emit_movw(-imm&0x0000ffff,rt);
941 emit_sub(rs,rt,rt);
942 #endif
943 }else if((u_int)-imm<65536) {
57871462 944 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
945 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
946 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
947 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 948 }else {
949 do {
950 int shift = (ffs(imm) - 1) & ~1;
951 int imm8 = imm & (0xff << shift);
952 genimm_checked(imm8,&armval);
953 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
954 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
955 rs = rt;
956 imm &= ~imm8;
957 }
958 while (imm != 0);
57871462 959 }
960 }
961 else if(rs!=rt) emit_mov(rs,rt);
962}
963
e2b5e7aa 964static void emit_addimm_and_set_flags(int imm,int rt)
57871462 965{
966 assert(imm>-65536&&imm<65536);
967 u_int armval;
968 if(genimm(imm,&armval)) {
969 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
970 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
971 }else if(genimm(-imm,&armval)) {
972 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
973 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
974 }else if(imm<0) {
975 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
976 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
977 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
978 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
979 }else{
980 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
981 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
982 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
983 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
984 }
985}
e2b5e7aa 986
987static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 988{
989 emit_addimm(rt,imm,rt);
990}
991
e2b5e7aa 992static void emit_addnop(u_int r)
57871462 993{
994 assert(r<16);
995 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
996 output_w32(0xe2800000|rd_rn_rm(r,r,0));
997}
998
e2b5e7aa 999static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1000{
1001 u_int armval;
cfbd3c6e 1002 genimm_checked(imm,&armval);
57871462 1003 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1004 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1005}
1edfcc68 1006
e2b5e7aa 1007static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1008{
1009 // TODO: if(genimm(imm,&armval)) ...
1010 // else
1011 emit_movimm(imm,HOST_TEMPREG);
1012 emit_adds(HOST_TEMPREG,rsl,rtl);
1013 emit_adcimm(rsh,0,rth);
1014}
1015
e2b5e7aa 1016static void emit_andimm(int rs,int imm,int rt)
57871462 1017{
1018 u_int armval;
790ee18e 1019 if(imm==0) {
1020 emit_zeroreg(rt);
1021 }else if(genimm(imm,&armval)) {
57871462 1022 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1023 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1024 }else if(genimm(~imm,&armval)) {
1025 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1026 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1027 }else if(imm==65535) {
332a4533 1028 #ifndef HAVE_ARMV6
57871462 1029 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1030 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1031 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1032 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1033 #else
1034 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1035 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1036 #endif
1037 }else{
1038 assert(imm>0&&imm<65535);
665f33e1 1039 #ifndef HAVE_ARMV7
57871462 1040 assem_debug("mov r14,#%d\n",imm&0xFF00);
1041 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1042 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1043 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1044 #else
1045 emit_movw(imm,HOST_TEMPREG);
1046 #endif
1047 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1048 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1049 }
1050}
1051
e2b5e7aa 1052static void emit_orimm(int rs,int imm,int rt)
57871462 1053{
1054 u_int armval;
790ee18e 1055 if(imm==0) {
1056 if(rs!=rt) emit_mov(rs,rt);
1057 }else if(genimm(imm,&armval)) {
57871462 1058 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1059 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1060 }else{
1061 assert(imm>0&&imm<65536);
1062 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1063 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1064 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1065 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1066 }
1067}
1068
e2b5e7aa 1069static void emit_xorimm(int rs,int imm,int rt)
57871462 1070{
57871462 1071 u_int armval;
790ee18e 1072 if(imm==0) {
1073 if(rs!=rt) emit_mov(rs,rt);
1074 }else if(genimm(imm,&armval)) {
57871462 1075 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1076 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1077 }else{
514ed0d9 1078 assert(imm>0&&imm<65536);
57871462 1079 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1080 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1081 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1082 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1083 }
1084}
1085
e2b5e7aa 1086static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1087{
1088 assert(imm>0);
1089 assert(imm<32);
1090 //if(imm==1) ...
1091 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1092 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1093}
1094
e2b5e7aa 1095static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1096{
1097 assert(imm>0);
1098 assert(imm<32);
1099 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1100 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1101}
1102
e2b5e7aa 1103static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1104{
1105 assert(imm>0);
1106 assert(imm<32);
1107 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1108 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1109}
1110
e2b5e7aa 1111static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1112{
1113 assert(imm>0);
1114 assert(imm<32);
1115 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1116 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1117}
1118
e2b5e7aa 1119static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1120{
1121 assert(imm>0);
1122 assert(imm<32);
1123 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1124 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1125}
1126
e2b5e7aa 1127static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1128{
1129 assert(imm>0);
1130 assert(imm<32);
1131 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1132 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1133}
1134
// Sign-extend the low 16 bits of rs into rt: a single "sxth" when
// HAVE_ARMV6 is defined, otherwise a shift left by 16 followed by an
// arithmetic shift right by 16.
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1145
// Sign-extend the low 8 bits of rs into rt: a single "sxtb" when
// HAVE_ARMV6 is defined, otherwise a shift left by 24 followed by an
// arithmetic shift right by 24.
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1156
e2b5e7aa 1157static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1158{
1159 assert(rs<16);
1160 assert(rt<16);
1161 assert(shift<16);
1162 //if(imm==1) ...
1163 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1164 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1165}
e2b5e7aa 1166
1167static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1168{
1169 assert(rs<16);
1170 assert(rt<16);
1171 assert(shift<16);
1172 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1173 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1174}
e2b5e7aa 1175
1176static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1177{
1178 assert(rs<16);
1179 assert(rt<16);
1180 assert(shift<16);
1181 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1182 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1183}
57871462 1184
e2b5e7aa 1185static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1186{
1187 assert(rs<16);
1188 assert(rt<16);
1189 assert(shift<16);
1190 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1191 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1192}
e2b5e7aa 1193
1194static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1195{
1196 assert(rs<16);
1197 assert(rt<16);
1198 assert(shift<16);
1199 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1200 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1201}
1202
e2b5e7aa 1203static void emit_cmpimm(int rs,int imm)
57871462 1204{
1205 u_int armval;
1206 if(genimm(imm,&armval)) {
5a05d80c 1207 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1208 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1209 }else if(genimm(-imm,&armval)) {
5a05d80c 1210 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1211 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1212 }else if(imm>0) {
1213 assert(imm<65536);
57871462 1214 emit_movimm(imm,HOST_TEMPREG);
57871462 1215 assem_debug("cmp %s,r14\n",regname[rs]);
1216 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1217 }else{
1218 assert(imm>-65536);
57871462 1219 emit_movimm(-imm,HOST_TEMPREG);
57871462 1220 assem_debug("cmn %s,r14\n",regname[rs]);
1221 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1222 }
1223}
1224
e2b5e7aa 1225static void emit_cmovne_imm(int imm,int rt)
57871462 1226{
1227 assem_debug("movne %s,#%d\n",regname[rt],imm);
1228 u_int armval;
cfbd3c6e 1229 genimm_checked(imm,&armval);
57871462 1230 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1231}
e2b5e7aa 1232
1233static void emit_cmovl_imm(int imm,int rt)
57871462 1234{
1235 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1236 u_int armval;
cfbd3c6e 1237 genimm_checked(imm,&armval);
57871462 1238 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1239}
e2b5e7aa 1240
1241static void emit_cmovb_imm(int imm,int rt)
57871462 1242{
1243 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1244 u_int armval;
cfbd3c6e 1245 genimm_checked(imm,&armval);
57871462 1246 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1247}
e2b5e7aa 1248
e2b5e7aa 1249static void emit_cmovne_reg(int rs,int rt)
57871462 1250{
1251 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1252 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1253}
e2b5e7aa 1254
1255static void emit_cmovl_reg(int rs,int rt)
57871462 1256{
1257 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1258 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1259}
e2b5e7aa 1260
1261static void emit_cmovs_reg(int rs,int rt)
57871462 1262{
1263 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1265}
1266
// Emit code that leaves 1 in rt when the emit_cmpimm(rs,imm) comparison
// satisfies "lt", and 0 otherwise. When rs and rt are the same register the
// zeroing is done after the compare so the operand is not destroyed first.
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1274
// Emit code that leaves 1 in rt when the emit_cmpimm(rs,imm) comparison
// satisfies "cc", and 0 otherwise. When rs and rt are the same register the
// zeroing is done after the compare so the operand is not destroyed first.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1282
e2b5e7aa 1283static void emit_cmp(int rs,int rt)
57871462 1284{
1285 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1286 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1287}
e2b5e7aa 1288
// Emit: compare rs with 1, load 1 into rt, then replace it with 0 under the
// "lt" condition.
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);   // compare first: rt may alias rs
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1296
// Emit: copy rs to rt with emit_movs() (or, when they are the same
// register, just emit_test(rs,rs)), then load 1 into rt under the "ne"
// condition.
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1304
// Emit code that leaves 1 in rt when "cmp rs1,rs2" satisfies "lt", and 0
// otherwise. If rt aliases either operand it is zeroed only after the
// compare has been emitted.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1313
// Emit code that leaves 1 in rt when "cmp rs1,rs2" satisfies "cc", and 0
// otherwise. If rt aliases either operand it is zeroed only after the
// compare has been emitted.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1322
#ifdef DRC_DBG
// Debug builds only: a table mapping known call/jump targets to printable
// names, consulted by the branch emitters when they trace "bl"/"b".
extern void gen_interupt();
extern void do_insn_cmp();
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Return the space-prefixed name registered for address a, or "" when a is
// not listed in function_names[].
static const char *func_name(intptr_t a)
{
  const int count = sizeof(function_names) / sizeof(function_names[0]);
  int idx = 0;

  while (idx < count) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
// Without DRC_DBG no names are available; every lookup yields "".
#define func_name(x) ""
#endif
1363
643aeae3 1364static void emit_call(const void *a_)
57871462 1365{
643aeae3 1366 int a = (int)a_;
dd114d7d 1367 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1368 u_int offset=genjmp(a);
1369 output_w32(0xeb000000|offset);
1370}
e2b5e7aa 1371
b14b6a8f 1372static void emit_jmp(const void *a_)
57871462 1373{
b14b6a8f 1374 int a = (int)a_;
dd114d7d 1375 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1376 u_int offset=genjmp(a);
1377 output_w32(0xea000000|offset);
1378}
e2b5e7aa 1379
// Emit "bne" to address a_. The branch field comes from genjmp().
static void emit_jne(const void *a_)
{
  const int target = (int)a_;
  assem_debug("bne %x\n", target);
  output_w32(0x1a000000 | genjmp(target));
}
e2b5e7aa 1387
// Emit "beq" to address a. The branch field comes from genjmp().
static void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
e2b5e7aa 1394
// Emit "bmi" to address a. The branch field comes from genjmp().
static void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
e2b5e7aa 1401
// Emit "bpl" to address a. The branch field comes from genjmp().
static void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
e2b5e7aa 1408
// Emit "blt" to address a. The branch field comes from genjmp().
static void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
e2b5e7aa 1415
// Emit "bge" to address a. The branch field comes from genjmp().
static void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
e2b5e7aa 1422
// Emit "bvc" to address a. The branch field comes from genjmp().
static void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
e2b5e7aa 1429
// Emit "bcs" to address a. The branch field comes from genjmp().
static void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
e2b5e7aa 1436
// Emit "bcc" to address a_. The branch field comes from genjmp().
static void emit_jcc(void *a_)
{
  const int target = (int)a_;
  assem_debug("bcc %x\n", target);
  output_w32(0x3a000000 | genjmp(target));
}
1444
e2b5e7aa 1445static void emit_callreg(u_int r)
57871462 1446{
c6c3b1b3 1447 assert(r<15);
1448 assem_debug("blx %s\n",regname[r]);
1449 output_w32(0xe12fff30|r);
57871462 1450}
e2b5e7aa 1451
1452static void emit_jmpreg(u_int r)
57871462 1453{
1454 assem_debug("mov pc,%s\n",regname[r]);
1455 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1456}
1457
e2b5e7aa 1458static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1459{
1460 assert(offset>-4096&&offset<4096);
1461 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1462 if(offset>=0) {
1463 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1464 }else{
1465 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1466 }
1467}
e2b5e7aa 1468
1469static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1470{
1471 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1472 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1473}
e2b5e7aa 1474
1475static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1476{
1477 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1478 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1479}
e2b5e7aa 1480
1481static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1482{
1483 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1484 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1485}
e2b5e7aa 1486
1487static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1488{
1489 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1490 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1491}
e2b5e7aa 1492
1493static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1494{
1495 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1496 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1497}
e2b5e7aa 1498
1499static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1500{
1501 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1502 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1503}
e2b5e7aa 1504
e2b5e7aa 1505static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1506{
1507 assert(offset>-256&&offset<256);
1508 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1509 if(offset>=0) {
1510 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1511 }else{
1512 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1513 }
1514}
e2b5e7aa 1515
e2b5e7aa 1516static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1517{
1518 assert(offset>-256&&offset<256);
1519 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1520 if(offset>=0) {
1521 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1522 }else{
1523 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1524 }
1525}
e2b5e7aa 1526
1527static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1528{
1529 assert(offset>-4096&&offset<4096);
1530 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1531 if(offset>=0) {
1532 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1533 }else{
1534 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1535 }
1536}
e2b5e7aa 1537
e2b5e7aa 1538static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1539{
1540 assert(offset>-256&&offset<256);
1541 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1542 if(offset>=0) {
1543 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1544 }else{
1545 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1546 }
1547}
e2b5e7aa 1548
054175e9 1549static void emit_ldrd(int offset, int rs, int rt)
1550{
1551 assert(offset>-256&&offset<256);
1552 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1553 if(offset>=0) {
1554 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1555 }else{
1556 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1557 }
1558}
e2b5e7aa 1559
643aeae3 1560static void emit_readword(void *addr, int rt)
57871462 1561{
643aeae3 1562 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1563 assert(offset<4096);
1564 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1565 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1566}
e2b5e7aa 1567
e2b5e7aa 1568static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1569{
1570 assert(offset>-4096&&offset<4096);
1571 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1572 if(offset>=0) {
1573 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1574 }else{
1575 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1576 }
1577}
e2b5e7aa 1578
e2b5e7aa 1579static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1580{
1581 assert(offset>-256&&offset<256);
1582 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1583 if(offset>=0) {
1584 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1585 }else{
1586 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1587 }
1588}
e2b5e7aa 1589
1590static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1591{
1592 assert(offset>-4096&&offset<4096);
1593 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1594 if(offset>=0) {
1595 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1596 }else{
1597 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1598 }
1599}
e2b5e7aa 1600
e2b5e7aa 1601static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1602{
1603 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1604 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1605}
e2b5e7aa 1606
1607static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1608{
1609 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1610 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1611}
e2b5e7aa 1612
1613static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1614{
1615 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1616 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1617}
e2b5e7aa 1618
643aeae3 1619static void emit_writeword(int rt, void *addr)
57871462 1620{
643aeae3 1621 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1622 assert(offset<4096);
1623 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1624 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1625}
e2b5e7aa 1626
e2b5e7aa 1627static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1628{
1629 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1630 assert(rs1<16);
1631 assert(rs2<16);
1632 assert(hi<16);
1633 assert(lo<16);
1634 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1635}
e2b5e7aa 1636
1637static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1638{
1639 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1640 assert(rs1<16);
1641 assert(rs2<16);
1642 assert(hi<16);
1643 assert(lo<16);
1644 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1645}
1646
e2b5e7aa 1647static void emit_clz(int rs,int rt)
57871462 1648{
1649 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1650 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1651}
1652
e2b5e7aa 1653static void emit_subcs(int rs1,int rs2,int rt)
57871462 1654{
1655 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1656 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1657}
1658
e2b5e7aa 1659static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1660{
1661 assert(imm>0);
1662 assert(imm<32);
1663 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1664 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1665}
1666
e2b5e7aa 1667static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1668{
1669 assert(imm>0);
1670 assert(imm<32);
1671 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1672 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1673}
1674
e2b5e7aa 1675static void emit_negmi(int rs, int rt)
57871462 1676{
1677 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1678 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1679}
1680
e2b5e7aa 1681static void emit_negsmi(int rs, int rt)
57871462 1682{
1683 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1684 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1685}
1686
e2b5e7aa 1687static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1688{
1689 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1690 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1691}
1692
e2b5e7aa 1693static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1694{
1695 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1696 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1697}
1698
e2b5e7aa 1699static void emit_teq(int rs, int rt)
57871462 1700{
1701 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1702 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1703}
1704
e2b5e7aa 1705static void emit_rsbimm(int rs, int imm, int rt)
57871462 1706{
1707 u_int armval;
cfbd3c6e 1708 genimm_checked(imm,&armval);
57871462 1709 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1710 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1711}
1712
1713// Load 2 immediates optimizing for small code size
e2b5e7aa 1714static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 1715{
1716 emit_movimm(imm1,rt1);
1717 u_int armval;
1718 if(genimm(imm2-imm1,&armval)) {
1719 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1720 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1721 }else if(genimm(imm1-imm2,&armval)) {
1722 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1723 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1724 }
1725 else emit_movimm(imm2,rt2);
1726}
1727
1728// Conditionally select one of two immediates, optimizing for small code size
1729// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1730static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1731{
1732 u_int armval;
1733 if(genimm(imm2-imm1,&armval)) {
1734 emit_movimm(imm1,rt);
1735 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1736 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1737 }else if(genimm(imm1-imm2,&armval)) {
1738 emit_movimm(imm1,rt);
1739 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1740 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1741 }
1742 else {
665f33e1 1743 #ifndef HAVE_ARMV7
57871462 1744 emit_movimm(imm1,rt);
1745 add_literal((int)out,imm2);
1746 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1747 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1748 #else
1749 emit_movw(imm1&0x0000FFFF,rt);
1750 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1751 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1752 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1753 }
1754 emit_movt(imm1&0xFFFF0000,rt);
1755 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1756 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1757 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1758 }
1759 #endif
1760 }
1761}
1762
57871462 1763// special case for checking invalid_code
e2b5e7aa 1764static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1765{
1766 assert(imm<128&&imm>=0);
1767 assert(r>=0&&r<16);
1768 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1769 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1770 emit_cmpimm(HOST_TEMPREG,imm);
1771}
1772
// Emit "blne" to address a. The branch field comes from genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
1779
57871462 1780// Used to preload hash table entries
e2b5e7aa 1781static unused void emit_prefetchreg(int r)
57871462 1782{
1783 assem_debug("pld %s\n",regname[r]);
1784 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1785}
1786
1787// Special case for mini_ht
e2b5e7aa 1788static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1789{
1790 assert(offset<4096);
1791 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1792 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1793}
1794
e2b5e7aa 1795static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1796{
1797 u_int armval;
cfbd3c6e 1798 genimm_checked(imm,&armval);
b9b61529 1799 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1800 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1801}
1802
e2b5e7aa 1803static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1804{
1805 u_int armval;
cfbd3c6e 1806 genimm_checked(imm,&armval);
b9b61529 1807 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1808 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1809}
1810
e2b5e7aa 1811static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1812{
1813 u_int armval;
1814 genimm_checked(imm,&armval);
1815 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1816 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1817}
1818
e2b5e7aa 1819static void emit_jno_unlikely(int a)
57871462 1820{
1821 //emit_jno(a);
1822 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1823 output_w32(0x72800000|rd_rn_rm(15,15,0));
1824}
1825
054175e9 1826static void save_regs_all(u_int reglist)
57871462 1827{
054175e9 1828 int i;
57871462 1829 if(!reglist) return;
1830 assem_debug("stmia fp,{");
054175e9 1831 for(i=0;i<16;i++)
1832 if(reglist&(1<<i))
1833 assem_debug("r%d,",i);
57871462 1834 assem_debug("}\n");
1835 output_w32(0xe88b0000|reglist);
1836}
e2b5e7aa 1837
054175e9 1838static void restore_regs_all(u_int reglist)
57871462 1839{
054175e9 1840 int i;
57871462 1841 if(!reglist) return;
1842 assem_debug("ldmia fp,{");
054175e9 1843 for(i=0;i<16;i++)
1844 if(reglist&(1<<i))
1845 assem_debug("r%d,",i);
57871462 1846 assem_debug("}\n");
1847 output_w32(0xe89b0000|reglist);
1848}
e2b5e7aa 1849
054175e9 1850// Save registers before function call
1851static void save_regs(u_int reglist)
1852{
4d646738 1853 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1854 save_regs_all(reglist);
1855}
e2b5e7aa 1856
054175e9 1857// Restore registers after function call
1858static void restore_regs(u_int reglist)
1859{
4d646738 1860 reglist&=CALLER_SAVE_REGS;
054175e9 1861 restore_regs_all(reglist);
1862}
57871462 1863
57871462 1864/* Stubs/epilogue */
1865
e2b5e7aa 1866static void literal_pool(int n)
57871462 1867{
1868 if(!literalcount) return;
1869 if(n) {
1870 if((int)out-literals[0][0]<4096-n) return;
1871 }
1872 u_int *ptr;
1873 int i;
1874 for(i=0;i<literalcount;i++)
1875 {
77750690 1876 u_int l_addr=(u_int)out;
1877 int j;
1878 for(j=0;j<i;j++) {
1879 if(literals[j][1]==literals[i][1]) {
1880 //printf("dup %08x\n",literals[i][1]);
1881 l_addr=literals[j][0];
1882 break;
1883 }
1884 }
57871462 1885 ptr=(u_int *)literals[i][0];
77750690 1886 u_int offset=l_addr-(u_int)ptr-8;
57871462 1887 assert(offset<4096);
1888 assert(!(offset&3));
1889 *ptr|=offset;
77750690 1890 if(l_addr==(u_int)out) {
1891 literals[i][0]=l_addr; // remember for dupes
1892 output_w32(literals[i][1]);
1893 }
57871462 1894 }
1895 literalcount=0;
1896}
1897
e2b5e7aa 1898static void literal_pool_jumpover(int n)
57871462 1899{
1900 if(!literalcount) return;
1901 if(n) {
1902 if((int)out-literals[0][0]<4096-n) return;
1903 }
df4dc2b1 1904 void *jaddr = out;
57871462 1905 emit_jmp(0);
1906 literal_pool(0);
df4dc2b1 1907 set_jump_target(jaddr, out);
57871462 1908}
1909
643aeae3 1910static void emit_extjump2(u_char *addr, int target, void *linker)
57871462 1911{
1912 u_char *ptr=(u_char *)addr;
1913 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 1914 (void)ptr;
1915
57871462 1916 emit_loadlp(target,0);
643aeae3 1917 emit_loadlp((u_int)addr,1);
1918 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
57871462 1919 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
1920//DEBUG >
1921#ifdef DEBUG_CYCLE_COUNT
643aeae3 1922 emit_readword(&last_count,ECX);
57871462 1923 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1924 emit_readword(&next_interupt,ECX);
1925 emit_writeword(HOST_CCREG,&Count);
57871462 1926 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 1927 emit_writeword(ECX,&last_count);
57871462 1928#endif
1929//DEBUG <
1930 emit_jmp(linker);
1931}
1932
643aeae3 1933static void emit_extjump(void *addr, int target)
57871462 1934{
b14b6a8f 1935 emit_extjump2(addr, target, dyna_linker);
57871462 1936}
e2b5e7aa 1937
643aeae3 1938static void emit_extjump_ds(void *addr, int target)
57871462 1939{
b14b6a8f 1940 emit_extjump2(addr, target, dyna_linker_ds);
57871462 1941}
1942
13e35c04 1943// put rt_val into rt, potentially making use of rs with value rs_val
1944static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
1945{
8575a877 1946 u_int armval;
1947 int diff;
1948 if(genimm(rt_val,&armval)) {
1949 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
1950 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1951 return;
1952 }
1953 if(genimm(~rt_val,&armval)) {
1954 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
1955 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1956 return;
1957 }
1958 diff=rt_val-rs_val;
1959 if(genimm(diff,&armval)) {
1960 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
1961 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1962 return;
1963 }else if(genimm(-diff,&armval)) {
1964 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
1965 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1966 return;
1967 }
1968 emit_movimm(rt_val,rt);
1969}
1970
// Return 1 if emit_movimm_from() can do its job cheaply for these two
// values, 0 otherwise. The values count as similar when they are equal, or
// when v2-v1 or its negation, after dropping trailing pairs of zero bits,
// is below 0x100.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int diff = v2 - v1;
  u_int xs;

  // Forward difference: strip trailing zero bit pairs, then test the size.
  xs = diff;
  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  // Same test on the negated difference.
  xs = -diff;
  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  return 0;
}
cbbab9cd 1986
// Move the values held in registers a0 and a1 into r0 and r1 respectively.
// A negative register number means "nothing to pass" for that slot in the
// paths that check for it. Trashes r2 when the two sources must be swapped.
static void pass_args(int a0, int a1)
{
  if (a1 == 0 && a0 == 1) {
    // must swap
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
    return;
  }
  if (a1 == 0 && a0 != 0) {
    // a1 lives in r0: move it out before r0 is overwritten.
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
    return;
  }
  // No conflict: fill r0 first, then r1, skipping moves that are no-ops.
  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2003
b14b6a8f 2004static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 2005{
2006 switch(type) {
2007 case LOADB_STUB: emit_signextend8(rs,rt); break;
2008 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2009 case LOADH_STUB: emit_signextend16(rs,rt); break;
2010 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2011 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2012 default: assert(0);
2013 }
2014}
2015
b1be1eee 2016#include "pcsxmem.h"
2017#include "pcsxmem_inline.c"
b1be1eee 2018
e2b5e7aa 2019static void do_readstub(int n)
57871462 2020{
b14b6a8f 2021 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 2022 literal_pool(256);
b14b6a8f 2023 set_jump_target(stubs[n].addr, out);
2024 enum stub_type type=stubs[n].type;
2025 int i=stubs[n].a;
2026 int rs=stubs[n].b;
2027 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2028 u_int reglist=stubs[n].e;
57871462 2029 signed char *i_regmap=i_regs->regmap;
581335b0 2030 int rt;
b9b61529 2031 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2032 rt=get_reg(i_regmap,FTEMP);
2033 }else{
57871462 2034 rt=get_reg(i_regmap,rt1[i]);
2035 }
2036 assert(rs>=0);
df4dc2b1 2037 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2038 void *restore_jump = NULL;
c6c3b1b3 2039 reglist|=(1<<rs);
2040 for(r=0;r<=12;r++) {
2041 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2042 temp=r; break;
2043 }
2044 }
db829eeb 2045 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2046 reglist&=~(1<<rt);
2047 if(temp==-1) {
2048 save_regs(reglist);
2049 regs_saved=1;
2050 temp=(rs==0)?2:0;
2051 }
2052 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2053 temp2=1;
643aeae3 2054 emit_readword(&mem_rtab,temp);
c6c3b1b3 2055 emit_shrimm(rs,12,temp2);
2056 emit_readword_dualindexedx4(temp,temp2,temp2);
2057 emit_lsls_imm(temp2,1,temp2);
2058 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2059 switch(type) {
2060 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2061 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2062 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2063 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2064 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 2065 default: assert(0);
c6c3b1b3 2066 }
2067 }
2068 if(regs_saved) {
df4dc2b1 2069 restore_jump=out;
c6c3b1b3 2070 emit_jcc(0); // jump to reg restore
2071 }
2072 else
b14b6a8f 2073 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 2074
2075 if(!regs_saved)
2076 save_regs(reglist);
643aeae3 2077 void *handler=NULL;
c6c3b1b3 2078 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 2079 handler=jump_handler_read8;
c6c3b1b3 2080 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 2081 handler=jump_handler_read16;
c6c3b1b3 2082 if(type==LOADW_STUB)
643aeae3 2083 handler=jump_handler_read32;
2084 assert(handler);
b96d3df7 2085 pass_args(rs,temp2);
c6c3b1b3 2086 int cc=get_reg(i_regmap,CCREG);
2087 if(cc<0)
2088 emit_loadreg(CCREG,2);
b14b6a8f 2089 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
c6c3b1b3 2090 emit_call(handler);
2091 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2092 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2093 }
2094 if(restore_jump)
df4dc2b1 2095 set_jump_target(restore_jump, out);
c6c3b1b3 2096 restore_regs(reglist);
b14b6a8f 2097 emit_jmp(stubs[n].retaddr); // return address
57871462 2098}
2099
c6c3b1b3 2100// return memhandler, or get directly accessable address and return 0
643aeae3 2101static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
c6c3b1b3 2102{
2103 u_int l1,l2=0;
2104 l1=((u_int *)table)[addr>>12];
2105 if((l1&(1<<31))==0) {
2106 u_int v=l1<<1;
2107 *addr_host=v+addr;
643aeae3 2108 return NULL;
c6c3b1b3 2109 }
2110 else {
2111 l1<<=1;
2112 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2113 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2114 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2115 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2116 else
2117 l2=((u_int *)l1)[(addr&0xfff)/4];
2118 if((l2&(1<<31))==0) {
2119 u_int v=l2<<1;
2120 *addr_host=v+(addr&0xfff);
643aeae3 2121 return NULL;
c6c3b1b3 2122 }
643aeae3 2123 return (void *)(l2<<1);
c6c3b1b3 2124 }
2125}
c6c3b1b3 2126
b14b6a8f 2127static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2128{
2129 int rs=get_reg(regmap,target);
57871462 2130 int rt=get_reg(regmap,target);
535d208a 2131 if(rs<0) rs=get_reg(regmap,-1);
57871462 2132 assert(rs>=0);
643aeae3 2133 u_int host_addr=0,is_dynamic,far_call=0;
2134 void *handler;
b1be1eee 2135 int cc=get_reg(regmap,CCREG);
2136 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2137 return;
643aeae3 2138 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
2139 if (handler == NULL) {
db829eeb 2140 if(rt<0||rt1[i]==0)
c6c3b1b3 2141 return;
13e35c04 2142 if(addr!=host_addr)
2143 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2144 switch(type) {
2145 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2146 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2147 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2148 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2149 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2150 default: assert(0);
2151 }
2152 return;
2153 }
b1be1eee 2154 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2155 if(is_dynamic) {
2156 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 2157 handler=jump_handler_read8;
b1be1eee 2158 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 2159 handler=jump_handler_read16;
b1be1eee 2160 if(type==LOADW_STUB)
643aeae3 2161 handler=jump_handler_read32;
b1be1eee 2162 }
c6c3b1b3 2163
2164 // call a memhandler
db829eeb 2165 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2166 reglist&=~(1<<rt);
2167 save_regs(reglist);
2168 if(target==0)
2169 emit_movimm(addr,0);
2170 else if(rs!=0)
2171 emit_mov(rs,0);
643aeae3 2172 int offset=(u_char *)handler-out-8;
c6c3b1b3 2173 if(offset<-33554432||offset>=33554432) {
2174 // unreachable memhandler, a plugin func perhaps
643aeae3 2175 emit_movimm((u_int)handler,12);
b1be1eee 2176 far_call=1;
2177 }
2178 if(cc<0)
2179 emit_loadreg(CCREG,2);
2180 if(is_dynamic) {
2181 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2182 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2183 }
b1be1eee 2184 else {
643aeae3 2185 emit_readword(&last_count,3);
b1be1eee 2186 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2187 emit_add(2,3,2);
643aeae3 2188 emit_writeword(2,&Count);
b1be1eee 2189 }
2190
2191 if(far_call)
2192 emit_callreg(12);
c6c3b1b3 2193 else
2194 emit_call(handler);
b1be1eee 2195
db829eeb 2196 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2197 switch(type) {
2198 case LOADB_STUB: emit_signextend8(0,rt); break;
2199 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2200 case LOADH_STUB: emit_signextend16(0,rt); break;
2201 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2202 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2203 default: assert(0);
2204 }
2205 }
2206 restore_regs(reglist);
57871462 2207}
2208
e2b5e7aa 2209static void do_writestub(int n)
57871462 2210{
b14b6a8f 2211 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 2212 literal_pool(256);
b14b6a8f 2213 set_jump_target(stubs[n].addr, out);
2214 enum stub_type type=stubs[n].type;
2215 int i=stubs[n].a;
2216 int rs=stubs[n].b;
2217 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2218 u_int reglist=stubs[n].e;
57871462 2219 signed char *i_regmap=i_regs->regmap;
581335b0 2220 int rt,r;
b9b61529 2221 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2222 rt=get_reg(i_regmap,r=FTEMP);
2223 }else{
57871462 2224 rt=get_reg(i_regmap,r=rs2[i]);
2225 }
2226 assert(rs>=0);
2227 assert(rt>=0);
b14b6a8f 2228 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 2229 void *restore_jump = NULL;
b96d3df7 2230 int reglist2=reglist|(1<<rs)|(1<<rt);
2231 for(rtmp=0;rtmp<=12;rtmp++) {
2232 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2233 temp=rtmp; break;
2234 }
2235 }
2236 if(temp==-1) {
2237 save_regs(reglist);
2238 regs_saved=1;
2239 for(rtmp=0;rtmp<=3;rtmp++)
2240 if(rtmp!=rs&&rtmp!=rt)
2241 {temp=rtmp;break;}
2242 }
2243 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2244 temp2=3;
643aeae3 2245 emit_readword(&mem_wtab,temp);
b96d3df7 2246 emit_shrimm(rs,12,temp2);
2247 emit_readword_dualindexedx4(temp,temp2,temp2);
2248 emit_lsls_imm(temp2,1,temp2);
2249 switch(type) {
2250 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2251 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2252 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2253 default: assert(0);
2254 }
2255 if(regs_saved) {
df4dc2b1 2256 restore_jump=out;
b96d3df7 2257 emit_jcc(0); // jump to reg restore
2258 }
2259 else
b14b6a8f 2260 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 2261
2262 if(!regs_saved)
2263 save_regs(reglist);
643aeae3 2264 void *handler=NULL;
b96d3df7 2265 switch(type) {
643aeae3 2266 case STOREB_STUB: handler=jump_handler_write8; break;
2267 case STOREH_STUB: handler=jump_handler_write16; break;
2268 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 2269 default: assert(0);
b96d3df7 2270 }
643aeae3 2271 assert(handler);
b96d3df7 2272 pass_args(rs,rt);
2273 if(temp2!=3)
2274 emit_mov(temp2,3);
2275 int cc=get_reg(i_regmap,CCREG);
2276 if(cc<0)
2277 emit_loadreg(CCREG,2);
b14b6a8f 2278 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
b96d3df7 2279 // returns new cycle_count
2280 emit_call(handler);
b14b6a8f 2281 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2282 if(cc<0)
2283 emit_storereg(CCREG,2);
2284 if(restore_jump)
df4dc2b1 2285 set_jump_target(restore_jump, out);
b96d3df7 2286 restore_regs(reglist);
b14b6a8f 2287 emit_jmp(stubs[n].retaddr);
57871462 2288}
2289
b14b6a8f 2290static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2291{
2292 int rs=get_reg(regmap,-1);
57871462 2293 int rt=get_reg(regmap,target);
2294 assert(rs>=0);
2295 assert(rt>=0);
643aeae3 2296 u_int host_addr=0;
2297 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2298 if (handler == NULL) {
13e35c04 2299 if(addr!=host_addr)
2300 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2301 switch(type) {
2302 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2303 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2304 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2305 default: assert(0);
2306 }
2307 return;
2308 }
2309
2310 // call a memhandler
2311 save_regs(reglist);
13e35c04 2312 pass_args(rs,rt);
b96d3df7 2313 int cc=get_reg(regmap,CCREG);
2314 if(cc<0)
2315 emit_loadreg(CCREG,2);
2573466a 2316 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
643aeae3 2317 emit_movimm((u_int)handler,3);
b96d3df7 2318 // returns new cycle_count
643aeae3 2319 emit_call(jump_handler_write_h);
2573466a 2320 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2321 if(cc<0)
2322 emit_storereg(CCREG,2);
2323 restore_regs(reglist);
57871462 2324}
2325
e2b5e7aa 2326static void do_unalignedwritestub(int n)
57871462 2327{
b14b6a8f 2328 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
b7918751 2329 literal_pool(256);
b14b6a8f 2330 set_jump_target(stubs[n].addr, out);
b7918751 2331
b14b6a8f 2332 int i=stubs[n].a;
2333 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2334 int addr=stubs[n].b;
2335 u_int reglist=stubs[n].e;
b7918751 2336 signed char *i_regmap=i_regs->regmap;
2337 int temp2=get_reg(i_regmap,FTEMP);
2338 int rt;
b7918751 2339 rt=get_reg(i_regmap,rs2[i]);
2340 assert(rt>=0);
2341 assert(addr>=0);
2342 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2343 reglist|=(1<<addr);
2344 reglist&=~(1<<temp2);
2345
b96d3df7 2346#if 1
2347 // don't bother with it and call write handler
2348 save_regs(reglist);
2349 pass_args(addr,rt);
2350 int cc=get_reg(i_regmap,CCREG);
2351 if(cc<0)
2352 emit_loadreg(CCREG,2);
b14b6a8f 2353 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
643aeae3 2354 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
b14b6a8f 2355 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2356 if(cc<0)
2357 emit_storereg(CCREG,2);
2358 restore_regs(reglist);
b14b6a8f 2359 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2360#else
b7918751 2361 emit_andimm(addr,0xfffffffc,temp2);
643aeae3 2362 emit_writeword(temp2,&address);
b7918751 2363
2364 save_regs(reglist);
b7918751 2365 emit_shrimm(addr,16,1);
2366 int cc=get_reg(i_regmap,CCREG);
2367 if(cc<0) {
2368 emit_loadreg(CCREG,2);
2369 }
2370 emit_movimm((u_int)readmem,0);
b14b6a8f 2371 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
b7918751 2372 emit_call((int)&indirect_jump_indexed);
2373 restore_regs(reglist);
2374
643aeae3 2375 emit_readword(&readmem_dword,temp2);
b7918751 2376 int temp=addr; //hmh
2377 emit_shlimm(addr,3,temp);
2378 emit_andimm(temp,24,temp);
2379#ifdef BIG_ENDIAN_MIPS
2380 if (opcode[i]==0x2e) // SWR
2381#else
2382 if (opcode[i]==0x2a) // SWL
2383#endif
2384 emit_xorimm(temp,24,temp);
2385 emit_movimm(-1,HOST_TEMPREG);
55439448 2386 if (opcode[i]==0x2a) { // SWL
b7918751 2387 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2388 emit_orrshr(rt,temp,temp2);
2389 }else{
2390 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2391 emit_orrshl(rt,temp,temp2);
2392 }
643aeae3 2393 emit_readword(&address,addr);
2394 emit_writeword(temp2,&word);
b7918751 2395 //save_regs(reglist); // don't need to, no state changes
2396 emit_shrimm(addr,16,1);
2397 emit_movimm((u_int)writemem,0);
2398 //emit_call((int)&indirect_jump_indexed);
2399 emit_mov(15,14);
2400 emit_readword_dualindexedx4(0,1,15);
643aeae3 2401 emit_readword(&Count,HOST_TEMPREG);
2402 emit_readword(&next_interupt,2);
b14b6a8f 2403 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
643aeae3 2404 emit_writeword(2,&last_count);
b7918751 2405 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2406 if(cc<0) {
2407 emit_storereg(CCREG,HOST_TEMPREG);
2408 }
2409 restore_regs(reglist);
b14b6a8f 2410 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2411#endif
57871462 2412}
2413
e2b5e7aa 2414static void do_invstub(int n)
57871462 2415{
2416 literal_pool(20);
b14b6a8f 2417 u_int reglist=stubs[n].a;
2418 set_jump_target(stubs[n].addr, out);
57871462 2419 save_regs(reglist);
b14b6a8f 2420 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
643aeae3 2421 emit_call(&invalidate_addr);
57871462 2422 restore_regs(reglist);
b14b6a8f 2423 emit_jmp(stubs[n].retaddr); // return address
57871462 2424}
2425
df4dc2b1 2426void *do_dirty_stub(int i)
57871462 2427{
2428 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2429 u_int addr=(u_int)source;
57871462 2430 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2431 #ifndef HAVE_ARMV7
ac545b3a 2432 emit_loadlp(addr,1);
57871462 2433 emit_loadlp((int)copy,2);
2434 emit_loadlp(slen*4,3);
2435 #else
ac545b3a 2436 emit_movw(addr&0x0000FFFF,1);
57871462 2437 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2438 emit_movt(addr&0xFFFF0000,1);
57871462 2439 emit_movt(((u_int)copy)&0xFFFF0000,2);
2440 emit_movw(slen*4,3);
2441 #endif
2442 emit_movimm(start+i*4,0);
643aeae3 2443 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
df4dc2b1 2444 void *entry = out;
57871462 2445 load_regs_entry(i);
df4dc2b1 2446 if (entry == out)
2447 entry = instr_addr[i];
57871462 2448 emit_jmp(instr_addr[i]);
2449 return entry;
2450}
2451
e2b5e7aa 2452static void do_dirty_stub_ds()
57871462 2453{
2454 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2455 #ifndef HAVE_ARMV7
57871462 2456 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2457 emit_loadlp((int)copy,2);
2458 emit_loadlp(slen*4,3);
2459 #else
2460 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2461 emit_movw(((u_int)copy)&0x0000FFFF,2);
2462 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2463 emit_movt(((u_int)copy)&0xFFFF0000,2);
2464 emit_movw(slen*4,3);
2465 #endif
2466 emit_movimm(start+1,0);
643aeae3 2467 emit_call(&verify_code_ds);
57871462 2468}
2469
00fa9369 2470// FP_STUB
e2b5e7aa 2471static void do_cop1stub(int n)
57871462 2472{
2473 literal_pool(256);
b14b6a8f 2474 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2475 set_jump_target(stubs[n].addr, out);
2476 int i=stubs[n].a;
2477// int rs=stubs[n].b;
2478 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2479 int ds=stubs[n].d;
57871462 2480 if(!ds) {
ad49de89 2481 load_all_consts(regs[i].regmap_entry,regs[i].wasdirty,i);
57871462 2482 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2483 }
2484 //else {printf("fp exception in delay slot\n");}
ad49de89 2485 wb_dirtys(i_regs->regmap_entry,i_regs->wasdirty);
57871462 2486 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2487 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2488 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
b14b6a8f 2489 emit_jmp(ds?fp_exception_ds:fp_exception);
57871462 2490}
2491
57871462 2492/* Special assem */
2493
e2b5e7aa 2494static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 2495{
2496 if(rt1[i]) {
2497 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2498 {
2499 signed char s,t,shift;
2500 t=get_reg(i_regs->regmap,rt1[i]);
2501 s=get_reg(i_regs->regmap,rs1[i]);
2502 shift=get_reg(i_regs->regmap,rs2[i]);
2503 if(t>=0){
2504 if(rs1[i]==0)
2505 {
2506 emit_zeroreg(t);
2507 }
2508 else if(rs2[i]==0)
2509 {
2510 assert(s>=0);
2511 if(s!=t) emit_mov(s,t);
2512 }
2513 else
2514 {
2515 emit_andimm(shift,31,HOST_TEMPREG);
2516 if(opcode2[i]==4) // SLLV
2517 {
2518 emit_shl(s,HOST_TEMPREG,t);
2519 }
2520 if(opcode2[i]==6) // SRLV
2521 {
2522 emit_shr(s,HOST_TEMPREG,t);
2523 }
2524 if(opcode2[i]==7) // SRAV
2525 {
2526 emit_sar(s,HOST_TEMPREG,t);
2527 }
2528 }
2529 }
2530 } else { // DSLLV/DSRLV/DSRAV
2531 signed char sh,sl,th,tl,shift;
2532 th=get_reg(i_regs->regmap,rt1[i]|64);
2533 tl=get_reg(i_regs->regmap,rt1[i]);
2534 sh=get_reg(i_regs->regmap,rs1[i]|64);
2535 sl=get_reg(i_regs->regmap,rs1[i]);
2536 shift=get_reg(i_regs->regmap,rs2[i]);
2537 if(tl>=0){
2538 if(rs1[i]==0)
2539 {
2540 emit_zeroreg(tl);
2541 if(th>=0) emit_zeroreg(th);
2542 }
2543 else if(rs2[i]==0)
2544 {
2545 assert(sl>=0);
2546 if(sl!=tl) emit_mov(sl,tl);
2547 if(th>=0&&sh!=th) emit_mov(sh,th);
2548 }
2549 else
2550 {
2551 // FIXME: What if shift==tl ?
2552 assert(shift!=tl);
2553 int temp=get_reg(i_regs->regmap,-1);
2554 int real_th=th;
2555 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2556 assert(sl>=0);
2557 assert(sh>=0);
2558 emit_andimm(shift,31,HOST_TEMPREG);
2559 if(opcode2[i]==0x14) // DSLLV
2560 {
2561 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2562 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2563 emit_orrshr(sl,HOST_TEMPREG,th);
2564 emit_andimm(shift,31,HOST_TEMPREG);
2565 emit_testimm(shift,32);
2566 emit_shl(sl,HOST_TEMPREG,tl);
2567 if(th>=0) emit_cmovne_reg(tl,th);
2568 emit_cmovne_imm(0,tl);
2569 }
2570 if(opcode2[i]==0x16) // DSRLV
2571 {
2572 assert(th>=0);
2573 emit_shr(sl,HOST_TEMPREG,tl);
2574 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2575 emit_orrshl(sh,HOST_TEMPREG,tl);
2576 emit_andimm(shift,31,HOST_TEMPREG);
2577 emit_testimm(shift,32);
2578 emit_shr(sh,HOST_TEMPREG,th);
2579 emit_cmovne_reg(th,tl);
2580 if(real_th>=0) emit_cmovne_imm(0,th);
2581 }
2582 if(opcode2[i]==0x17) // DSRAV
2583 {
2584 assert(th>=0);
2585 emit_shr(sl,HOST_TEMPREG,tl);
2586 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2587 if(real_th>=0) {
2588 assert(temp>=0);
2589 emit_sarimm(th,31,temp);
2590 }
2591 emit_orrshl(sh,HOST_TEMPREG,tl);
2592 emit_andimm(shift,31,HOST_TEMPREG);
2593 emit_testimm(shift,32);
2594 emit_sar(sh,HOST_TEMPREG,th);
2595 emit_cmovne_reg(th,tl);
2596 if(real_th>=0) emit_cmovne_reg(temp,th);
2597 }
2598 }
2599 }
2600 }
2601 }
2602}
ffb0b9e0 2603
ffb0b9e0 2604static void speculate_mov(int rs,int rt)
2605{
2606 if(rt!=0) {
2607 smrv_strong_next|=1<<rt;
2608 smrv[rt]=smrv[rs];
2609 }
2610}
2611
2612static void speculate_mov_weak(int rs,int rt)
2613{
2614 if(rt!=0) {
2615 smrv_weak_next|=1<<rt;
2616 smrv[rt]=smrv[rs];
2617 }
2618}
2619
2620static void speculate_register_values(int i)
2621{
2622 if(i==0) {
2623 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
2624 // gp,sp are likely to stay the same throughout the block
2625 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
2626 smrv_weak_next=~smrv_strong_next;
2627 //printf(" llr %08x\n", smrv[4]);
2628 }
2629 smrv_strong=smrv_strong_next;
2630 smrv_weak=smrv_weak_next;
2631 switch(itype[i]) {
2632 case ALU:
2633 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2634 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
2635 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2636 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
2637 else {
2638 smrv_strong_next&=~(1<<rt1[i]);
2639 smrv_weak_next&=~(1<<rt1[i]);
2640 }
2641 break;
2642 case SHIFTIMM:
2643 smrv_strong_next&=~(1<<rt1[i]);
2644 smrv_weak_next&=~(1<<rt1[i]);
2645 // fallthrough
2646 case IMM16:
2647 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
2648 int value,hr=get_reg(regs[i].regmap,rt1[i]);
2649 if(hr>=0) {
2650 if(get_final_value(hr,i,&value))
2651 smrv[rt1[i]]=value;
2652 else smrv[rt1[i]]=constmap[i][hr];
2653 smrv_strong_next|=1<<rt1[i];
2654 }
2655 }
2656 else {
2657 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2658 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2659 }
2660 break;
2661 case LOAD:
2662 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
2663 // special case for BIOS
2664 smrv[rt1[i]]=0xa0000000;
2665 smrv_strong_next|=1<<rt1[i];
2666 break;
2667 }
2668 // fallthrough
2669 case SHIFT:
2670 case LOADLR:
2671 case MOV:
2672 smrv_strong_next&=~(1<<rt1[i]);
2673 smrv_weak_next&=~(1<<rt1[i]);
2674 break;
2675 case COP0:
2676 case COP2:
2677 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
2678 smrv_strong_next&=~(1<<rt1[i]);
2679 smrv_weak_next&=~(1<<rt1[i]);
2680 }
2681 break;
2682 case C2LS:
2683 if (opcode[i]==0x32) { // LWC2
2684 smrv_strong_next&=~(1<<rt1[i]);
2685 smrv_weak_next&=~(1<<rt1[i]);
2686 }
2687 break;
2688 }
2689#if 0
2690 int r=4;
2691 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
2692 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
2693#endif
2694}
2695
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class; also returned for anything unmatched
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
2703
2704static int get_ptr_mem_type(u_int a)
2705{
2706 if(a < 0x00200000) {
2707 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2708 // return wrong, must use memhandler for BIOS self-test to pass
2709 // 007 does similar stuff from a00 mirror, weird stuff
2710 return MTYPE_8000;
2711 return MTYPE_0000;
2712 }
2713 if(0x1f800000 <= a && a < 0x1f801000)
2714 return MTYPE_1F80;
2715 if(0x80200000 <= a && a < 0x80800000)
2716 return MTYPE_8020;
2717 if(0xa0000000 <= a && a < 0xa0200000)
2718 return MTYPE_A000;
2719 return MTYPE_8000;
2720}
ffb0b9e0 2721
b14b6a8f 2722static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
ffb0b9e0 2723{
b14b6a8f 2724 void *jaddr = NULL;
2725 int type=0;
ffb0b9e0 2726 int mr=rs1[i];
2727 if(((smrv_strong|smrv_weak)>>mr)&1) {
2728 type=get_ptr_mem_type(smrv[mr]);
2729 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
2730 }
2731 else {
2732 // use the mirror we are running on
2733 type=get_ptr_mem_type(start);
2734 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
2735 }
2736
2737 if(type==MTYPE_8020) { // RAM 80200000+ mirror
2738 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
2739 addr=*addr_reg_override=HOST_TEMPREG;
2740 type=0;
2741 }
2742 else if(type==MTYPE_0000) { // RAM 0 mirror
2743 emit_orimm(addr,0x80000000,HOST_TEMPREG);
2744 addr=*addr_reg_override=HOST_TEMPREG;
2745 type=0;
2746 }
2747 else if(type==MTYPE_A000) { // RAM A mirror
2748 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
2749 addr=*addr_reg_override=HOST_TEMPREG;
2750 type=0;
2751 }
2752 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 2753 if (psxH == (void *)0x1f800000) {
2754 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
2755 emit_cmpimm(HOST_TEMPREG,0x1000);
b14b6a8f 2756 jaddr=out;
6d760c92 2757 emit_jc(0);
2758 }
2759 else {
2760 // do usual RAM check, jump will go to the right handler
2761 type=0;
2762 }
ffb0b9e0 2763 }
ffb0b9e0 2764
2765 if(type==0)
2766 {
2767 emit_cmpimm(addr,RAM_SIZE);
b14b6a8f 2768 jaddr=out;
ffb0b9e0 2769 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2770 // Hint to branch predictor that the branch is unlikely to be taken
2771 if(rs1[i]>=28)
2772 emit_jno_unlikely(0);
2773 else
2774 #endif
2775 emit_jno(0);
a327ad27 2776 if(ram_offset!=0) {
2777 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2778 addr=*addr_reg_override=HOST_TEMPREG;
2779 }
ffb0b9e0 2780 }
2781
2782 return jaddr;
2783}
2784
57871462 2785#define shift_assemble shift_assemble_arm
2786
e2b5e7aa 2787static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 2788{
9c45ca93 2789 int s,tl,temp,temp2,addr;
57871462 2790 int offset;
b14b6a8f 2791 void *jaddr=0;
af4ee1fe 2792 int memtarget=0,c=0;
ffb0b9e0 2793 int fastload_reg_override=0;
57871462 2794 u_int hr,reglist=0;
57871462 2795 tl=get_reg(i_regs->regmap,rt1[i]);
2796 s=get_reg(i_regs->regmap,rs1[i]);
2797 temp=get_reg(i_regs->regmap,-1);
2798 temp2=get_reg(i_regs->regmap,FTEMP);
2799 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2800 assert(addr<0);
2801 offset=imm[i];
2802 for(hr=0;hr<HOST_REGS;hr++) {
2803 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2804 }
2805 reglist|=1<<temp;
2806 if(offset||s<0||c) addr=temp2;
2807 else addr=s;
2808 if(s>=0) {
2809 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2810 if(c) {
2811 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2812 }
57871462 2813 }
1edfcc68 2814 if(!c) {
1edfcc68 2815 emit_shlimm(addr,3,temp);
2816 if (opcode[i]==0x22||opcode[i]==0x26) {
2817 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2818 }else{
2819 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 2820 }
1edfcc68 2821 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2822 }
2823 else {
2824 if(ram_offset&&memtarget) {
2825 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2826 fastload_reg_override=HOST_TEMPREG;
57871462 2827 }
1edfcc68 2828 if (opcode[i]==0x22||opcode[i]==0x26) {
2829 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 2830 }else{
1edfcc68 2831 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 2832 }
535d208a 2833 }
2834 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2835 if(!c||memtarget) {
ffb0b9e0 2836 int a=temp2;
2837 if(fastload_reg_override) a=fastload_reg_override;
9c45ca93 2838 emit_readword_indexed(0,a,temp2);
b14b6a8f 2839 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
535d208a 2840 }
2841 else
2842 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2843 if(rt1[i]) {
2844 assert(tl>=0);
57871462 2845 emit_andimm(temp,24,temp);
2002a1db 2846#ifdef BIG_ENDIAN_MIPS
2847 if (opcode[i]==0x26) // LWR
2848#else
2849 if (opcode[i]==0x22) // LWL
2850#endif
2851 emit_xorimm(temp,24,temp);
57871462 2852 emit_movimm(-1,HOST_TEMPREG);
2853 if (opcode[i]==0x26) {
2854 emit_shr(temp2,temp,temp2);
2855 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2856 }else{
2857 emit_shl(temp2,temp,temp2);
2858 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2859 }
2860 emit_or(temp2,tl,tl);
57871462 2861 }
535d208a 2862 //emit_storereg(rt1[i],tl); // DEBUG
2863 }
2864 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
9c45ca93 2865 assert(0);
57871462 2866 }
2867}
2868#define loadlr_assemble loadlr_assemble_arm
2869
e2b5e7aa 2870static void cop0_assemble(int i,struct regstat *i_regs)
57871462 2871{
2872 if(opcode2[i]==0) // MFC0
2873 {
2874 signed char t=get_reg(i_regs->regmap,rt1[i]);
643aeae3 2875 u_int copr=(source[i]>>11)&0x1f;
57871462 2876 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 2877 if(t>=0&&rt1[i]!=0) {
643aeae3 2878 emit_readword(&reg_cop0[copr],t);
57871462 2879 }
2880 }
2881 else if(opcode2[i]==4) // MTC0
2882 {
2883 signed char s=get_reg(i_regs->regmap,rs1[i]);
2884 char copr=(source[i]>>11)&0x1f;
2885 assert(s>=0);
ad49de89 2886 wb_register(rs1[i],i_regs->regmap,i_regs->dirty);
7139f3c8 2887 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 2888 emit_readword(&last_count,HOST_TEMPREG);
57871462 2889 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 2890 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 2891 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
643aeae3 2892 emit_writeword(HOST_CCREG,&Count);
57871462 2893 }
2894 // What a mess. The status register (12) can enable interrupts,
2895 // so needs a special case to handle a pending interrupt.
2896 // The interrupt must be taken immediately, because a subsequent
2897 // instruction might disable interrupts again.
7139f3c8 2898 if(copr==12||copr==13) {
fca1aef2 2899 if (is_delayslot) {
2900 // burn cycles to cause cc_interrupt, which will
2901 // reschedule next_interupt. Relies on CCREG from above.
2902 assem_debug("MTC0 DS %d\n", copr);
643aeae3 2903 emit_writeword(HOST_CCREG,&last_count);
fca1aef2 2904 emit_movimm(0,HOST_CCREG);
2905 emit_storereg(CCREG,HOST_CCREG);
caeefe31 2906 emit_loadreg(rs1[i],1);
fca1aef2 2907 emit_movimm(copr,0);
643aeae3 2908 emit_call(pcsx_mtc0_ds);
042c7287 2909 emit_loadreg(rs1[i],s);
fca1aef2 2910 return;
2911 }
63cb0298 2912 emit_movimm(start+i*4+4,HOST_TEMPREG);
643aeae3 2913 emit_writeword(HOST_TEMPREG,&pcaddr);
63cb0298 2914 emit_movimm(0,HOST_TEMPREG);
643aeae3 2915 emit_writeword(HOST_TEMPREG,&pending_exception);
57871462 2916 }
2917 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
2918 //else
caeefe31 2919 if(s==HOST_CCREG)
2920 emit_loadreg(rs1[i],1);
2921 else if(s!=1)
63cb0298 2922 emit_mov(s,1);
fca1aef2 2923 emit_movimm(copr,0);
643aeae3 2924 emit_call(pcsx_mtc0);
7139f3c8 2925 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 2926 emit_readword(&Count,HOST_CCREG);
2927 emit_readword(&next_interupt,HOST_TEMPREG);
2573466a 2928 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 2929 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
643aeae3 2930 emit_writeword(HOST_TEMPREG,&last_count);
57871462 2931 emit_storereg(CCREG,HOST_CCREG);
2932 }
7139f3c8 2933 if(copr==12||copr==13) {
57871462 2934 assert(!is_delayslot);
643aeae3 2935 emit_readword(&pending_exception,14);
042c7287 2936 emit_test(14,14);
643aeae3 2937 emit_jne(&do_interrupt);
57871462 2938 }
2939 emit_loadreg(rs1[i],s);
2940 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
2941 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 2942 }
2943 else
2944 {
2945 assert(opcode2[i]==0x10);
00fa9369 2946 //if((source[i]&0x3f)==0x10) // RFE
576bbd8f 2947 {
643aeae3 2948 emit_readword(&Status,0);
576bbd8f 2949 emit_andimm(0,0x3c,1);
2950 emit_andimm(0,~0xf,0);
2951 emit_orrshr_imm(1,2,0);
643aeae3 2952 emit_writeword(0,&Status);
576bbd8f 2953 }
57871462 2954 }
2955}
2956
b9b61529 2957static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
2958{
2959 switch (copr) {
2960 case 1:
2961 case 3:
2962 case 5:
2963 case 8:
2964 case 9:
2965 case 10:
2966 case 11:
643aeae3 2967 emit_readword(&reg_cop2d[copr],tl);
b9b61529 2968 emit_signextend16(tl,tl);
643aeae3 2969 emit_writeword(tl,&reg_cop2d[copr]); // hmh
b9b61529 2970 break;
2971 case 7:
2972 case 16:
2973 case 17:
2974 case 18:
2975 case 19:
643aeae3 2976 emit_readword(&reg_cop2d[copr],tl);
b9b61529 2977 emit_andimm(tl,0xffff,tl);
643aeae3 2978 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 2979 break;
2980 case 15:
643aeae3 2981 emit_readword(&reg_cop2d[14],tl); // SXY2
2982 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 2983 break;
2984 case 28:
b9b61529 2985 case 29:
643aeae3 2986 emit_readword(&reg_cop2d[9],temp);
b9b61529 2987 emit_testimm(temp,0x8000); // do we need this?
2988 emit_andimm(temp,0xf80,temp);
2989 emit_andne_imm(temp,0,temp);
f70d384d 2990 emit_shrimm(temp,7,tl);
643aeae3 2991 emit_readword(&reg_cop2d[10],temp);
b9b61529 2992 emit_testimm(temp,0x8000);
2993 emit_andimm(temp,0xf80,temp);
2994 emit_andne_imm(temp,0,temp);
f70d384d 2995 emit_orrshr_imm(temp,2,tl);
643aeae3 2996 emit_readword(&reg_cop2d[11],temp);
b9b61529 2997 emit_testimm(temp,0x8000);
2998 emit_andimm(temp,0xf80,temp);
2999 emit_andne_imm(temp,0,temp);
f70d384d 3000 emit_orrshl_imm(temp,3,tl);
643aeae3 3001 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3002 break;
3003 default:
643aeae3 3004 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3005 break;
3006 }
3007}
3008
3009static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3010{
3011 switch (copr) {
3012 case 15:
643aeae3 3013 emit_readword(&reg_cop2d[13],temp); // SXY1
3014 emit_writeword(sl,&reg_cop2d[copr]);
3015 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3016 emit_readword(&reg_cop2d[14],temp); // SXY2
3017 emit_writeword(sl,&reg_cop2d[14]);
3018 emit_writeword(temp,&reg_cop2d[13]); // SXY1
b9b61529 3019 break;
3020 case 28:
3021 emit_andimm(sl,0x001f,temp);
f70d384d 3022 emit_shlimm(temp,7,temp);
643aeae3 3023 emit_writeword(temp,&reg_cop2d[9]);
b9b61529 3024 emit_andimm(sl,0x03e0,temp);
f70d384d 3025 emit_shlimm(temp,2,temp);
643aeae3 3026 emit_writeword(temp,&reg_cop2d[10]);
b9b61529 3027 emit_andimm(sl,0x7c00,temp);
f70d384d 3028 emit_shrimm(temp,3,temp);
643aeae3 3029 emit_writeword(temp,&reg_cop2d[11]);
3030 emit_writeword(sl,&reg_cop2d[28]);
b9b61529 3031 break;
3032 case 30:
3033 emit_movs(sl,temp);
3034 emit_mvnmi(temp,temp);
665f33e1 3035#ifdef HAVE_ARMV5
b9b61529 3036 emit_clz(temp,temp);
665f33e1 3037#else
3038 emit_movs(temp,HOST_TEMPREG);
3039 emit_movimm(0,temp);
3040 emit_jeq((int)out+4*4);
3041 emit_addpl_imm(temp,1,temp);
3042 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3043 emit_jns((int)out-2*4);
3044#endif
643aeae3 3045 emit_writeword(sl,&reg_cop2d[30]);
3046 emit_writeword(temp,&reg_cop2d[31]);
b9b61529 3047 break;
b9b61529 3048 case 31:
3049 break;
3050 default:
643aeae3 3051 emit_writeword(sl,&reg_cop2d[copr]);
b9b61529 3052 break;
3053 }
3054}
3055
e2b5e7aa 3056static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3057{
3058 u_int copr=(source[i]>>11)&0x1f;
3059 signed char temp=get_reg(i_regs->regmap,-1);
3060 if (opcode2[i]==0) { // MFC2
3061 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3062 if(tl>=0&&rt1[i]!=0)
b9b61529 3063 cop2_get_dreg(copr,tl,temp);
3064 }
3065 else if (opcode2[i]==4) { // MTC2
3066 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3067 cop2_put_dreg(copr,sl,temp);
3068 }
3069 else if (opcode2[i]==2) // CFC2
3070 {
3071 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3072 if(tl>=0&&rt1[i]!=0)
643aeae3 3073 emit_readword(&reg_cop2c[copr],tl);
b9b61529 3074 }
3075 else if (opcode2[i]==6) // CTC2
3076 {
3077 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3078 switch(copr) {
3079 case 4:
3080 case 12:
3081 case 20:
3082 case 26:
3083 case 27:
3084 case 29:
3085 case 30:
3086 emit_signextend16(sl,temp);
3087 break;
3088 case 31:
3089 //value = value & 0x7ffff000;
3090 //if (value & 0x7f87e000) value |= 0x80000000;
3091 emit_shrimm(sl,12,temp);
3092 emit_shlimm(temp,12,temp);
3093 emit_testimm(temp,0x7f000000);
3094 emit_testeqimm(temp,0x00870000);
3095 emit_testeqimm(temp,0x0000e000);
3096 emit_orrne_imm(temp,0x80000000,temp);
3097 break;
3098 default:
3099 temp=sl;
3100 break;
3101 }
643aeae3 3102 emit_writeword(temp,&reg_cop2c[copr]);
b9b61529 3103 assert(sl>=0);
3104 }
3105}
3106
054175e9 3107static void c2op_prologue(u_int op,u_int reglist)
3108{
3109 save_regs_all(reglist);
82ed88eb 3110#ifdef PCNT
3111 emit_movimm(op,0);
3112 emit_call((int)pcnt_gte_start);
3113#endif
054175e9 3114 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3115}
3116
3117static void c2op_epilogue(u_int op,u_int reglist)
3118{
82ed88eb 3119#ifdef PCNT
3120 emit_movimm(op,0);
3121 emit_call((int)pcnt_gte_end);
3122#endif
054175e9 3123 restore_regs_all(reglist);
3124}
3125
6c0eefaf 3126static void c2op_call_MACtoIR(int lm,int need_flags)
3127{
3128 if(need_flags)
643aeae3 3129 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 3130 else
643aeae3 3131 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 3132}
3133
3134static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3135{
643aeae3 3136 emit_call(func);
6c0eefaf 3137 // func is C code and trashes r0
3138 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3139 if(need_flags||need_ir)
3140 c2op_call_MACtoIR(lm,need_flags);
643aeae3 3141 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 3142}
3143
054175e9 3144static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3145{
b9b61529 3146 u_int c2op=source[i]&0x3f;
6c0eefaf 3147 u_int hr,reglist_full=0,reglist;
054175e9 3148 int need_flags,need_ir;
b9b61529 3149 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3150 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3151 }
4d646738 3152 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3153
3154 if (gte_handlers[c2op]!=NULL) {
bedfea38 3155 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3156 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3157 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3158 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3159 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3160 need_flags=0;
6c0eefaf 3161 int shift = (source[i] >> 19) & 1;
3162 int lm = (source[i] >> 10) & 1;
054175e9 3163 switch(c2op) {
19776aef 3164#ifndef DRC_DBG
054175e9 3165 case GTE_MVMVA: {
82336ba3 3166#ifdef HAVE_ARMV5
054175e9 3167 int v = (source[i] >> 15) & 3;
3168 int cv = (source[i] >> 13) & 3;
3169 int mx = (source[i] >> 17) & 3;
4d646738 3170 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3171 c2op_prologue(c2op,reglist);
3172 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3173 if(v<3)
3174 emit_ldrd(v*8,0,4);
3175 else {
3176 emit_movzwl_indexed(9*4,0,4); // gteIR
3177 emit_movzwl_indexed(10*4,0,6);
3178 emit_movzwl_indexed(11*4,0,5);
3179 emit_orrshl_imm(6,16,4);
3180 }
3181 if(mx<3)
3182 emit_addimm(0,32*4+mx*8*4,6);
3183 else
643aeae3 3184 emit_readword(&zeromem_ptr,6);
054175e9 3185 if(cv<3)
3186 emit_addimm(0,32*4+(cv*8+5)*4,7);
3187 else
643aeae3 3188 emit_readword(&zeromem_ptr,7);
054175e9 3189#ifdef __ARM_NEON__
3190 emit_movimm(source[i],1); // opcode
643aeae3 3191 emit_call(gteMVMVA_part_neon);
054175e9 3192 if(need_flags) {
3193 emit_movimm(lm,1);
643aeae3 3194 emit_call(gteMACtoIR_flags_neon);
054175e9 3195 }
3196#else
3197 if(cv==3&&shift)
3198 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3199 else {
3200 emit_movimm(shift,1);
3201 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3202 }
6c0eefaf 3203 if(need_flags||need_ir)
3204 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3205#endif
3206#else /* if not HAVE_ARMV5 */
3207 c2op_prologue(c2op,reglist);
3208 emit_movimm(source[i],1); // opcode
643aeae3 3209 emit_writeword(1,&psxRegs.code);
82336ba3 3210 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3211#endif
3212 break;
3213 }
6c0eefaf 3214 case GTE_OP:
3215 c2op_prologue(c2op,reglist);
643aeae3 3216 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 3217 if(need_flags||need_ir) {
3218 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3219 c2op_call_MACtoIR(lm,need_flags);
3220 }
3221 break;
3222 case GTE_DPCS:
3223 c2op_prologue(c2op,reglist);
3224 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3225 break;
3226 case GTE_INTPL:
3227 c2op_prologue(c2op,reglist);
3228 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3229 break;
3230 case GTE_SQR:
3231 c2op_prologue(c2op,reglist);
643aeae3 3232 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 3233 if(need_flags||need_ir) {
3234 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3235 c2op_call_MACtoIR(lm,need_flags);
3236 }
3237 break;
3238 case GTE_DCPL:
3239 c2op_prologue(c2op,reglist);
3240 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3241 break;
3242 case GTE_GPF:
3243 c2op_prologue(c2op,reglist);
3244 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3245 break;
3246 case GTE_GPL:
3247 c2op_prologue(c2op,reglist);
3248 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3249 break;
19776aef 3250#endif
054175e9 3251 default:
054175e9 3252 c2op_prologue(c2op,reglist);
19776aef 3253#ifdef DRC_DBG
3254 emit_movimm(source[i],1); // opcode
643aeae3 3255 emit_writeword(1,&psxRegs.code);
19776aef 3256#endif
643aeae3 3257 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 3258 break;
3259 }
3260 c2op_epilogue(c2op,reglist);
3261 }
b9b61529 3262}
3263
e2b5e7aa 3264static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3265{
3266 // XXX: should just just do the exception instead
00fa9369 3267 //if(!cop1_usable)
3268 {
b14b6a8f 3269 void *jaddr=out;
3d624f89 3270 emit_jmp(0);
b14b6a8f 3271 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3d624f89 3272 }
3273}
3274
// Assemble a cop1 instruction. Every cop1 instruction is routed through
// the "coprocessor unusable" path.
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3279
e2b5e7aa 3280static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3281{
3282 // case 0x18: MULT
3283 // case 0x19: MULTU
3284 // case 0x1A: DIV
3285 // case 0x1B: DIVU
3286 // case 0x1C: DMULT
3287 // case 0x1D: DMULTU
3288 // case 0x1E: DDIV
3289 // case 0x1F: DDIVU
3290 if(rs1[i]&&rs2[i])
3291 {
3292 if((opcode2[i]&4)==0) // 32-bit
3293 {
3294 if(opcode2[i]==0x18) // MULT
3295 {
3296 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3297 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3298 signed char hi=get_reg(i_regs->regmap,HIREG);
3299 signed char lo=get_reg(i_regs->regmap,LOREG);
3300 assert(m1>=0);
3301 assert(m2>=0);
3302 assert(hi>=0);
3303 assert(lo>=0);
3304 emit_smull(m1,m2,hi,lo);
3305 }
3306 if(opcode2[i]==0x19) // MULTU
3307 {
3308 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3309 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3310 signed char hi=get_reg(i_regs->regmap,HIREG);
3311 signed char lo=get_reg(i_regs->regmap,LOREG);
3312 assert(m1>=0);
3313 assert(m2>=0);
3314 assert(hi>=0);
3315 assert(lo>=0);
3316 emit_umull(m1,m2,hi,lo);
3317 }
3318 if(opcode2[i]==0x1A) // DIV
3319 {
3320 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3321 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3322 assert(d1>=0);
3323 assert(d2>=0);
3324 signed char quotient=get_reg(i_regs->regmap,LOREG);
3325 signed char remainder=get_reg(i_regs->regmap,HIREG);
3326 assert(quotient>=0);
3327 assert(remainder>=0);
3328 emit_movs(d1,remainder);
44a80f6a 3329 emit_movimm(0xffffffff,quotient);
3330 emit_negmi(quotient,quotient); // .. quotient and ..
3331 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3332 emit_movs(d2,HOST_TEMPREG);
3333 emit_jeq((int)out+52); // Division by zero
82336ba3 3334 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3335#ifdef HAVE_ARMV5
57871462 3336 emit_clz(HOST_TEMPREG,quotient);
3337 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3338#else
3339 emit_movimm(0,quotient);
3340 emit_addpl_imm(quotient,1,quotient);
3341 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3342 emit_jns((int)out-2*4);
3343#endif
57871462 3344 emit_orimm(quotient,1<<31,quotient);
3345 emit_shr(quotient,quotient,quotient);
3346 emit_cmp(remainder,HOST_TEMPREG);
3347 emit_subcs(remainder,HOST_TEMPREG,remainder);
3348 emit_adcs(quotient,quotient,quotient);
3349 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 3350 emit_jcc(out-16); // -4
57871462 3351 emit_teq(d1,d2);
3352 emit_negmi(quotient,quotient);
3353 emit_test(d1,d1);
3354 emit_negmi(remainder,remainder);
3355 }
3356 if(opcode2[i]==0x1B) // DIVU
3357 {
3358 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3359 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3360 assert(d1>=0);
3361 assert(d2>=0);
3362 signed char quotient=get_reg(i_regs->regmap,LOREG);
3363 signed char remainder=get_reg(i_regs->regmap,HIREG);
3364 assert(quotient>=0);
3365 assert(remainder>=0);
44a80f6a 3366 emit_mov(d1,remainder);
3367 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3368 emit_test(d2,d2);
44a80f6a 3369 emit_jeq((int)out+40); // Division by zero
665f33e1 3370#ifdef HAVE_ARMV5
57871462 3371 emit_clz(d2,HOST_TEMPREG);
3372 emit_movimm(1<<31,quotient);
3373 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3374#else
3375 emit_movimm(0,HOST_TEMPREG);
82336ba3 3376 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3377 emit_lslpls_imm(d2,1,d2);
665f33e1 3378 emit_jns((int)out-2*4);
3379 emit_movimm(1<<31,quotient);
3380#endif
57871462 3381 emit_shr(quotient,HOST_TEMPREG,quotient);
3382 emit_cmp(remainder,d2);
3383 emit_subcs(remainder,d2,remainder);
3384 emit_adcs(quotient,quotient,quotient);
3385 emit_shrcc_imm(d2,1,d2);
b14b6a8f 3386 emit_jcc(out-16); // -4
57871462 3387 }
3388 }
3389 else // 64-bit
71e490c5 3390 assert(0);
57871462 3391 }
3392 else
3393 {
3394 // Multiply by zero is zero.
3395 // MIPS does not have a divide by zero exception.
3396 // The result is undefined, we return zero.
3397 signed char hr=get_reg(i_regs->regmap,HIREG);
3398 signed char lr=get_reg(i_regs->regmap,LOREG);
3399 if(hr>=0) emit_zeroreg(hr);
3400 if(lr>=0) emit_zeroreg(lr);
3401 }
3402}
3403#define multdiv_assemble multdiv_assemble_arm
3404
// Intentionally empty on ARM.
// Don't need this for ARM.  On x86, this puts the value 0xf8 into the
// register.  On ARM the hash can be done with a single instruction
// (see do_rhash).
static void do_preload_rhash(int r)
{
  (void)r;
}
3409
e2b5e7aa 3410static void do_preload_rhtbl(int ht) {
57871462 3411 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3412}
3413
// Emit the mini hash table hash: rh = rs & 0xf8.
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
3417
e2b5e7aa 3418static void do_miniht_load(int ht,int rh) {
57871462 3419 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3420 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3421}
3422
e2b5e7aa 3423static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3424 emit_cmp(rh,rs);
3425 emit_ldreq_indexed(ht,4,15);
3426 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3427 emit_mov(rs,7);
3428 emit_jmp(jump_vaddr_reg[7]);
3429 #else
3430 emit_jmp(jump_vaddr_reg[rs]);
3431 #endif
3432}
3433
e2b5e7aa 3434static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3435 #ifndef HAVE_ARMV7
57871462 3436 emit_movimm(return_address,rt); // PC into link register
643aeae3 3437 add_to_linker(out,return_address,1);
57871462 3438 emit_pcreladdr(temp);
643aeae3 3439 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3440 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3441 #else
3442 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 3443 add_to_linker(out,return_address,1);
57871462 3444 emit_pcreladdr(temp);
643aeae3 3445 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3446 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 3447 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 3448 #endif
3449}
3450
ad49de89 3451static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t u)
57871462 3452{
3453 //if(dirty_pre==dirty) return;
581335b0 3454 int hr,reg;
57871462 3455 for(hr=0;hr<HOST_REGS;hr++) {
3456 if(hr!=EXCLUDE_REG) {
3457 reg=pre[hr];
3458 if(((~u)>>(reg&63))&1) {
f776eb14 3459 if(reg>0) {
57871462 3460 if(((dirty_pre&~dirty)>>hr)&1) {
3461 if(reg>0&&reg<34) {
3462 emit_storereg(reg,hr);
57871462 3463 }
3464 else if(reg>=64) {
00fa9369 3465 assert(0);
57871462 3466 }
3467 }
3468 }
57871462 3469 }
3470 }
3471 }
3472}
3473
d148d265 3474static void mark_clear_cache(void *target)
3475{
643aeae3 3476 u_long offset = (u_char *)target - translation_cache;
d148d265 3477 u_int mask = 1u << ((offset >> 12) & 31);
3478 if (!(needs_clear_cache[offset >> 17] & mask)) {
3479 char *start = (char *)((u_long)target & ~4095ul);
3480 start_tcache_write(start, start + 4096);
3481 needs_clear_cache[offset >> 17] |= mask;
3482 }
3483}
3484
dd3a91a1 3485// Clearing the cache is rather slow on ARM Linux, so mark the areas
3486// that need to be cleared, and then only clear these areas once.
e2b5e7aa 3487static void do_clear_cache()
dd3a91a1 3488{
3489 int i,j;
3490 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3491 {
3492 u_int bitmap=needs_clear_cache[i];
3493 if(bitmap) {
643aeae3 3494 u_char *start, *end;
9f51b4b9 3495 for(j=0;j<32;j++)
dd3a91a1 3496 {
3497 if(bitmap&(1<<j)) {
643aeae3 3498 start=translation_cache+i*131072+j*4096;
dd3a91a1 3499 end=start+4095;
3500 j++;
3501 while(j<32) {
3502 if(bitmap&(1<<j)) {
3503 end+=4096;
3504 j++;
3505 }else{
643aeae3 3506 end_tcache_write(start, end);
dd3a91a1 3507 break;
3508 }
3509 }
3510 }
3511 }
3512 needs_clear_cache[i]=0;
3513 }
3514 }
3515}
3516
// CPU-architecture-specific initialization.
// This function has an empty body.
static void arch_init()
{
}
b9b61529 3520
3521// vim:shiftwidth=2:expandtab