drc: remove yet more n64 stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
643aeae3 33u_char *translation_cache;
1e212a25 34#else
643aeae3 35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
bdeade46 36#endif
37
4d646738 38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
e2b5e7aa 44#define unused __attribute__((unused))
45
dd114d7d 46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
// Entry points defined outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN entry point for each of the 16 ARM register numbers.
// Slots 11, 13, 14 and 15 have no entry point and hold 0.
void * const jump_vaddr_reg[16] = {
  jump_vaddr_r0,
  jump_vaddr_r1,
  jump_vaddr_r2,
  jump_vaddr_r3,
  jump_vaddr_r4,
  jump_vaddr_r5,
  jump_vaddr_r6,
  jump_vaddr_r7,
  jump_vaddr_r8,
  jump_vaddr_r9,
  jump_vaddr_r10,
  0,
  jump_vaddr_r12,
  0,
  0,
  0
};
57871462 95
0bbd1454 96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
d148d265 127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 128
57871462 129/* Linker */
130
df4dc2b1 131static void set_jump_target(void *addr, void *target_)
57871462 132{
df4dc2b1 133 u_int target = (u_int)target_;
134 u_char *ptr = addr;
57871462 135 u_int *ptr2=(u_int *)ptr;
136 if(ptr[3]==0xe2) {
137 assert((target-(u_int)ptr2-8)<1024);
df4dc2b1 138 assert(((uintptr_t)addr&3)==0);
57871462 139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
df4dc2b1 141 //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
57871462 142 }
143 else if(ptr[3]==0x72) {
144 // generated by emit_jno_unlikely
145 if((target-(u_int)ptr2-8)<1024) {
df4dc2b1 146 assert(((uintptr_t)addr&3)==0);
57871462 147 assert((target&3)==0);
148 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
149 }
150 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
df4dc2b1 151 assert(((uintptr_t)addr&3)==0);
57871462 152 assert((target&3)==0);
153 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
154 }
155 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
156 }
157 else {
158 assert((ptr[3]&0x0e)==0xa);
159 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
160 }
161}
162
// This optionally copies the instruction from the target of the branch into
// the space before the branch.  Works, but the difference in speed is
// usually insignificant.
// The function is compiled out (#if 0) and kept for reference only.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  // when copying, the word before the branch must be the 0xe28dd000 filler
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
  }
  else {
    assert((ptr[3]&0x0e)==0xa);
    u_int target_insn=*(u_int *)target;
    if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
      copy=0;
    }
    if((target_insn&0x0c100000)==0x04100000) { // Load
      copy=0;
    }
    if(target_insn&0x08000000) {
      copy=0;
    }
    if(copy) {
      // place the target's first instruction before the branch and skip it
      ptr2[-1]=target_insn;
      target+=4;
    }
    *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
  }
}
#endif
57871462 197
198/* Literal pool */
e2b5e7aa 199static void add_literal(int addr,int val)
57871462 200{
15776b68 201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
9f51b4b9 204 literalcount++;
205}
57871462 206
d148d265 207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
57871462 210{
211 int *ptr=(int *)(stub+4);
d148d265 212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 213 u_int offset=*ptr&0xfff;
d148d265 214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
57871462 216}
217
f968d35d 218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
643aeae3 222static void *get_pointer(void *stub)
57871462 223{
224 //printf("get_pointer(%x)\n",(int)stub);
d148d265 225 int *i_ptr=find_extjump_insn(stub);
57871462 226 assert((*i_ptr&0x0f000000)==0x0a000000);
643aeae3 227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 228}
229
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// The bl is expected 4 words (6 with HAVE_ARMV7) after addr, or one word
// later.  If the word after the bl is an unconditional branch (0xea......),
// its destination is returned; otherwise the address after the bl is.
static void *get_clean_addr(void *addr)
{
  signed int *ptr = addr;
  #ifndef HAVE_ARMV7
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    return (char *)ptr+((*ptr<<8)>>6)+8; // follow jump
  }
  return ptr;
}
248
e2b5e7aa 249static int verify_dirty(u_int *ptr)
57871462 250{
665f33e1 251 #ifndef HAVE_ARMV7
16c8be17 252 u_int offset;
57871462 253 // get from literal pool
15776b68 254 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
57871462 267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
df4dc2b1 283static int isclean(void *addr)
57871462 284{
665f33e1 285 #ifndef HAVE_ARMV7
581335b0 286 u_int *ptr=((u_int *)addr)+4;
57871462 287 #else
581335b0 288 u_int *ptr=((u_int *)addr)+6;
57871462 289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
4a35de07 298// get source that block at addr was compiled from (host pointers)
01d26796 299static void get_bounds(void *addr, u_char **start, u_char **end)
57871462 300{
643aeae3 301 u_int *ptr = addr;
665f33e1 302 #ifndef HAVE_ARMV7
16c8be17 303 u_int offset;
57871462 304 // get from literal pool
15776b68 305 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
57871462 318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
01d26796 328 *start=(u_char *)source;
329 *end=(u_char *)source+len;
57871462 330}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 336static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 342
57871462 343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
9f51b4b9 345
57871462 346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
9f51b4b9 351
57871462 352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 354
57871462 355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
00fa9369 363 assert(r < 64);
364 if((cur->u>>r)&1) {
57871462 365 cur->regmap[preferred_reg]=reg;
366 cur->dirty&=~(1<<preferred_reg);
367 cur->isconst&=~(1<<preferred_reg);
368 return;
369 }
9f51b4b9 370
57871462 371 // Clear any unneeded registers
372 // We try to keep the mapping consistent, if possible, because it
373 // makes branches easier (especially loops). So we try to allocate
374 // first (see above) before removing old mappings. If this is not
375 // possible then go ahead and clear out the registers that are no
376 // longer needed.
377 for(hr=0;hr<HOST_REGS;hr++)
378 {
379 r=cur->regmap[hr];
380 if(r>=0) {
00fa9369 381 assert(r < 64);
382 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
57871462 383 }
384 }
385 // Try to allocate any available register, but prefer
386 // registers that have not been used recently.
387 if(i>0) {
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
397 }
398 }
399 // Try to allocate any available register
400 for(hr=0;hr<HOST_REGS;hr++) {
401 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
402 cur->regmap[hr]=reg;
403 cur->dirty&=~(1<<hr);
404 cur->isconst&=~(1<<hr);
405 return;
406 }
407 }
9f51b4b9 408
57871462 409 // Ok, now we have to evict someone
410 // Pick a register we hopefully won't need soon
411 u_char hsn[MAXREG+1];
412 memset(hsn,10,sizeof(hsn));
413 int j;
414 lsn(hsn,i,&preferred_reg);
415 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
416 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
417 if(i>0) {
418 // Don't evict the cycle count at entry points, otherwise the entry
419 // stub will have to write it.
420 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
421 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
422 for(j=10;j>=3;j--)
423 {
424 // Alloc preferred register if available
425 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
426 for(hr=0;hr<HOST_REGS;hr++) {
427 // Evict both parts of a 64-bit register
428 if((cur->regmap[hr]&63)==r) {
429 cur->regmap[hr]=-1;
430 cur->dirty&=~(1<<hr);
431 cur->isconst&=~(1<<hr);
432 }
433 }
434 cur->regmap[preferred_reg]=reg;
435 return;
436 }
437 for(r=1;r<=MAXREG;r++)
438 {
439 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
440 for(hr=0;hr<HOST_REGS;hr++) {
441 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
442 if(cur->regmap[hr]==r+64) {
443 cur->regmap[hr]=reg;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 return;
447 }
448 }
449 }
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 }
461 }
462 }
463 }
464 for(j=10;j>=0;j--)
465 {
466 for(r=1;r<=MAXREG;r++)
467 {
468 if(hsn[r]==j) {
469 for(hr=0;hr<HOST_REGS;hr++) {
470 if(cur->regmap[hr]==r+64) {
471 cur->regmap[hr]=reg;
472 cur->dirty&=~(1<<hr);
473 cur->isconst&=~(1<<hr);
474 return;
475 }
476 }
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 }
486 }
487 }
c43b5311 488 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 489}
490
// Kept for callers that request a 64-bit register: only the lower
// 32 bits are allocated, so this simply forwards to alloc_reg().
static void alloc_reg64(struct regstat *cur,int i,signed char reg)
{
  // allocate the lower 32 bits
  alloc_reg(cur,i,reg);
}
496
497// Allocate a temporary register. This is done without regard to
498// dirty status or whether the register we request is on the unneeded list
499// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 500static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 501{
502 int r,hr;
503 int preferred_reg = -1;
9f51b4b9 504
57871462 505 // see if it's already allocated
506 for(hr=0;hr<HOST_REGS;hr++)
507 {
508 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
509 }
9f51b4b9 510
57871462 511 // Try to allocate any available register
512 for(hr=HOST_REGS-1;hr>=0;hr--) {
513 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
514 cur->regmap[hr]=reg;
515 cur->dirty&=~(1<<hr);
516 cur->isconst&=~(1<<hr);
517 return;
518 }
519 }
9f51b4b9 520
57871462 521 // Find an unneeded register
522 for(hr=HOST_REGS-1;hr>=0;hr--)
523 {
524 r=cur->regmap[hr];
525 if(r>=0) {
00fa9369 526 assert(r < 64);
527 if((cur->u>>r)&1) {
528 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
529 cur->regmap[hr]=reg;
530 cur->dirty&=~(1<<hr);
531 cur->isconst&=~(1<<hr);
532 return;
57871462 533 }
534 }
535 }
536 }
9f51b4b9 537
57871462 538 // Ok, now we have to evict someone
539 // Pick a register we hopefully won't need soon
540 // TODO: we might want to follow unconditional jumps here
541 // TODO: get rid of dupe code and make this into a function
542 u_char hsn[MAXREG+1];
543 memset(hsn,10,sizeof(hsn));
544 int j;
545 lsn(hsn,i,&preferred_reg);
546 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
547 if(i>0) {
548 // Don't evict the cycle count at entry points, otherwise the entry
549 // stub will have to write it.
550 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
551 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
552 for(j=10;j>=3;j--)
553 {
554 for(r=1;r<=MAXREG;r++)
555 {
556 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
557 for(hr=0;hr<HOST_REGS;hr++) {
558 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
559 if(cur->regmap[hr]==r+64) {
560 cur->regmap[hr]=reg;
561 cur->dirty&=~(1<<hr);
562 cur->isconst&=~(1<<hr);
563 return;
564 }
565 }
566 }
567 for(hr=0;hr<HOST_REGS;hr++) {
568 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
569 if(cur->regmap[hr]==r) {
570 cur->regmap[hr]=reg;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576 }
577 }
578 }
579 }
580 }
581 for(j=10;j>=0;j--)
582 {
583 for(r=1;r<=MAXREG;r++)
584 {
585 if(hsn[r]==j) {
586 for(hr=0;hr<HOST_REGS;hr++) {
587 if(cur->regmap[hr]==r+64) {
588 cur->regmap[hr]=reg;
589 cur->dirty&=~(1<<hr);
590 cur->isconst&=~(1<<hr);
591 return;
592 }
593 }
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(cur->regmap[hr]==r) {
596 cur->regmap[hr]=reg;
597 cur->dirty&=~(1<<hr);
598 cur->isconst&=~(1<<hr);
599 return;
600 }
601 }
602 }
603 }
604 }
c43b5311 605 SysPrintf("This shouldn't happen");exit(1);
57871462 606}
e2b5e7aa 607
57871462 608// Allocate a specific ARM register.
e2b5e7aa 609static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 610{
611 int n;
f776eb14 612 int dirty=0;
9f51b4b9 613
57871462 614 // see if it's already allocated (and dealloc it)
615 for(n=0;n<HOST_REGS;n++)
616 {
f776eb14 617 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
618 dirty=(cur->dirty>>n)&1;
619 cur->regmap[n]=-1;
620 }
57871462 621 }
9f51b4b9 622
57871462 623 cur->regmap[hr]=reg;
624 cur->dirty&=~(1<<hr);
f776eb14 625 cur->dirty|=dirty<<hr;
57871462 626 cur->isconst&=~(1<<hr);
627}
628
629// Alloc cycle count into dedicated register
e2b5e7aa 630static void alloc_cc(struct regstat *cur,int i)
57871462 631{
632 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
633}
634
635/* Special alloc */
636
637
638/* Assembler */
639
e2b5e7aa 640static unused char regname[16][4] = {
57871462 641 "r0",
642 "r1",
643 "r2",
644 "r3",
645 "r4",
646 "r5",
647 "r6",
648 "r7",
649 "r8",
650 "r9",
651 "r10",
652 "fp",
653 "r12",
654 "sp",
655 "lr",
656 "pc"};
657
e2b5e7aa 658static void output_w32(u_int word)
57871462 659{
660 *((u_int *)out)=word;
661 out+=4;
662}
e2b5e7aa 663
// Build the register fields of an ARM instruction word:
// rn in bits 19..16, rd in bits 15..12, rm in bits 3..0.
// All three must be valid register numbers (< 16).
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  return((rn<<16)|(rd<<12)|rm);
}
e2b5e7aa 671
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted
// left by `shift` bits.  shift must be even; it is stored in bits 11..8
// as ((32-shift)&30)/2.  imm must fit in 8 bits.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
}
e2b5e7aa 680
// Try to express imm as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit operand field in *encoded
// (rotate count/2 in bits 11..8, 8-bit value in bits 7..0).
// On failure returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded=0;
  if(imm==0) return 1;
  int i=32;
  while(i>0)
  {
    if(imm<256) {
      *encoded=((i&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);i-=2;
  }
  return 0;
}
e2b5e7aa 696
697static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 698{
699 u_int ret=genimm(imm,encoded);
700 assert(ret);
581335b0 701 (void)ret;
cfbd3c6e 702}
e2b5e7aa 703
704static u_int genjmp(u_int addr)
57871462 705{
706 int offset=addr-(int)out-8;
e80343e2 707 if(offset<-33554432||offset>=33554432) {
708 if (addr>2) {
c43b5311 709 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 710 exit(1);
711 }
712 return 0;
713 }
57871462 714 return ((u_int)offset>>2)&0xffffff;
715}
716
e2b5e7aa 717static void emit_mov(int rs,int rt)
57871462 718{
719 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
720 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
721}
722
e2b5e7aa 723static void emit_movs(int rs,int rt)
57871462 724{
725 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
726 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
727}
728
e2b5e7aa 729static void emit_add(int rs1,int rs2,int rt)
57871462 730{
731 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
732 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
733}
734
e2b5e7aa 735static void emit_adds(int rs1,int rs2,int rt)
57871462 736{
737 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
738 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
739}
740
e2b5e7aa 741static void emit_adcs(int rs1,int rs2,int rt)
57871462 742{
743 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
744 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
745}
746
e2b5e7aa 747static void emit_sbcs(int rs1,int rs2,int rt)
57871462 748{
749 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
750 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
751}
752
e2b5e7aa 753static void emit_neg(int rs, int rt)
57871462 754{
755 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
756 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
757}
758
e2b5e7aa 759static void emit_sub(int rs1,int rs2,int rt)
57871462 760{
761 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
762 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
763}
764
e2b5e7aa 765static void emit_zeroreg(int rt)
57871462 766{
767 assem_debug("mov %s,#0\n",regname[rt]);
768 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
769}
770
e2b5e7aa 771static void emit_loadlp(u_int imm,u_int rt)
790ee18e 772{
773 add_literal((int)out,imm);
774 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
775 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
776}
e2b5e7aa 777
778static void emit_movw(u_int imm,u_int rt)
790ee18e 779{
780 assert(imm<65536);
781 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
782 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
783}
e2b5e7aa 784
785static void emit_movt(u_int imm,u_int rt)
790ee18e 786{
787 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
788 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
789}
e2b5e7aa 790
791static void emit_movimm(u_int imm,u_int rt)
790ee18e 792{
793 u_int armval;
794 if(genimm(imm,&armval)) {
795 assem_debug("mov %s,#%d\n",regname[rt],imm);
796 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
797 }else if(genimm(~imm,&armval)) {
798 assem_debug("mvn %s,#%d\n",regname[rt],imm);
799 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
800 }else if(imm<65536) {
665f33e1 801 #ifndef HAVE_ARMV7
790ee18e 802 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
803 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
804 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
805 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
806 #else
807 emit_movw(imm,rt);
808 #endif
809 }else{
665f33e1 810 #ifndef HAVE_ARMV7
790ee18e 811 emit_loadlp(imm,rt);
812 #else
813 emit_movw(imm&0x0000FFFF,rt);
814 emit_movt(imm&0xFFFF0000,rt);
815 #endif
816 }
817}
e2b5e7aa 818
819static void emit_pcreladdr(u_int rt)
790ee18e 820{
821 assem_debug("add %s,pc,#?\n",regname[rt]);
822 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
823}
824
e2b5e7aa 825static void emit_loadreg(int r, int hr)
57871462 826{
3d624f89 827 if(r&64) {
c43b5311 828 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 829 assert(0);
830 return;
3d624f89 831 }
57871462 832 if((r&63)==0)
833 emit_zeroreg(hr);
834 else {
3d624f89 835 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 836 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
837 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
838 if(r==CCREG) addr=(int)&cycle_count;
839 if(r==CSREG) addr=(int)&Status;
57871462 840 if(r==INVCP) addr=(int)&invc_ptr;
841 u_int offset = addr-(u_int)&dynarec_local;
842 assert(offset<4096);
843 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
844 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
845 }
846}
e2b5e7aa 847
848static void emit_storereg(int r, int hr)
57871462 849{
3d624f89 850 if(r&64) {
c43b5311 851 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 852 assert(0);
853 return;
3d624f89 854 }
3d624f89 855 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 856 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
857 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
858 if(r==CCREG) addr=(int)&cycle_count;
57871462 859 u_int offset = addr-(u_int)&dynarec_local;
860 assert(offset<4096);
861 assem_debug("str %s,fp+%d\n",regname[hr],offset);
862 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
863}
864
e2b5e7aa 865static void emit_test(int rs, int rt)
57871462 866{
867 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
868 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
869}
870
e2b5e7aa 871static void emit_testimm(int rs,int imm)
57871462 872{
873 u_int armval;
5a05d80c 874 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 875 genimm_checked(imm,&armval);
57871462 876 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
877}
878
e2b5e7aa 879static void emit_testeqimm(int rs,int imm)
b9b61529 880{
881 u_int armval;
882 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 883 genimm_checked(imm,&armval);
b9b61529 884 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
885}
886
e2b5e7aa 887static void emit_not(int rs,int rt)
57871462 888{
889 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
890 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
891}
892
e2b5e7aa 893static void emit_mvnmi(int rs,int rt)
b9b61529 894{
895 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
896 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
897}
898
e2b5e7aa 899static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 900{
901 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
902 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
903}
904
e2b5e7aa 905static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 906{
907 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
908 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
909}
e2b5e7aa 910
911static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 912{
913 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
915}
916
e2b5e7aa 917static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 918{
919 assert(rs<16);
920 assert(rt<16);
921 assert(imm<32);
922 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
923 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
924}
925
e2b5e7aa 926static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 927{
928 assert(rs<16);
929 assert(rt<16);
930 assert(imm<32);
931 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
932 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
933}
934
e2b5e7aa 935static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 936{
937 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
939}
940
e2b5e7aa 941static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 942{
943 assert(rs<16);
944 assert(rt<16);
945 if(imm!=0) {
57871462 946 u_int armval;
947 if(genimm(imm,&armval)) {
948 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
949 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
950 }else if(genimm(-imm,&armval)) {
8a0a8423 951 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 952 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 953 #ifdef HAVE_ARMV7
954 }else if(rt!=rs&&(u_int)imm<65536) {
955 emit_movw(imm&0x0000ffff,rt);
956 emit_add(rs,rt,rt);
957 }else if(rt!=rs&&(u_int)-imm<65536) {
958 emit_movw(-imm&0x0000ffff,rt);
959 emit_sub(rs,rt,rt);
960 #endif
961 }else if((u_int)-imm<65536) {
57871462 962 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
963 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
964 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
965 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 966 }else {
967 do {
968 int shift = (ffs(imm) - 1) & ~1;
969 int imm8 = imm & (0xff << shift);
970 genimm_checked(imm8,&armval);
971 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
972 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
973 rs = rt;
974 imm &= ~imm8;
975 }
976 while (imm != 0);
57871462 977 }
978 }
979 else if(rs!=rt) emit_mov(rs,rt);
980}
981
e2b5e7aa 982static void emit_addimm_and_set_flags(int imm,int rt)
57871462 983{
984 assert(imm>-65536&&imm<65536);
985 u_int armval;
986 if(genimm(imm,&armval)) {
987 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
988 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
989 }else if(genimm(-imm,&armval)) {
990 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
991 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
992 }else if(imm<0) {
993 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
994 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
995 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
996 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
997 }else{
998 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
999 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1000 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1001 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1002 }
1003}
e2b5e7aa 1004
1005static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1006{
1007 emit_addimm(rt,imm,rt);
1008}
1009
e2b5e7aa 1010static void emit_addnop(u_int r)
57871462 1011{
1012 assert(r<16);
1013 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1014 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1015}
1016
e2b5e7aa 1017static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1018{
1019 u_int armval;
cfbd3c6e 1020 genimm_checked(imm,&armval);
57871462 1021 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1022 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1023}
1edfcc68 1024
e2b5e7aa 1025static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1026{
1027 // TODO: if(genimm(imm,&armval)) ...
1028 // else
1029 emit_movimm(imm,HOST_TEMPREG);
1030 emit_adds(HOST_TEMPREG,rsl,rtl);
1031 emit_adcimm(rsh,0,rth);
1032}
1033
e2b5e7aa 1034static void emit_andimm(int rs,int imm,int rt)
57871462 1035{
1036 u_int armval;
790ee18e 1037 if(imm==0) {
1038 emit_zeroreg(rt);
1039 }else if(genimm(imm,&armval)) {
57871462 1040 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1041 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1042 }else if(genimm(~imm,&armval)) {
1043 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1044 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1045 }else if(imm==65535) {
332a4533 1046 #ifndef HAVE_ARMV6
57871462 1047 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1048 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1049 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1050 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1051 #else
1052 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1053 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1054 #endif
1055 }else{
1056 assert(imm>0&&imm<65535);
665f33e1 1057 #ifndef HAVE_ARMV7
57871462 1058 assem_debug("mov r14,#%d\n",imm&0xFF00);
1059 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1060 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1061 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1062 #else
1063 emit_movw(imm,HOST_TEMPREG);
1064 #endif
1065 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1066 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1067 }
1068}
1069
e2b5e7aa 1070static void emit_orimm(int rs,int imm,int rt)
57871462 1071{
1072 u_int armval;
790ee18e 1073 if(imm==0) {
1074 if(rs!=rt) emit_mov(rs,rt);
1075 }else if(genimm(imm,&armval)) {
57871462 1076 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1077 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1078 }else{
1079 assert(imm>0&&imm<65536);
1080 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1081 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1082 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1083 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1084 }
1085}
1086
e2b5e7aa 1087static void emit_xorimm(int rs,int imm,int rt)
57871462 1088{
57871462 1089 u_int armval;
790ee18e 1090 if(imm==0) {
1091 if(rs!=rt) emit_mov(rs,rt);
1092 }else if(genimm(imm,&armval)) {
57871462 1093 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1094 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1095 }else{
514ed0d9 1096 assert(imm>0&&imm<65536);
57871462 1097 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1098 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1099 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1100 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1101 }
1102}
1103
e2b5e7aa 1104static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1105{
1106 assert(imm>0);
1107 assert(imm<32);
1108 //if(imm==1) ...
1109 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1110 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1111}
1112
e2b5e7aa 1113static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1114{
1115 assert(imm>0);
1116 assert(imm<32);
1117 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1118 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1119}
1120
e2b5e7aa 1121static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1122{
1123 assert(imm>0);
1124 assert(imm<32);
1125 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1126 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1127}
1128
e2b5e7aa 1129static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1130{
1131 assert(imm>0);
1132 assert(imm<32);
1133 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1134 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1135}
1136
e2b5e7aa 1137static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1138{
1139 assert(imm>0);
1140 assert(imm<32);
1141 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1142 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1143}
1144
e2b5e7aa 1145static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1146{
1147 assert(imm>0);
1148 assert(imm<32);
1149 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1151}
1152
// Sign-extend the low 16 bits of rs into rt.
// With HAVE_ARMV6 a single SXTH is emitted; otherwise a left shift by 16
// followed by an arithmetic right shift by 16.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1163
// Sign-extend the low 8 bits of rs into rt.
// With HAVE_ARMV6 a single SXTB is emitted; otherwise a left shift by 24
// followed by an arithmetic right shift by 24.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1174
e2b5e7aa 1175static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1176{
1177 assert(rs<16);
1178 assert(rt<16);
1179 assert(shift<16);
1180 //if(imm==1) ...
1181 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1182 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1183}
e2b5e7aa 1184
1185static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1186{
1187 assert(rs<16);
1188 assert(rt<16);
1189 assert(shift<16);
1190 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1191 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1192}
e2b5e7aa 1193
1194static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1195{
1196 assert(rs<16);
1197 assert(rt<16);
1198 assert(shift<16);
1199 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1200 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1201}
57871462 1202
e2b5e7aa 1203static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1204{
1205 assert(rs<16);
1206 assert(rt<16);
1207 assert(shift<16);
1208 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1209 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1210}
e2b5e7aa 1211
1212static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1213{
1214 assert(rs<16);
1215 assert(rt<16);
1216 assert(shift<16);
1217 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1218 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1219}
1220
e2b5e7aa 1221static void emit_cmpimm(int rs,int imm)
57871462 1222{
1223 u_int armval;
1224 if(genimm(imm,&armval)) {
5a05d80c 1225 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1226 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1227 }else if(genimm(-imm,&armval)) {
5a05d80c 1228 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1229 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1230 }else if(imm>0) {
1231 assert(imm<65536);
57871462 1232 emit_movimm(imm,HOST_TEMPREG);
57871462 1233 assem_debug("cmp %s,r14\n",regname[rs]);
1234 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1235 }else{
1236 assert(imm>-65536);
57871462 1237 emit_movimm(-imm,HOST_TEMPREG);
57871462 1238 assem_debug("cmn %s,r14\n",regname[rs]);
1239 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1240 }
1241}
1242
e2b5e7aa 1243static void emit_cmovne_imm(int imm,int rt)
57871462 1244{
1245 assem_debug("movne %s,#%d\n",regname[rt],imm);
1246 u_int armval;
cfbd3c6e 1247 genimm_checked(imm,&armval);
57871462 1248 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1249}
e2b5e7aa 1250
1251static void emit_cmovl_imm(int imm,int rt)
57871462 1252{
1253 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1254 u_int armval;
cfbd3c6e 1255 genimm_checked(imm,&armval);
57871462 1256 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1257}
e2b5e7aa 1258
1259static void emit_cmovb_imm(int imm,int rt)
57871462 1260{
1261 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1262 u_int armval;
cfbd3c6e 1263 genimm_checked(imm,&armval);
57871462 1264 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1265}
e2b5e7aa 1266
1267static void emit_cmovs_imm(int imm,int rt)
57871462 1268{
1269 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1270 u_int armval;
cfbd3c6e 1271 genimm_checked(imm,&armval);
57871462 1272 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1273}
e2b5e7aa 1274
e2b5e7aa 1275static void emit_cmovne_reg(int rs,int rt)
57871462 1276{
1277 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1278 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1279}
e2b5e7aa 1280
1281static void emit_cmovl_reg(int rs,int rt)
57871462 1282{
1283 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1284 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1285}
e2b5e7aa 1286
1287static void emit_cmovs_reg(int rs,int rt)
57871462 1288{
1289 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1290 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1291}
1292
// Emit rt = (rs < imm) ? 1 : 0 using a compare followed by "movlt rt,#1".
// When rt aliases rs the destination can only be cleared after the
// compare has read it; otherwise it is cleared up front.
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1300
// Emit rt = 1 if the compare of rs with imm leaves the CC condition true,
// else 0 (compare followed by "movcc rt,#1").
// When rt aliases rs the destination can only be cleared after the
// compare has read it; otherwise it is cleared up front.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1308
// Set rt from a compare of the register pair rsh:rsl against imm.
// The low word is handled by emit_slti32(); the result is then corrected
// from the high word: tested against itself when imm >= 0, or compared
// with -1 when imm < 0.  rt must not alias rsh.
static void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
e2b5e7aa 1326
// Set rt from a compare of the register pair rsh:rsl against imm, using
// the CC-based low-word compare of emit_sltiu32().
// The result is then overridden from the high word: forced to 0 when
// imm >= 0 and rsh tests non-zero, or forced to 1 when imm < 0 and rsh
// differs from -1.  rt must not alias rsh.
static void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1342
e2b5e7aa 1343static void emit_cmp(int rs,int rt)
57871462 1344{
1345 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1346 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1347}
e2b5e7aa 1348
// Emit rt = 1, replaced by 0 when rs compares less than 1 (i.e. rt ends
// up 1 only for rs > 0): cmp rs,#1 ; mov rt,#1 ; movlt rt,#0.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1356
// Emit code that turns a non-zero rs into 1 in rt.
// If rt is a different register, rs is first copied with emit_movs();
// if it is the same register it is only tested.  "movne rt,#1" then
// overwrites rt when the NE condition holds.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1364
// 64-bit variant of emit_set_gz32() for the register pair rsh:rsl.
// The low word is evaluated first; the high word is then tested and the
// result forced to 1 on NE and to 0 on MI, in that order.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1373
// Combine rsh and rsl into rt with emit_or_and_set_flags(), then replace
// rt with 1 when the NE condition holds.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1380
// Emit rt = (rs1 < rs2) ? 1 : 0 via "cmp rs1,rs2" and "movlt rt,#1".
// rt is cleared before the compare unless it aliases one of the sources,
// in which case it is cleared only after the compare has read them.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1389
// Emit rt = 1 when "cmp rs1,rs2" leaves the CC condition true, else 0
// (compare followed by "movcc rt,#1").
// rt is cleared before the compare unless it aliases one of the sources,
// in which case it is cleared only after the compare has read them.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);
  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1398
1399static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1400{
1401 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1402 assert(u1!=rt);
1403 assert(u2!=rt);
1404 emit_cmp(l1,l2);
1405 emit_movimm(0,rt);
1406 emit_sbcs(u1,u2,HOST_TEMPREG);
1407 emit_cmovl_imm(1,rt);
1408}
e2b5e7aa 1409
1410static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1411{
1412 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1413 assert(u1!=rt);
1414 assert(u2!=rt);
1415 emit_cmp(l1,l2);
1416 emit_movimm(0,rt);
1417 emit_sbcs(u1,u2,HOST_TEMPREG);
1418 emit_cmovb_imm(1,rt);
1419}
1420
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
// Pairs a function address with its name (prefixed by a space so it can
// be appended directly to a trace line).
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Return the name recorded for address a in function_names, or "" when
// the address is not in the table.
static const char *func_name(intptr_t a)
{
  // unsigned index: the bound is a sizeof expression, so a signed index
  // would be compared across signedness
  unsigned int i;
  for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
    if (function_names[i].addr == a)
      return function_names[i].name;
  return "";
}
#else
// Without DRC_DBG no names are available; traces get an empty suffix.
#define func_name(x) ""
#endif
1461
643aeae3 1462static void emit_call(const void *a_)
57871462 1463{
643aeae3 1464 int a = (int)a_;
dd114d7d 1465 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1466 u_int offset=genjmp(a);
1467 output_w32(0xeb000000|offset);
1468}
e2b5e7aa 1469
b14b6a8f 1470static void emit_jmp(const void *a_)
57871462 1471{
b14b6a8f 1472 int a = (int)a_;
dd114d7d 1473 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1474 u_int offset=genjmp(a);
1475 output_w32(0xea000000|offset);
1476}
e2b5e7aa 1477
// Emit "bne" to the absolute address a_ (offset field from genjmp()).
static void emit_jne(const void *a_)
{
  const int a = (int)a_;
  assem_debug("bne %x\n", a);
  output_w32(0x1a000000 | genjmp(a));
}
e2b5e7aa 1485
// Emit "beq" to the absolute address a (offset field from genjmp()).
static void emit_jeq(int a)
{
  assem_debug("beq %x\n", a);
  output_w32(0x0a000000 | genjmp(a));
}
e2b5e7aa 1492
// Emit "bmi" to the absolute address a (offset field from genjmp()).
static void emit_js(int a)
{
  assem_debug("bmi %x\n", a);
  output_w32(0x4a000000 | genjmp(a));
}
e2b5e7aa 1499
// Emit "bpl" to the absolute address a (offset field from genjmp()).
static void emit_jns(int a)
{
  assem_debug("bpl %x\n", a);
  output_w32(0x5a000000 | genjmp(a));
}
e2b5e7aa 1506
// Emit "blt" to the absolute address a (offset field from genjmp()).
static void emit_jl(int a)
{
  assem_debug("blt %x\n", a);
  output_w32(0xba000000 | genjmp(a));
}
e2b5e7aa 1513
// Emit "bge" to the absolute address a (offset field from genjmp()).
static void emit_jge(int a)
{
  assem_debug("bge %x\n", a);
  output_w32(0xaa000000 | genjmp(a));
}
e2b5e7aa 1520
// Emit "bvc" to the absolute address a (offset field from genjmp()).
static void emit_jno(int a)
{
  assem_debug("bvc %x\n", a);
  output_w32(0x7a000000 | genjmp(a));
}
e2b5e7aa 1527
// Emit "bcs" to the absolute address a (offset field from genjmp()).
static void emit_jc(int a)
{
  assem_debug("bcs %x\n", a);
  output_w32(0x2a000000 | genjmp(a));
}
e2b5e7aa 1534
// Emit "bcc" to the absolute address a_ (offset field from genjmp()).
static void emit_jcc(void *a_)
{
  const int a = (int)a_;
  assem_debug("bcc %x\n", a);
  output_w32(0x3a000000 | genjmp(a));
}
1542
e2b5e7aa 1543static void emit_callreg(u_int r)
57871462 1544{
c6c3b1b3 1545 assert(r<15);
1546 assem_debug("blx %s\n",regname[r]);
1547 output_w32(0xe12fff30|r);
57871462 1548}
e2b5e7aa 1549
1550static void emit_jmpreg(u_int r)
57871462 1551{
1552 assem_debug("mov pc,%s\n",regname[r]);
1553 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1554}
1555
e2b5e7aa 1556static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1557{
1558 assert(offset>-4096&&offset<4096);
1559 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1560 if(offset>=0) {
1561 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1562 }else{
1563 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1564 }
1565}
e2b5e7aa 1566
1567static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1568{
1569 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1570 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1571}
e2b5e7aa 1572
1573static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1574{
1575 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1576 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1577}
e2b5e7aa 1578
1579static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1580{
1581 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1582 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1583}
e2b5e7aa 1584
1585static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1586{
1587 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1588 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1589}
e2b5e7aa 1590
1591static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1592{
1593 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1594 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1595}
e2b5e7aa 1596
1597static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1598{
1599 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1600 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1601}
e2b5e7aa 1602
e2b5e7aa 1603static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1604{
1605 assert(offset>-256&&offset<256);
1606 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1607 if(offset>=0) {
1608 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1609 }else{
1610 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1611 }
1612}
e2b5e7aa 1613
e2b5e7aa 1614static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1615{
1616 assert(offset>-256&&offset<256);
1617 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1618 if(offset>=0) {
1619 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1620 }else{
1621 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1622 }
1623}
e2b5e7aa 1624
1625static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1626{
1627 assert(offset>-4096&&offset<4096);
1628 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1629 if(offset>=0) {
1630 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1631 }else{
1632 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1633 }
1634}
e2b5e7aa 1635
e2b5e7aa 1636static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1637{
1638 assert(offset>-256&&offset<256);
1639 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1640 if(offset>=0) {
1641 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1642 }else{
1643 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1644 }
1645}
e2b5e7aa 1646
054175e9 1647static void emit_ldrd(int offset, int rs, int rt)
1648{
1649 assert(offset>-256&&offset<256);
1650 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1651 if(offset>=0) {
1652 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1653 }else{
1654 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1655 }
1656}
e2b5e7aa 1657
643aeae3 1658static void emit_readword(void *addr, int rt)
57871462 1659{
643aeae3 1660 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1661 assert(offset<4096);
1662 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1663 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1664}
e2b5e7aa 1665
e2b5e7aa 1666static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1667{
1668 assert(offset>-4096&&offset<4096);
1669 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1670 if(offset>=0) {
1671 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1672 }else{
1673 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1674 }
1675}
e2b5e7aa 1676
e2b5e7aa 1677static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1678{
1679 assert(offset>-256&&offset<256);
1680 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1681 if(offset>=0) {
1682 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1683 }else{
1684 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1685 }
1686}
e2b5e7aa 1687
1688static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1689{
1690 assert(offset>-4096&&offset<4096);
1691 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1692 if(offset>=0) {
1693 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1694 }else{
1695 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1696 }
1697}
e2b5e7aa 1698
e2b5e7aa 1699static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1700{
1701 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1702 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1703}
e2b5e7aa 1704
1705static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1706{
1707 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1708 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1709}
e2b5e7aa 1710
1711static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1712{
1713 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1714 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1715}
e2b5e7aa 1716
643aeae3 1717static void emit_writeword(int rt, void *addr)
57871462 1718{
643aeae3 1719 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1720 assert(offset<4096);
1721 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1722 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1723}
e2b5e7aa 1724
e2b5e7aa 1725static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1726{
1727 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1728 assert(rs1<16);
1729 assert(rs2<16);
1730 assert(hi<16);
1731 assert(lo<16);
1732 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1733}
e2b5e7aa 1734
1735static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1736{
1737 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1738 assert(rs1<16);
1739 assert(rs2<16);
1740 assert(hi<16);
1741 assert(lo<16);
1742 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1743}
1744
e2b5e7aa 1745static void emit_clz(int rs,int rt)
57871462 1746{
1747 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1748 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1749}
1750
e2b5e7aa 1751static void emit_subcs(int rs1,int rs2,int rt)
57871462 1752{
1753 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1754 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1755}
1756
e2b5e7aa 1757static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1758{
1759 assert(imm>0);
1760 assert(imm<32);
1761 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1762 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1763}
1764
e2b5e7aa 1765static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1766{
1767 assert(imm>0);
1768 assert(imm<32);
1769 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1770 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1771}
1772
e2b5e7aa 1773static void emit_negmi(int rs, int rt)
57871462 1774{
1775 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1776 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1777}
1778
e2b5e7aa 1779static void emit_negsmi(int rs, int rt)
57871462 1780{
1781 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1782 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1783}
1784
e2b5e7aa 1785static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1786{
1787 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1788 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
1789}
1790
e2b5e7aa 1791static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1792{
1793 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1794 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
1795}
1796
e2b5e7aa 1797static void emit_teq(int rs, int rt)
57871462 1798{
1799 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
1800 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
1801}
1802
e2b5e7aa 1803static void emit_rsbimm(int rs, int imm, int rt)
57871462 1804{
1805 u_int armval;
cfbd3c6e 1806 genimm_checked(imm,&armval);
57871462 1807 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
1808 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
1809}
1810
1811// Load 2 immediates optimizing for small code size
e2b5e7aa 1812static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 1813{
1814 emit_movimm(imm1,rt1);
1815 u_int armval;
1816 if(genimm(imm2-imm1,&armval)) {
1817 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
1818 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
1819 }else if(genimm(imm1-imm2,&armval)) {
1820 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
1821 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
1822 }
1823 else emit_movimm(imm2,rt2);
1824}
1825
1826// Conditionally select one of two immediates, optimizing for small code size
1827// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 1828static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 1829{
1830 u_int armval;
1831 if(genimm(imm2-imm1,&armval)) {
1832 emit_movimm(imm1,rt);
1833 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
1834 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
1835 }else if(genimm(imm1-imm2,&armval)) {
1836 emit_movimm(imm1,rt);
1837 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
1838 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
1839 }
1840 else {
665f33e1 1841 #ifndef HAVE_ARMV7
57871462 1842 emit_movimm(imm1,rt);
1843 add_literal((int)out,imm2);
1844 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
1845 output_w32(0x15900000|rd_rn_rm(rt,15,0));
1846 #else
1847 emit_movw(imm1&0x0000FFFF,rt);
1848 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
1849 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
1850 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
1851 }
1852 emit_movt(imm1&0xFFFF0000,rt);
1853 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
1854 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
1855 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
1856 }
1857 #endif
1858 }
1859}
1860
57871462 1861// special case for checking invalid_code
e2b5e7aa 1862static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 1863{
1864 assert(imm<128&&imm>=0);
1865 assert(r>=0&&r<16);
1866 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
1867 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
1868 emit_cmpimm(HOST_TEMPREG,imm);
1869}
1870
// Emit "blne" to the absolute address a (offset field from genjmp()).
static void emit_callne(int a)
{
  assem_debug("blne %x\n", a);
  output_w32(0x1b000000 | genjmp(a));
}
1877
57871462 1878// Used to preload hash table entries
e2b5e7aa 1879static unused void emit_prefetchreg(int r)
57871462 1880{
1881 assem_debug("pld %s\n",regname[r]);
1882 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
1883}
1884
1885// Special case for mini_ht
e2b5e7aa 1886static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 1887{
1888 assert(offset<4096);
1889 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
1890 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
1891}
1892
e2b5e7aa 1893static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 1894{
1895 u_int armval;
cfbd3c6e 1896 genimm_checked(imm,&armval);
b9b61529 1897 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1898 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
1899}
1900
e2b5e7aa 1901static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 1902{
1903 u_int armval;
cfbd3c6e 1904 genimm_checked(imm,&armval);
b9b61529 1905 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1906 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
1907}
1908
e2b5e7aa 1909static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 1910{
1911 u_int armval;
1912 genimm_checked(imm,&armval);
1913 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1914 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
1915}
1916
e2b5e7aa 1917static void emit_jno_unlikely(int a)
57871462 1918{
1919 //emit_jno(a);
1920 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
1921 output_w32(0x72800000|rd_rn_rm(15,15,0));
1922}
1923
054175e9 1924static void save_regs_all(u_int reglist)
57871462 1925{
054175e9 1926 int i;
57871462 1927 if(!reglist) return;
1928 assem_debug("stmia fp,{");
054175e9 1929 for(i=0;i<16;i++)
1930 if(reglist&(1<<i))
1931 assem_debug("r%d,",i);
57871462 1932 assem_debug("}\n");
1933 output_w32(0xe88b0000|reglist);
1934}
e2b5e7aa 1935
054175e9 1936static void restore_regs_all(u_int reglist)
57871462 1937{
054175e9 1938 int i;
57871462 1939 if(!reglist) return;
1940 assem_debug("ldmia fp,{");
054175e9 1941 for(i=0;i<16;i++)
1942 if(reglist&(1<<i))
1943 assem_debug("r%d,",i);
57871462 1944 assem_debug("}\n");
1945 output_w32(0xe89b0000|reglist);
1946}
e2b5e7aa 1947
054175e9 1948// Save registers before function call
1949static void save_regs(u_int reglist)
1950{
4d646738 1951 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 1952 save_regs_all(reglist);
1953}
e2b5e7aa 1954
054175e9 1955// Restore registers after function call
1956static void restore_regs(u_int reglist)
1957{
4d646738 1958 reglist&=CALLER_SAVE_REGS;
054175e9 1959 restore_regs_all(reglist);
1960}
57871462 1961
57871462 1962/* Stubs/epilogue */
1963
e2b5e7aa 1964static void literal_pool(int n)
57871462 1965{
1966 if(!literalcount) return;
1967 if(n) {
1968 if((int)out-literals[0][0]<4096-n) return;
1969 }
1970 u_int *ptr;
1971 int i;
1972 for(i=0;i<literalcount;i++)
1973 {
77750690 1974 u_int l_addr=(u_int)out;
1975 int j;
1976 for(j=0;j<i;j++) {
1977 if(literals[j][1]==literals[i][1]) {
1978 //printf("dup %08x\n",literals[i][1]);
1979 l_addr=literals[j][0];
1980 break;
1981 }
1982 }
57871462 1983 ptr=(u_int *)literals[i][0];
77750690 1984 u_int offset=l_addr-(u_int)ptr-8;
57871462 1985 assert(offset<4096);
1986 assert(!(offset&3));
1987 *ptr|=offset;
77750690 1988 if(l_addr==(u_int)out) {
1989 literals[i][0]=l_addr; // remember for dupes
1990 output_w32(literals[i][1]);
1991 }
57871462 1992 }
1993 literalcount=0;
1994}
1995
e2b5e7aa 1996static void literal_pool_jumpover(int n)
57871462 1997{
1998 if(!literalcount) return;
1999 if(n) {
2000 if((int)out-literals[0][0]<4096-n) return;
2001 }
df4dc2b1 2002 void *jaddr = out;
57871462 2003 emit_jmp(0);
2004 literal_pool(0);
df4dc2b1 2005 set_jump_target(jaddr, out);
57871462 2006}
2007
643aeae3 2008static void emit_extjump2(u_char *addr, int target, void *linker)
57871462 2009{
2010 u_char *ptr=(u_char *)addr;
2011 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2012 (void)ptr;
2013
57871462 2014 emit_loadlp(target,0);
643aeae3 2015 emit_loadlp((u_int)addr,1);
2016 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
57871462 2017 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2018//DEBUG >
2019#ifdef DEBUG_CYCLE_COUNT
643aeae3 2020 emit_readword(&last_count,ECX);
57871462 2021 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 2022 emit_readword(&next_interupt,ECX);
2023 emit_writeword(HOST_CCREG,&Count);
57871462 2024 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 2025 emit_writeword(ECX,&last_count);
57871462 2026#endif
2027//DEBUG <
2028 emit_jmp(linker);
2029}
2030
643aeae3 2031static void emit_extjump(void *addr, int target)
57871462 2032{
b14b6a8f 2033 emit_extjump2(addr, target, dyna_linker);
57871462 2034}
e2b5e7aa 2035
643aeae3 2036static void emit_extjump_ds(void *addr, int target)
57871462 2037{
b14b6a8f 2038 emit_extjump2(addr, target, dyna_linker_ds);
57871462 2039}
2040
13e35c04 2041// put rt_val into rt, potentially making use of rs with value rs_val
2042static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2043{
8575a877 2044 u_int armval;
2045 int diff;
2046 if(genimm(rt_val,&armval)) {
2047 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2048 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2049 return;
2050 }
2051 if(genimm(~rt_val,&armval)) {
2052 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2053 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2054 return;
2055 }
2056 diff=rt_val-rs_val;
2057 if(genimm(diff,&armval)) {
2058 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2059 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2060 return;
2061 }else if(genimm(-diff,&armval)) {
2062 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2063 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2064 return;
2065 }
2066 emit_movimm(rt_val,rt);
2067}
2068
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. v2 can be
// derived from v1 with a single add or sub.
// The difference (or its negation) qualifies when, after stripping trailing
// zero bits two at a time, what remains is below 0x100.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if(v1==v2)
    return 1;

  delta=v2-v1;

  // try the difference as an "add" immediate
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // try the negated difference as a "sub" immediate
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100;
}
cbbab9cd 2084
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "no argument" and emits no move for it.
// Trashes r2 (only in the case where the two arguments must be swapped).
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0!=0) {
    // The second argument currently sits in r0, so it has to leave r0
    // before the first argument is moved there.
    if(a0==1) {
      // r0 and r1 must swap: go through r2
      emit_mov(a0,2);
      emit_mov(a1,1);
      emit_mov(2,0);
      return;
    }
    emit_mov(a1,1);
    if(a0>=0)
      emit_mov(a0,0);
    return;
  }

  // No conflict: move whichever argument is present and not yet in place.
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2101
b14b6a8f 2102static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 2103{
2104 switch(type) {
2105 case LOADB_STUB: emit_signextend8(rs,rt); break;
2106 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2107 case LOADH_STUB: emit_signextend16(rs,rt); break;
2108 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2109 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2110 default: assert(0);
2111 }
2112}
2113
b1be1eee 2114#include "pcsxmem.h"
2115#include "pcsxmem_inline.c"
b1be1eee 2116
e2b5e7aa 2117static void do_readstub(int n)
57871462 2118{
b14b6a8f 2119 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 2120 literal_pool(256);
b14b6a8f 2121 set_jump_target(stubs[n].addr, out);
2122 enum stub_type type=stubs[n].type;
2123 int i=stubs[n].a;
2124 int rs=stubs[n].b;
2125 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2126 u_int reglist=stubs[n].e;
57871462 2127 signed char *i_regmap=i_regs->regmap;
581335b0 2128 int rt;
b9b61529 2129 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2130 rt=get_reg(i_regmap,FTEMP);
2131 }else{
57871462 2132 rt=get_reg(i_regmap,rt1[i]);
2133 }
2134 assert(rs>=0);
df4dc2b1 2135 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2136 void *restore_jump = NULL;
c6c3b1b3 2137 reglist|=(1<<rs);
2138 for(r=0;r<=12;r++) {
2139 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2140 temp=r; break;
2141 }
2142 }
db829eeb 2143 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2144 reglist&=~(1<<rt);
2145 if(temp==-1) {
2146 save_regs(reglist);
2147 regs_saved=1;
2148 temp=(rs==0)?2:0;
2149 }
2150 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2151 temp2=1;
643aeae3 2152 emit_readword(&mem_rtab,temp);
c6c3b1b3 2153 emit_shrimm(rs,12,temp2);
2154 emit_readword_dualindexedx4(temp,temp2,temp2);
2155 emit_lsls_imm(temp2,1,temp2);
2156 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2157 switch(type) {
2158 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2159 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2160 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2161 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2162 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 2163 default: assert(0);
c6c3b1b3 2164 }
2165 }
2166 if(regs_saved) {
df4dc2b1 2167 restore_jump=out;
c6c3b1b3 2168 emit_jcc(0); // jump to reg restore
2169 }
2170 else
b14b6a8f 2171 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 2172
2173 if(!regs_saved)
2174 save_regs(reglist);
643aeae3 2175 void *handler=NULL;
c6c3b1b3 2176 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 2177 handler=jump_handler_read8;
c6c3b1b3 2178 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 2179 handler=jump_handler_read16;
c6c3b1b3 2180 if(type==LOADW_STUB)
643aeae3 2181 handler=jump_handler_read32;
2182 assert(handler);
b96d3df7 2183 pass_args(rs,temp2);
c6c3b1b3 2184 int cc=get_reg(i_regmap,CCREG);
2185 if(cc<0)
2186 emit_loadreg(CCREG,2);
b14b6a8f 2187 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
c6c3b1b3 2188 emit_call(handler);
2189 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2190 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2191 }
2192 if(restore_jump)
df4dc2b1 2193 set_jump_target(restore_jump, out);
c6c3b1b3 2194 restore_regs(reglist);
b14b6a8f 2195 emit_jmp(stubs[n].retaddr); // return address
57871462 2196}
2197
c6c3b1b3 2198// return memhandler, or get directly accessable address and return 0
643aeae3 2199static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
c6c3b1b3 2200{
2201 u_int l1,l2=0;
2202 l1=((u_int *)table)[addr>>12];
2203 if((l1&(1<<31))==0) {
2204 u_int v=l1<<1;
2205 *addr_host=v+addr;
643aeae3 2206 return NULL;
c6c3b1b3 2207 }
2208 else {
2209 l1<<=1;
2210 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2211 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2212 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2213 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2214 else
2215 l2=((u_int *)l1)[(addr&0xfff)/4];
2216 if((l2&(1<<31))==0) {
2217 u_int v=l2<<1;
2218 *addr_host=v+(addr&0xfff);
643aeae3 2219 return NULL;
c6c3b1b3 2220 }
643aeae3 2221 return (void *)(l2<<1);
c6c3b1b3 2222 }
2223}
c6c3b1b3 2224
b14b6a8f 2225static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2226{
2227 int rs=get_reg(regmap,target);
57871462 2228 int rt=get_reg(regmap,target);
535d208a 2229 if(rs<0) rs=get_reg(regmap,-1);
57871462 2230 assert(rs>=0);
643aeae3 2231 u_int host_addr=0,is_dynamic,far_call=0;
2232 void *handler;
b1be1eee 2233 int cc=get_reg(regmap,CCREG);
2234 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2235 return;
643aeae3 2236 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
2237 if (handler == NULL) {
db829eeb 2238 if(rt<0||rt1[i]==0)
c6c3b1b3 2239 return;
13e35c04 2240 if(addr!=host_addr)
2241 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2242 switch(type) {
2243 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2244 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2245 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2246 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2247 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2248 default: assert(0);
2249 }
2250 return;
2251 }
b1be1eee 2252 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2253 if(is_dynamic) {
2254 if(type==LOADB_STUB||type==LOADBU_STUB)
643aeae3 2255 handler=jump_handler_read8;
b1be1eee 2256 if(type==LOADH_STUB||type==LOADHU_STUB)
643aeae3 2257 handler=jump_handler_read16;
b1be1eee 2258 if(type==LOADW_STUB)
643aeae3 2259 handler=jump_handler_read32;
b1be1eee 2260 }
c6c3b1b3 2261
2262 // call a memhandler
db829eeb 2263 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2264 reglist&=~(1<<rt);
2265 save_regs(reglist);
2266 if(target==0)
2267 emit_movimm(addr,0);
2268 else if(rs!=0)
2269 emit_mov(rs,0);
643aeae3 2270 int offset=(u_char *)handler-out-8;
c6c3b1b3 2271 if(offset<-33554432||offset>=33554432) {
2272 // unreachable memhandler, a plugin func perhaps
643aeae3 2273 emit_movimm((u_int)handler,12);
b1be1eee 2274 far_call=1;
2275 }
2276 if(cc<0)
2277 emit_loadreg(CCREG,2);
2278 if(is_dynamic) {
2279 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2280 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2281 }
b1be1eee 2282 else {
643aeae3 2283 emit_readword(&last_count,3);
b1be1eee 2284 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2285 emit_add(2,3,2);
643aeae3 2286 emit_writeword(2,&Count);
b1be1eee 2287 }
2288
2289 if(far_call)
2290 emit_callreg(12);
c6c3b1b3 2291 else
2292 emit_call(handler);
b1be1eee 2293
db829eeb 2294 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2295 switch(type) {
2296 case LOADB_STUB: emit_signextend8(0,rt); break;
2297 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2298 case LOADH_STUB: emit_signextend16(0,rt); break;
2299 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2300 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2301 default: assert(0);
2302 }
2303 }
2304 restore_regs(reglist);
57871462 2305}
2306
e2b5e7aa 2307static void do_writestub(int n)
57871462 2308{
b14b6a8f 2309 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 2310 literal_pool(256);
b14b6a8f 2311 set_jump_target(stubs[n].addr, out);
2312 enum stub_type type=stubs[n].type;
2313 int i=stubs[n].a;
2314 int rs=stubs[n].b;
2315 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2316 u_int reglist=stubs[n].e;
57871462 2317 signed char *i_regmap=i_regs->regmap;
581335b0 2318 int rt,r;
b9b61529 2319 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2320 rt=get_reg(i_regmap,r=FTEMP);
2321 }else{
57871462 2322 rt=get_reg(i_regmap,r=rs2[i]);
2323 }
2324 assert(rs>=0);
2325 assert(rt>=0);
b14b6a8f 2326 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 2327 void *restore_jump = NULL;
b96d3df7 2328 int reglist2=reglist|(1<<rs)|(1<<rt);
2329 for(rtmp=0;rtmp<=12;rtmp++) {
2330 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2331 temp=rtmp; break;
2332 }
2333 }
2334 if(temp==-1) {
2335 save_regs(reglist);
2336 regs_saved=1;
2337 for(rtmp=0;rtmp<=3;rtmp++)
2338 if(rtmp!=rs&&rtmp!=rt)
2339 {temp=rtmp;break;}
2340 }
2341 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2342 temp2=3;
643aeae3 2343 emit_readword(&mem_wtab,temp);
b96d3df7 2344 emit_shrimm(rs,12,temp2);
2345 emit_readword_dualindexedx4(temp,temp2,temp2);
2346 emit_lsls_imm(temp2,1,temp2);
2347 switch(type) {
2348 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2349 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2350 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2351 default: assert(0);
2352 }
2353 if(regs_saved) {
df4dc2b1 2354 restore_jump=out;
b96d3df7 2355 emit_jcc(0); // jump to reg restore
2356 }
2357 else
b14b6a8f 2358 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 2359
2360 if(!regs_saved)
2361 save_regs(reglist);
643aeae3 2362 void *handler=NULL;
b96d3df7 2363 switch(type) {
643aeae3 2364 case STOREB_STUB: handler=jump_handler_write8; break;
2365 case STOREH_STUB: handler=jump_handler_write16; break;
2366 case STOREW_STUB: handler=jump_handler_write32; break;
b14b6a8f 2367 default: assert(0);
b96d3df7 2368 }
643aeae3 2369 assert(handler);
b96d3df7 2370 pass_args(rs,rt);
2371 if(temp2!=3)
2372 emit_mov(temp2,3);
2373 int cc=get_reg(i_regmap,CCREG);
2374 if(cc<0)
2375 emit_loadreg(CCREG,2);
b14b6a8f 2376 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
b96d3df7 2377 // returns new cycle_count
2378 emit_call(handler);
b14b6a8f 2379 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2380 if(cc<0)
2381 emit_storereg(CCREG,2);
2382 if(restore_jump)
df4dc2b1 2383 set_jump_target(restore_jump, out);
b96d3df7 2384 restore_regs(reglist);
b14b6a8f 2385 emit_jmp(stubs[n].retaddr);
57871462 2386}
2387
b14b6a8f 2388static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2389{
2390 int rs=get_reg(regmap,-1);
57871462 2391 int rt=get_reg(regmap,target);
2392 assert(rs>=0);
2393 assert(rt>=0);
643aeae3 2394 u_int host_addr=0;
2395 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2396 if (handler == NULL) {
13e35c04 2397 if(addr!=host_addr)
2398 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2399 switch(type) {
2400 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2401 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2402 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2403 default: assert(0);
2404 }
2405 return;
2406 }
2407
2408 // call a memhandler
2409 save_regs(reglist);
13e35c04 2410 pass_args(rs,rt);
b96d3df7 2411 int cc=get_reg(regmap,CCREG);
2412 if(cc<0)
2413 emit_loadreg(CCREG,2);
2573466a 2414 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
643aeae3 2415 emit_movimm((u_int)handler,3);
b96d3df7 2416 // returns new cycle_count
643aeae3 2417 emit_call(jump_handler_write_h);
2573466a 2418 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2419 if(cc<0)
2420 emit_storereg(CCREG,2);
2421 restore_regs(reglist);
57871462 2422}
2423
e2b5e7aa 2424static void do_unalignedwritestub(int n)
57871462 2425{
b14b6a8f 2426 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
b7918751 2427 literal_pool(256);
b14b6a8f 2428 set_jump_target(stubs[n].addr, out);
b7918751 2429
b14b6a8f 2430 int i=stubs[n].a;
2431 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2432 int addr=stubs[n].b;
2433 u_int reglist=stubs[n].e;
b7918751 2434 signed char *i_regmap=i_regs->regmap;
2435 int temp2=get_reg(i_regmap,FTEMP);
2436 int rt;
b7918751 2437 rt=get_reg(i_regmap,rs2[i]);
2438 assert(rt>=0);
2439 assert(addr>=0);
2440 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2441 reglist|=(1<<addr);
2442 reglist&=~(1<<temp2);
2443
b96d3df7 2444#if 1
2445 // don't bother with it and call write handler
2446 save_regs(reglist);
2447 pass_args(addr,rt);
2448 int cc=get_reg(i_regmap,CCREG);
2449 if(cc<0)
2450 emit_loadreg(CCREG,2);
b14b6a8f 2451 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
643aeae3 2452 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
b14b6a8f 2453 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2454 if(cc<0)
2455 emit_storereg(CCREG,2);
2456 restore_regs(reglist);
b14b6a8f 2457 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2458#else
b7918751 2459 emit_andimm(addr,0xfffffffc,temp2);
643aeae3 2460 emit_writeword(temp2,&address);
b7918751 2461
2462 save_regs(reglist);
b7918751 2463 emit_shrimm(addr,16,1);
2464 int cc=get_reg(i_regmap,CCREG);
2465 if(cc<0) {
2466 emit_loadreg(CCREG,2);
2467 }
2468 emit_movimm((u_int)readmem,0);
b14b6a8f 2469 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
b7918751 2470 emit_call((int)&indirect_jump_indexed);
2471 restore_regs(reglist);
2472
643aeae3 2473 emit_readword(&readmem_dword,temp2);
b7918751 2474 int temp=addr; //hmh
2475 emit_shlimm(addr,3,temp);
2476 emit_andimm(temp,24,temp);
2477#ifdef BIG_ENDIAN_MIPS
2478 if (opcode[i]==0x2e) // SWR
2479#else
2480 if (opcode[i]==0x2a) // SWL
2481#endif
2482 emit_xorimm(temp,24,temp);
2483 emit_movimm(-1,HOST_TEMPREG);
55439448 2484 if (opcode[i]==0x2a) { // SWL
b7918751 2485 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2486 emit_orrshr(rt,temp,temp2);
2487 }else{
2488 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2489 emit_orrshl(rt,temp,temp2);
2490 }
643aeae3 2491 emit_readword(&address,addr);
2492 emit_writeword(temp2,&word);
b7918751 2493 //save_regs(reglist); // don't need to, no state changes
2494 emit_shrimm(addr,16,1);
2495 emit_movimm((u_int)writemem,0);
2496 //emit_call((int)&indirect_jump_indexed);
2497 emit_mov(15,14);
2498 emit_readword_dualindexedx4(0,1,15);
643aeae3 2499 emit_readword(&Count,HOST_TEMPREG);
2500 emit_readword(&next_interupt,2);
b14b6a8f 2501 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
643aeae3 2502 emit_writeword(2,&last_count);
b7918751 2503 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2504 if(cc<0) {
2505 emit_storereg(CCREG,HOST_TEMPREG);
2506 }
2507 restore_regs(reglist);
b14b6a8f 2508 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2509#endif
57871462 2510}
2511
e2b5e7aa 2512static void do_invstub(int n)
57871462 2513{
2514 literal_pool(20);
b14b6a8f 2515 u_int reglist=stubs[n].a;
2516 set_jump_target(stubs[n].addr, out);
57871462 2517 save_regs(reglist);
b14b6a8f 2518 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
643aeae3 2519 emit_call(&invalidate_addr);
57871462 2520 restore_regs(reglist);
b14b6a8f 2521 emit_jmp(stubs[n].retaddr); // return address
57871462 2522}
2523
df4dc2b1 2524void *do_dirty_stub(int i)
57871462 2525{
2526 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2527 u_int addr=(u_int)source;
57871462 2528 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2529 #ifndef HAVE_ARMV7
ac545b3a 2530 emit_loadlp(addr,1);
57871462 2531 emit_loadlp((int)copy,2);
2532 emit_loadlp(slen*4,3);
2533 #else
ac545b3a 2534 emit_movw(addr&0x0000FFFF,1);
57871462 2535 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2536 emit_movt(addr&0xFFFF0000,1);
57871462 2537 emit_movt(((u_int)copy)&0xFFFF0000,2);
2538 emit_movw(slen*4,3);
2539 #endif
2540 emit_movimm(start+i*4,0);
643aeae3 2541 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
df4dc2b1 2542 void *entry = out;
57871462 2543 load_regs_entry(i);
df4dc2b1 2544 if (entry == out)
2545 entry = instr_addr[i];
57871462 2546 emit_jmp(instr_addr[i]);
2547 return entry;
2548}
2549
e2b5e7aa 2550static void do_dirty_stub_ds()
57871462 2551{
2552 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2553 #ifndef HAVE_ARMV7
57871462 2554 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2555 emit_loadlp((int)copy,2);
2556 emit_loadlp(slen*4,3);
2557 #else
2558 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2559 emit_movw(((u_int)copy)&0x0000FFFF,2);
2560 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2561 emit_movt(((u_int)copy)&0xFFFF0000,2);
2562 emit_movw(slen*4,3);
2563 #endif
2564 emit_movimm(start+1,0);
643aeae3 2565 emit_call(&verify_code_ds);
57871462 2566}
2567
00fa9369 2568// FP_STUB
e2b5e7aa 2569static void do_cop1stub(int n)
57871462 2570{
2571 literal_pool(256);
b14b6a8f 2572 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2573 set_jump_target(stubs[n].addr, out);
2574 int i=stubs[n].a;
2575// int rs=stubs[n].b;
2576 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2577 int ds=stubs[n].d;
57871462 2578 if(!ds) {
2579 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2580 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2581 }
2582 //else {printf("fp exception in delay slot\n");}
2583 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2584 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2585 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2586 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
b14b6a8f 2587 emit_jmp(ds?fp_exception_ds:fp_exception);
57871462 2588}
2589
57871462 2590/* Special assem */
2591
e2b5e7aa 2592static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 2593{
2594 if(rt1[i]) {
2595 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2596 {
2597 signed char s,t,shift;
2598 t=get_reg(i_regs->regmap,rt1[i]);
2599 s=get_reg(i_regs->regmap,rs1[i]);
2600 shift=get_reg(i_regs->regmap,rs2[i]);
2601 if(t>=0){
2602 if(rs1[i]==0)
2603 {
2604 emit_zeroreg(t);
2605 }
2606 else if(rs2[i]==0)
2607 {
2608 assert(s>=0);
2609 if(s!=t) emit_mov(s,t);
2610 }
2611 else
2612 {
2613 emit_andimm(shift,31,HOST_TEMPREG);
2614 if(opcode2[i]==4) // SLLV
2615 {
2616 emit_shl(s,HOST_TEMPREG,t);
2617 }
2618 if(opcode2[i]==6) // SRLV
2619 {
2620 emit_shr(s,HOST_TEMPREG,t);
2621 }
2622 if(opcode2[i]==7) // SRAV
2623 {
2624 emit_sar(s,HOST_TEMPREG,t);
2625 }
2626 }
2627 }
2628 } else { // DSLLV/DSRLV/DSRAV
2629 signed char sh,sl,th,tl,shift;
2630 th=get_reg(i_regs->regmap,rt1[i]|64);
2631 tl=get_reg(i_regs->regmap,rt1[i]);
2632 sh=get_reg(i_regs->regmap,rs1[i]|64);
2633 sl=get_reg(i_regs->regmap,rs1[i]);
2634 shift=get_reg(i_regs->regmap,rs2[i]);
2635 if(tl>=0){
2636 if(rs1[i]==0)
2637 {
2638 emit_zeroreg(tl);
2639 if(th>=0) emit_zeroreg(th);
2640 }
2641 else if(rs2[i]==0)
2642 {
2643 assert(sl>=0);
2644 if(sl!=tl) emit_mov(sl,tl);
2645 if(th>=0&&sh!=th) emit_mov(sh,th);
2646 }
2647 else
2648 {
2649 // FIXME: What if shift==tl ?
2650 assert(shift!=tl);
2651 int temp=get_reg(i_regs->regmap,-1);
2652 int real_th=th;
2653 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2654 assert(sl>=0);
2655 assert(sh>=0);
2656 emit_andimm(shift,31,HOST_TEMPREG);
2657 if(opcode2[i]==0x14) // DSLLV
2658 {
2659 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2660 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2661 emit_orrshr(sl,HOST_TEMPREG,th);
2662 emit_andimm(shift,31,HOST_TEMPREG);
2663 emit_testimm(shift,32);
2664 emit_shl(sl,HOST_TEMPREG,tl);
2665 if(th>=0) emit_cmovne_reg(tl,th);
2666 emit_cmovne_imm(0,tl);
2667 }
2668 if(opcode2[i]==0x16) // DSRLV
2669 {
2670 assert(th>=0);
2671 emit_shr(sl,HOST_TEMPREG,tl);
2672 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2673 emit_orrshl(sh,HOST_TEMPREG,tl);
2674 emit_andimm(shift,31,HOST_TEMPREG);
2675 emit_testimm(shift,32);
2676 emit_shr(sh,HOST_TEMPREG,th);
2677 emit_cmovne_reg(th,tl);
2678 if(real_th>=0) emit_cmovne_imm(0,th);
2679 }
2680 if(opcode2[i]==0x17) // DSRAV
2681 {
2682 assert(th>=0);
2683 emit_shr(sl,HOST_TEMPREG,tl);
2684 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2685 if(real_th>=0) {
2686 assert(temp>=0);
2687 emit_sarimm(th,31,temp);
2688 }
2689 emit_orrshl(sh,HOST_TEMPREG,tl);
2690 emit_andimm(shift,31,HOST_TEMPREG);
2691 emit_testimm(shift,32);
2692 emit_sar(sh,HOST_TEMPREG,th);
2693 emit_cmovne_reg(th,tl);
2694 if(real_th>=0) emit_cmovne_reg(temp,th);
2695 }
2696 }
2697 }
2698 }
2699 }
2700}
ffb0b9e0 2701
ffb0b9e0 2702static void speculate_mov(int rs,int rt)
2703{
2704 if(rt!=0) {
2705 smrv_strong_next|=1<<rt;
2706 smrv[rt]=smrv[rs];
2707 }
2708}
2709
2710static void speculate_mov_weak(int rs,int rt)
2711{
2712 if(rt!=0) {
2713 smrv_weak_next|=1<<rt;
2714 smrv[rt]=smrv[rs];
2715 }
2716}
2717
2718static void speculate_register_values(int i)
2719{
2720 if(i==0) {
2721 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
2722 // gp,sp are likely to stay the same throughout the block
2723 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
2724 smrv_weak_next=~smrv_strong_next;
2725 //printf(" llr %08x\n", smrv[4]);
2726 }
2727 smrv_strong=smrv_strong_next;
2728 smrv_weak=smrv_weak_next;
2729 switch(itype[i]) {
2730 case ALU:
2731 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2732 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
2733 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2734 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
2735 else {
2736 smrv_strong_next&=~(1<<rt1[i]);
2737 smrv_weak_next&=~(1<<rt1[i]);
2738 }
2739 break;
2740 case SHIFTIMM:
2741 smrv_strong_next&=~(1<<rt1[i]);
2742 smrv_weak_next&=~(1<<rt1[i]);
2743 // fallthrough
2744 case IMM16:
2745 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
2746 int value,hr=get_reg(regs[i].regmap,rt1[i]);
2747 if(hr>=0) {
2748 if(get_final_value(hr,i,&value))
2749 smrv[rt1[i]]=value;
2750 else smrv[rt1[i]]=constmap[i][hr];
2751 smrv_strong_next|=1<<rt1[i];
2752 }
2753 }
2754 else {
2755 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2756 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2757 }
2758 break;
2759 case LOAD:
2760 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
2761 // special case for BIOS
2762 smrv[rt1[i]]=0xa0000000;
2763 smrv_strong_next|=1<<rt1[i];
2764 break;
2765 }
2766 // fallthrough
2767 case SHIFT:
2768 case LOADLR:
2769 case MOV:
2770 smrv_strong_next&=~(1<<rt1[i]);
2771 smrv_weak_next&=~(1<<rt1[i]);
2772 break;
2773 case COP0:
2774 case COP2:
2775 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
2776 smrv_strong_next&=~(1<<rt1[i]);
2777 smrv_weak_next&=~(1<<rt1[i]);
2778 }
2779 break;
2780 case C2LS:
2781 if (opcode[i]==0x32) { // LWC2
2782 smrv_strong_next&=~(1<<rt1[i]);
2783 smrv_weak_next&=~(1<<rt1[i]);
2784 }
2785 break;
2786 }
2787#if 0
2788 int r=4;
2789 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
2790 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
2791#endif
2792}
2793
// Memory classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class, also used for everything unrecognized
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
2801
2802static int get_ptr_mem_type(u_int a)
2803{
2804 if(a < 0x00200000) {
2805 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
2806 // return wrong, must use memhandler for BIOS self-test to pass
2807 // 007 does similar stuff from a00 mirror, weird stuff
2808 return MTYPE_8000;
2809 return MTYPE_0000;
2810 }
2811 if(0x1f800000 <= a && a < 0x1f801000)
2812 return MTYPE_1F80;
2813 if(0x80200000 <= a && a < 0x80800000)
2814 return MTYPE_8020;
2815 if(0xa0000000 <= a && a < 0xa0200000)
2816 return MTYPE_A000;
2817 return MTYPE_8000;
2818}
ffb0b9e0 2819
b14b6a8f 2820static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
ffb0b9e0 2821{
b14b6a8f 2822 void *jaddr = NULL;
2823 int type=0;
ffb0b9e0 2824 int mr=rs1[i];
2825 if(((smrv_strong|smrv_weak)>>mr)&1) {
2826 type=get_ptr_mem_type(smrv[mr]);
2827 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
2828 }
2829 else {
2830 // use the mirror we are running on
2831 type=get_ptr_mem_type(start);
2832 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
2833 }
2834
2835 if(type==MTYPE_8020) { // RAM 80200000+ mirror
2836 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
2837 addr=*addr_reg_override=HOST_TEMPREG;
2838 type=0;
2839 }
2840 else if(type==MTYPE_0000) { // RAM 0 mirror
2841 emit_orimm(addr,0x80000000,HOST_TEMPREG);
2842 addr=*addr_reg_override=HOST_TEMPREG;
2843 type=0;
2844 }
2845 else if(type==MTYPE_A000) { // RAM A mirror
2846 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
2847 addr=*addr_reg_override=HOST_TEMPREG;
2848 type=0;
2849 }
2850 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 2851 if (psxH == (void *)0x1f800000) {
2852 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
2853 emit_cmpimm(HOST_TEMPREG,0x1000);
b14b6a8f 2854 jaddr=out;
6d760c92 2855 emit_jc(0);
2856 }
2857 else {
2858 // do usual RAM check, jump will go to the right handler
2859 type=0;
2860 }
ffb0b9e0 2861 }
ffb0b9e0 2862
2863 if(type==0)
2864 {
2865 emit_cmpimm(addr,RAM_SIZE);
b14b6a8f 2866 jaddr=out;
ffb0b9e0 2867 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
2868 // Hint to branch predictor that the branch is unlikely to be taken
2869 if(rs1[i]>=28)
2870 emit_jno_unlikely(0);
2871 else
2872 #endif
2873 emit_jno(0);
a327ad27 2874 if(ram_offset!=0) {
2875 emit_addimm(addr,ram_offset,HOST_TEMPREG);
2876 addr=*addr_reg_override=HOST_TEMPREG;
2877 }
ffb0b9e0 2878 }
2879
2880 return jaddr;
2881}
2882
57871462 2883#define shift_assemble shift_assemble_arm
2884
e2b5e7aa 2885static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 2886{
9c45ca93 2887 int s,tl,temp,temp2,addr;
57871462 2888 int offset;
b14b6a8f 2889 void *jaddr=0;
af4ee1fe 2890 int memtarget=0,c=0;
ffb0b9e0 2891 int fastload_reg_override=0;
57871462 2892 u_int hr,reglist=0;
57871462 2893 tl=get_reg(i_regs->regmap,rt1[i]);
2894 s=get_reg(i_regs->regmap,rs1[i]);
2895 temp=get_reg(i_regs->regmap,-1);
2896 temp2=get_reg(i_regs->regmap,FTEMP);
2897 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
2898 assert(addr<0);
2899 offset=imm[i];
2900 for(hr=0;hr<HOST_REGS;hr++) {
2901 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
2902 }
2903 reglist|=1<<temp;
2904 if(offset||s<0||c) addr=temp2;
2905 else addr=s;
2906 if(s>=0) {
2907 c=(i_regs->wasconst>>s)&1;
af4ee1fe 2908 if(c) {
2909 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 2910 }
57871462 2911 }
1edfcc68 2912 if(!c) {
1edfcc68 2913 emit_shlimm(addr,3,temp);
2914 if (opcode[i]==0x22||opcode[i]==0x26) {
2915 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
2916 }else{
2917 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 2918 }
1edfcc68 2919 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
2920 }
2921 else {
2922 if(ram_offset&&memtarget) {
2923 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
2924 fastload_reg_override=HOST_TEMPREG;
57871462 2925 }
1edfcc68 2926 if (opcode[i]==0x22||opcode[i]==0x26) {
2927 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 2928 }else{
1edfcc68 2929 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 2930 }
535d208a 2931 }
2932 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
2933 if(!c||memtarget) {
ffb0b9e0 2934 int a=temp2;
2935 if(fastload_reg_override) a=fastload_reg_override;
9c45ca93 2936 emit_readword_indexed(0,a,temp2);
b14b6a8f 2937 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
535d208a 2938 }
2939 else
2940 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
2941 if(rt1[i]) {
2942 assert(tl>=0);
57871462 2943 emit_andimm(temp,24,temp);
2002a1db 2944#ifdef BIG_ENDIAN_MIPS
2945 if (opcode[i]==0x26) // LWR
2946#else
2947 if (opcode[i]==0x22) // LWL
2948#endif
2949 emit_xorimm(temp,24,temp);
57871462 2950 emit_movimm(-1,HOST_TEMPREG);
2951 if (opcode[i]==0x26) {
2952 emit_shr(temp2,temp,temp2);
2953 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
2954 }else{
2955 emit_shl(temp2,temp,temp2);
2956 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
2957 }
2958 emit_or(temp2,tl,tl);
57871462 2959 }
535d208a 2960 //emit_storereg(rt1[i],tl); // DEBUG
2961 }
2962 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
9c45ca93 2963 assert(0);
57871462 2964 }
2965}
2966#define loadlr_assemble loadlr_assemble_arm
2967
e2b5e7aa 2968static void cop0_assemble(int i,struct regstat *i_regs)
57871462 2969{
2970 if(opcode2[i]==0) // MFC0
2971 {
2972 signed char t=get_reg(i_regs->regmap,rt1[i]);
643aeae3 2973 u_int copr=(source[i]>>11)&0x1f;
57871462 2974 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 2975 if(t>=0&&rt1[i]!=0) {
643aeae3 2976 emit_readword(&reg_cop0[copr],t);
57871462 2977 }
2978 }
2979 else if(opcode2[i]==4) // MTC0
2980 {
2981 signed char s=get_reg(i_regs->regmap,rs1[i]);
2982 char copr=(source[i]>>11)&0x1f;
2983 assert(s>=0);
63cb0298 2984 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 2985 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 2986 emit_readword(&last_count,HOST_TEMPREG);
57871462 2987 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 2988 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 2989 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
643aeae3 2990 emit_writeword(HOST_CCREG,&Count);
57871462 2991 }
2992 // What a mess. The status register (12) can enable interrupts,
2993 // so needs a special case to handle a pending interrupt.
2994 // The interrupt must be taken immediately, because a subsequent
2995 // instruction might disable interrupts again.
7139f3c8 2996 if(copr==12||copr==13) {
fca1aef2 2997 if (is_delayslot) {
2998 // burn cycles to cause cc_interrupt, which will
2999 // reschedule next_interupt. Relies on CCREG from above.
3000 assem_debug("MTC0 DS %d\n", copr);
643aeae3 3001 emit_writeword(HOST_CCREG,&last_count);
fca1aef2 3002 emit_movimm(0,HOST_CCREG);
3003 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3004 emit_loadreg(rs1[i],1);
fca1aef2 3005 emit_movimm(copr,0);
643aeae3 3006 emit_call(pcsx_mtc0_ds);
042c7287 3007 emit_loadreg(rs1[i],s);
fca1aef2 3008 return;
3009 }
63cb0298 3010 emit_movimm(start+i*4+4,HOST_TEMPREG);
643aeae3 3011 emit_writeword(HOST_TEMPREG,&pcaddr);
63cb0298 3012 emit_movimm(0,HOST_TEMPREG);
643aeae3 3013 emit_writeword(HOST_TEMPREG,&pending_exception);
57871462 3014 }
3015 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3016 //else
caeefe31 3017 if(s==HOST_CCREG)
3018 emit_loadreg(rs1[i],1);
3019 else if(s!=1)
63cb0298 3020 emit_mov(s,1);
fca1aef2 3021 emit_movimm(copr,0);
643aeae3 3022 emit_call(pcsx_mtc0);
7139f3c8 3023 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 3024 emit_readword(&Count,HOST_CCREG);
3025 emit_readword(&next_interupt,HOST_TEMPREG);
2573466a 3026 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3027 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
643aeae3 3028 emit_writeword(HOST_TEMPREG,&last_count);
57871462 3029 emit_storereg(CCREG,HOST_CCREG);
3030 }
7139f3c8 3031 if(copr==12||copr==13) {
57871462 3032 assert(!is_delayslot);
643aeae3 3033 emit_readword(&pending_exception,14);
042c7287 3034 emit_test(14,14);
643aeae3 3035 emit_jne(&do_interrupt);
57871462 3036 }
3037 emit_loadreg(rs1[i],s);
3038 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3039 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3040 }
3041 else
3042 {
3043 assert(opcode2[i]==0x10);
00fa9369 3044 //if((source[i]&0x3f)==0x10) // RFE
576bbd8f 3045 {
643aeae3 3046 emit_readword(&Status,0);
576bbd8f 3047 emit_andimm(0,0x3c,1);
3048 emit_andimm(0,~0xf,0);
3049 emit_orrshr_imm(1,2,0);
643aeae3 3050 emit_writeword(0,&Status);
576bbd8f 3051 }
57871462 3052 }
3053}
3054
b9b61529 3055static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3056{
3057 switch (copr) {
3058 case 1:
3059 case 3:
3060 case 5:
3061 case 8:
3062 case 9:
3063 case 10:
3064 case 11:
643aeae3 3065 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3066 emit_signextend16(tl,tl);
643aeae3 3067 emit_writeword(tl,&reg_cop2d[copr]); // hmh
b9b61529 3068 break;
3069 case 7:
3070 case 16:
3071 case 17:
3072 case 18:
3073 case 19:
643aeae3 3074 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3075 emit_andimm(tl,0xffff,tl);
643aeae3 3076 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3077 break;
3078 case 15:
643aeae3 3079 emit_readword(&reg_cop2d[14],tl); // SXY2
3080 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3081 break;
3082 case 28:
b9b61529 3083 case 29:
643aeae3 3084 emit_readword(&reg_cop2d[9],temp);
b9b61529 3085 emit_testimm(temp,0x8000); // do we need this?
3086 emit_andimm(temp,0xf80,temp);
3087 emit_andne_imm(temp,0,temp);
f70d384d 3088 emit_shrimm(temp,7,tl);
643aeae3 3089 emit_readword(&reg_cop2d[10],temp);
b9b61529 3090 emit_testimm(temp,0x8000);
3091 emit_andimm(temp,0xf80,temp);
3092 emit_andne_imm(temp,0,temp);
f70d384d 3093 emit_orrshr_imm(temp,2,tl);
643aeae3 3094 emit_readword(&reg_cop2d[11],temp);
b9b61529 3095 emit_testimm(temp,0x8000);
3096 emit_andimm(temp,0xf80,temp);
3097 emit_andne_imm(temp,0,temp);
f70d384d 3098 emit_orrshl_imm(temp,3,tl);
643aeae3 3099 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3100 break;
3101 default:
643aeae3 3102 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3103 break;
3104 }
3105}
3106
3107static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3108{
3109 switch (copr) {
3110 case 15:
643aeae3 3111 emit_readword(&reg_cop2d[13],temp); // SXY1
3112 emit_writeword(sl,&reg_cop2d[copr]);
3113 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3114 emit_readword(&reg_cop2d[14],temp); // SXY2
3115 emit_writeword(sl,&reg_cop2d[14]);
3116 emit_writeword(temp,&reg_cop2d[13]); // SXY1
b9b61529 3117 break;
3118 case 28:
3119 emit_andimm(sl,0x001f,temp);
f70d384d 3120 emit_shlimm(temp,7,temp);
643aeae3 3121 emit_writeword(temp,&reg_cop2d[9]);
b9b61529 3122 emit_andimm(sl,0x03e0,temp);
f70d384d 3123 emit_shlimm(temp,2,temp);
643aeae3 3124 emit_writeword(temp,&reg_cop2d[10]);
b9b61529 3125 emit_andimm(sl,0x7c00,temp);
f70d384d 3126 emit_shrimm(temp,3,temp);
643aeae3 3127 emit_writeword(temp,&reg_cop2d[11]);
3128 emit_writeword(sl,&reg_cop2d[28]);
b9b61529 3129 break;
3130 case 30:
3131 emit_movs(sl,temp);
3132 emit_mvnmi(temp,temp);
665f33e1 3133#ifdef HAVE_ARMV5
b9b61529 3134 emit_clz(temp,temp);
665f33e1 3135#else
3136 emit_movs(temp,HOST_TEMPREG);
3137 emit_movimm(0,temp);
3138 emit_jeq((int)out+4*4);
3139 emit_addpl_imm(temp,1,temp);
3140 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3141 emit_jns((int)out-2*4);
3142#endif
643aeae3 3143 emit_writeword(sl,&reg_cop2d[30]);
3144 emit_writeword(temp,&reg_cop2d[31]);
b9b61529 3145 break;
b9b61529 3146 case 31:
3147 break;
3148 default:
643aeae3 3149 emit_writeword(sl,&reg_cop2d[copr]);
b9b61529 3150 break;
3151 }
3152}
3153
e2b5e7aa 3154static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3155{
3156 u_int copr=(source[i]>>11)&0x1f;
3157 signed char temp=get_reg(i_regs->regmap,-1);
3158 if (opcode2[i]==0) { // MFC2
3159 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3160 if(tl>=0&&rt1[i]!=0)
b9b61529 3161 cop2_get_dreg(copr,tl,temp);
3162 }
3163 else if (opcode2[i]==4) { // MTC2
3164 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3165 cop2_put_dreg(copr,sl,temp);
3166 }
3167 else if (opcode2[i]==2) // CFC2
3168 {
3169 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3170 if(tl>=0&&rt1[i]!=0)
643aeae3 3171 emit_readword(&reg_cop2c[copr],tl);
b9b61529 3172 }
3173 else if (opcode2[i]==6) // CTC2
3174 {
3175 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3176 switch(copr) {
3177 case 4:
3178 case 12:
3179 case 20:
3180 case 26:
3181 case 27:
3182 case 29:
3183 case 30:
3184 emit_signextend16(sl,temp);
3185 break;
3186 case 31:
3187 //value = value & 0x7ffff000;
3188 //if (value & 0x7f87e000) value |= 0x80000000;
3189 emit_shrimm(sl,12,temp);
3190 emit_shlimm(temp,12,temp);
3191 emit_testimm(temp,0x7f000000);
3192 emit_testeqimm(temp,0x00870000);
3193 emit_testeqimm(temp,0x0000e000);
3194 emit_orrne_imm(temp,0x80000000,temp);
3195 break;
3196 default:
3197 temp=sl;
3198 break;
3199 }
643aeae3 3200 emit_writeword(temp,&reg_cop2c[copr]);
b9b61529 3201 assert(sl>=0);
3202 }
3203}
3204
054175e9 3205static void c2op_prologue(u_int op,u_int reglist)
3206{
3207 save_regs_all(reglist);
82ed88eb 3208#ifdef PCNT
3209 emit_movimm(op,0);
3210 emit_call((int)pcnt_gte_start);
3211#endif
054175e9 3212 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3213}
3214
3215static void c2op_epilogue(u_int op,u_int reglist)
3216{
82ed88eb 3217#ifdef PCNT
3218 emit_movimm(op,0);
3219 emit_call((int)pcnt_gte_end);
3220#endif
054175e9 3221 restore_regs_all(reglist);
3222}
3223
6c0eefaf 3224static void c2op_call_MACtoIR(int lm,int need_flags)
3225{
3226 if(need_flags)
643aeae3 3227 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 3228 else
643aeae3 3229 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 3230}
3231
3232static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3233{
643aeae3 3234 emit_call(func);
6c0eefaf 3235 // func is C code and trashes r0
3236 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3237 if(need_flags||need_ir)
3238 c2op_call_MACtoIR(lm,need_flags);
643aeae3 3239 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 3240}
3241
054175e9 3242static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3243{
b9b61529 3244 u_int c2op=source[i]&0x3f;
6c0eefaf 3245 u_int hr,reglist_full=0,reglist;
054175e9 3246 int need_flags,need_ir;
b9b61529 3247 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3248 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3249 }
4d646738 3250 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3251
3252 if (gte_handlers[c2op]!=NULL) {
bedfea38 3253 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3254 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3255 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3256 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3257 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3258 need_flags=0;
6c0eefaf 3259 int shift = (source[i] >> 19) & 1;
3260 int lm = (source[i] >> 10) & 1;
054175e9 3261 switch(c2op) {
19776aef 3262#ifndef DRC_DBG
054175e9 3263 case GTE_MVMVA: {
82336ba3 3264#ifdef HAVE_ARMV5
054175e9 3265 int v = (source[i] >> 15) & 3;
3266 int cv = (source[i] >> 13) & 3;
3267 int mx = (source[i] >> 17) & 3;
4d646738 3268 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3269 c2op_prologue(c2op,reglist);
3270 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3271 if(v<3)
3272 emit_ldrd(v*8,0,4);
3273 else {
3274 emit_movzwl_indexed(9*4,0,4); // gteIR
3275 emit_movzwl_indexed(10*4,0,6);
3276 emit_movzwl_indexed(11*4,0,5);
3277 emit_orrshl_imm(6,16,4);
3278 }
3279 if(mx<3)
3280 emit_addimm(0,32*4+mx*8*4,6);
3281 else
643aeae3 3282 emit_readword(&zeromem_ptr,6);
054175e9 3283 if(cv<3)
3284 emit_addimm(0,32*4+(cv*8+5)*4,7);
3285 else
643aeae3 3286 emit_readword(&zeromem_ptr,7);
054175e9 3287#ifdef __ARM_NEON__
3288 emit_movimm(source[i],1); // opcode
643aeae3 3289 emit_call(gteMVMVA_part_neon);
054175e9 3290 if(need_flags) {
3291 emit_movimm(lm,1);
643aeae3 3292 emit_call(gteMACtoIR_flags_neon);
054175e9 3293 }
3294#else
3295 if(cv==3&&shift)
3296 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3297 else {
3298 emit_movimm(shift,1);
3299 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3300 }
6c0eefaf 3301 if(need_flags||need_ir)
3302 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3303#endif
3304#else /* if not HAVE_ARMV5 */
3305 c2op_prologue(c2op,reglist);
3306 emit_movimm(source[i],1); // opcode
643aeae3 3307 emit_writeword(1,&psxRegs.code);
82336ba3 3308 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3309#endif
3310 break;
3311 }
6c0eefaf 3312 case GTE_OP:
3313 c2op_prologue(c2op,reglist);
643aeae3 3314 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
6c0eefaf 3315 if(need_flags||need_ir) {
3316 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3317 c2op_call_MACtoIR(lm,need_flags);
3318 }
3319 break;
3320 case GTE_DPCS:
3321 c2op_prologue(c2op,reglist);
3322 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3323 break;
3324 case GTE_INTPL:
3325 c2op_prologue(c2op,reglist);
3326 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3327 break;
3328 case GTE_SQR:
3329 c2op_prologue(c2op,reglist);
643aeae3 3330 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 3331 if(need_flags||need_ir) {
3332 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3333 c2op_call_MACtoIR(lm,need_flags);
3334 }
3335 break;
3336 case GTE_DCPL:
3337 c2op_prologue(c2op,reglist);
3338 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3339 break;
3340 case GTE_GPF:
3341 c2op_prologue(c2op,reglist);
3342 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3343 break;
3344 case GTE_GPL:
3345 c2op_prologue(c2op,reglist);
3346 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3347 break;
19776aef 3348#endif
054175e9 3349 default:
054175e9 3350 c2op_prologue(c2op,reglist);
19776aef 3351#ifdef DRC_DBG
3352 emit_movimm(source[i],1); // opcode
643aeae3 3353 emit_writeword(1,&psxRegs.code);
19776aef 3354#endif
643aeae3 3355 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 3356 break;
3357 }
3358 c2op_epilogue(c2op,reglist);
3359 }
b9b61529 3360}
3361
e2b5e7aa 3362static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3363{
3364 // XXX: should just just do the exception instead
00fa9369 3365 //if(!cop1_usable)
3366 {
b14b6a8f 3367 void *jaddr=out;
3d624f89 3368 emit_jmp(0);
b14b6a8f 3369 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3d624f89 3370 }
3371}
3372
/*
 * COP1 instructions are not implemented by this backend: every one of
 * them is routed through cop1_unusable().
 */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3377
e2b5e7aa 3378static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3379{
3380 // case 0x18: MULT
3381 // case 0x19: MULTU
3382 // case 0x1A: DIV
3383 // case 0x1B: DIVU
3384 // case 0x1C: DMULT
3385 // case 0x1D: DMULTU
3386 // case 0x1E: DDIV
3387 // case 0x1F: DDIVU
3388 if(rs1[i]&&rs2[i])
3389 {
3390 if((opcode2[i]&4)==0) // 32-bit
3391 {
3392 if(opcode2[i]==0x18) // MULT
3393 {
3394 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3395 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3396 signed char hi=get_reg(i_regs->regmap,HIREG);
3397 signed char lo=get_reg(i_regs->regmap,LOREG);
3398 assert(m1>=0);
3399 assert(m2>=0);
3400 assert(hi>=0);
3401 assert(lo>=0);
3402 emit_smull(m1,m2,hi,lo);
3403 }
3404 if(opcode2[i]==0x19) // MULTU
3405 {
3406 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3407 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3408 signed char hi=get_reg(i_regs->regmap,HIREG);
3409 signed char lo=get_reg(i_regs->regmap,LOREG);
3410 assert(m1>=0);
3411 assert(m2>=0);
3412 assert(hi>=0);
3413 assert(lo>=0);
3414 emit_umull(m1,m2,hi,lo);
3415 }
3416 if(opcode2[i]==0x1A) // DIV
3417 {
3418 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3419 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3420 assert(d1>=0);
3421 assert(d2>=0);
3422 signed char quotient=get_reg(i_regs->regmap,LOREG);
3423 signed char remainder=get_reg(i_regs->regmap,HIREG);
3424 assert(quotient>=0);
3425 assert(remainder>=0);
3426 emit_movs(d1,remainder);
44a80f6a 3427 emit_movimm(0xffffffff,quotient);
3428 emit_negmi(quotient,quotient); // .. quotient and ..
3429 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3430 emit_movs(d2,HOST_TEMPREG);
3431 emit_jeq((int)out+52); // Division by zero
82336ba3 3432 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3433#ifdef HAVE_ARMV5
57871462 3434 emit_clz(HOST_TEMPREG,quotient);
3435 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3436#else
3437 emit_movimm(0,quotient);
3438 emit_addpl_imm(quotient,1,quotient);
3439 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3440 emit_jns((int)out-2*4);
3441#endif
57871462 3442 emit_orimm(quotient,1<<31,quotient);
3443 emit_shr(quotient,quotient,quotient);
3444 emit_cmp(remainder,HOST_TEMPREG);
3445 emit_subcs(remainder,HOST_TEMPREG,remainder);
3446 emit_adcs(quotient,quotient,quotient);
3447 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 3448 emit_jcc(out-16); // -4
57871462 3449 emit_teq(d1,d2);
3450 emit_negmi(quotient,quotient);
3451 emit_test(d1,d1);
3452 emit_negmi(remainder,remainder);
3453 }
3454 if(opcode2[i]==0x1B) // DIVU
3455 {
3456 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3457 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3458 assert(d1>=0);
3459 assert(d2>=0);
3460 signed char quotient=get_reg(i_regs->regmap,LOREG);
3461 signed char remainder=get_reg(i_regs->regmap,HIREG);
3462 assert(quotient>=0);
3463 assert(remainder>=0);
44a80f6a 3464 emit_mov(d1,remainder);
3465 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3466 emit_test(d2,d2);
44a80f6a 3467 emit_jeq((int)out+40); // Division by zero
665f33e1 3468#ifdef HAVE_ARMV5
57871462 3469 emit_clz(d2,HOST_TEMPREG);
3470 emit_movimm(1<<31,quotient);
3471 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3472#else
3473 emit_movimm(0,HOST_TEMPREG);
82336ba3 3474 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3475 emit_lslpls_imm(d2,1,d2);
665f33e1 3476 emit_jns((int)out-2*4);
3477 emit_movimm(1<<31,quotient);
3478#endif
57871462 3479 emit_shr(quotient,HOST_TEMPREG,quotient);
3480 emit_cmp(remainder,d2);
3481 emit_subcs(remainder,d2,remainder);
3482 emit_adcs(quotient,quotient,quotient);
3483 emit_shrcc_imm(d2,1,d2);
b14b6a8f 3484 emit_jcc(out-16); // -4
57871462 3485 }
3486 }
3487 else // 64-bit
71e490c5 3488 assert(0);
57871462 3489 }
3490 else
3491 {
3492 // Multiply by zero is zero.
3493 // MIPS does not have a divide by zero exception.
3494 // The result is undefined, we return zero.
3495 signed char hr=get_reg(i_regs->regmap,HIREG);
3496 signed char lr=get_reg(i_regs->regmap,LOREG);
3497 if(hr>=0) emit_zeroreg(hr);
3498 if(lr>=0) emit_zeroreg(lr);
3499 }
3500}
3501#define multdiv_assemble multdiv_assemble_arm
3502
/*
 * Intentionally emits nothing on ARM.
 * On x86 the equivalent hook loads the value 0xf8 into the register; on
 * ARM the hash is produced by a single instruction (see do_rhash).
 */
static void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
3507
e2b5e7aa 3508static void do_preload_rhtbl(int ht) {
57871462 3509 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3510}
3511
/* Hash the address in rs into rh by masking it with 0xf8. */
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;

  emit_andimm(rs,hash_mask,rh);
}
3515
e2b5e7aa 3516static void do_miniht_load(int ht,int rh) {
57871462 3517 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3518 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3519}
3520
e2b5e7aa 3521static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3522 emit_cmp(rh,rs);
3523 emit_ldreq_indexed(ht,4,15);
3524 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3525 emit_mov(rs,7);
3526 emit_jmp(jump_vaddr_reg[7]);
3527 #else
3528 emit_jmp(jump_vaddr_reg[rs]);
3529 #endif
3530}
3531
e2b5e7aa 3532static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3533 #ifndef HAVE_ARMV7
57871462 3534 emit_movimm(return_address,rt); // PC into link register
643aeae3 3535 add_to_linker(out,return_address,1);
57871462 3536 emit_pcreladdr(temp);
643aeae3 3537 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3538 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3539 #else
3540 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 3541 add_to_linker(out,return_address,1);
57871462 3542 emit_pcreladdr(temp);
643aeae3 3543 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3544 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 3545 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 3546 #endif
3547}
3548
00fa9369 3549static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u)
57871462 3550{
3551 //if(dirty_pre==dirty) return;
581335b0 3552 int hr,reg;
57871462 3553 for(hr=0;hr<HOST_REGS;hr++) {
3554 if(hr!=EXCLUDE_REG) {
3555 reg=pre[hr];
3556 if(((~u)>>(reg&63))&1) {
f776eb14 3557 if(reg>0) {
57871462 3558 if(((dirty_pre&~dirty)>>hr)&1) {
3559 if(reg>0&&reg<34) {
3560 emit_storereg(reg,hr);
57871462 3561 }
3562 else if(reg>=64) {
00fa9369 3563 assert(0);
57871462 3564 }
3565 }
3566 }
57871462 3567 }
3568 }
3569 }
3570}
3571
d148d265 3572static void mark_clear_cache(void *target)
3573{
643aeae3 3574 u_long offset = (u_char *)target - translation_cache;
d148d265 3575 u_int mask = 1u << ((offset >> 12) & 31);
3576 if (!(needs_clear_cache[offset >> 17] & mask)) {
3577 char *start = (char *)((u_long)target & ~4095ul);
3578 start_tcache_write(start, start + 4096);
3579 needs_clear_cache[offset >> 17] |= mask;
3580 }
3581}
3582
dd3a91a1 3583// Clearing the cache is rather slow on ARM Linux, so mark the areas
3584// that need to be cleared, and then only clear these areas once.
e2b5e7aa 3585static void do_clear_cache()
dd3a91a1 3586{
3587 int i,j;
3588 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3589 {
3590 u_int bitmap=needs_clear_cache[i];
3591 if(bitmap) {
643aeae3 3592 u_char *start, *end;
9f51b4b9 3593 for(j=0;j<32;j++)
dd3a91a1 3594 {
3595 if(bitmap&(1<<j)) {
643aeae3 3596 start=translation_cache+i*131072+j*4096;
dd3a91a1 3597 end=start+4095;
3598 j++;
3599 while(j<32) {
3600 if(bitmap&(1<<j)) {
3601 end+=4096;
3602 j++;
3603 }else{
643aeae3 3604 end_tcache_write(start, end);
dd3a91a1 3605 break;
3606 }
3607 }
3608 }
3609 }
3610 needs_clear_cache[i]=0;
3611 }
3612 }
3613}
3614
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend.
static void arch_init()
{
}
b9b61529 3618
3619// vim:shiftwidth=2:expandtab