drc: convert 'stubs' to a struct with proper types (rework part 2)
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
bdeade46 35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Register mask named CALLER_SAVE_REGS (presumably bit n stands for rN --
// confirm against its users). The __MACH__ build additionally sets bit 9.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Shorthand for tagging definitions that may legitimately stay unreferenced.
#define unused __attribute__((unused))

// DRC_DBG builds silence GCC's unused-* diagnostics for the rest of the file.
#ifdef DRC_DBG
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
// Entry points implemented outside this file. They are declared without a
// prototype; this file only takes their addresses.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// Slot N holds jump_vaddr_rN. Slots 11 and 13-15 have no handler and are
// left null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
57871462 95
0bbd1454 96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
d148d265 127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 128
57871462 129/* Linker */
130
// Patch the instruction word at `addr` so that it refers to `target_`.
//
// The displacement is taken relative to (addr + 8). The top byte of the
// existing instruction word selects how it is rewritten:
//   0xe2 - the low 12 bits become (disp >> 2) with 0xF00 or-ed in;
//          disp must be below 1024 and both addresses word aligned.
//   0x72 - word generated by emit_jno_unlikely: uses the same form when
//          disp < 1024, a (disp >> 4) | 0xE00 form when disp < 4096 and is
//          a multiple of 16, and otherwise replaces the whole word with
//          0x7A000000 plus a 24-bit word displacement.
//   else - must be a branch (bits 27..25 == 101); only the low 24 bits are
//          replaced with the word displacement.
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = addr;
  const u_int target = (u_int)target_;
  const u_int disp = target - (u_int)insn - 8;
  const u_char top = ((u_char *)addr)[3];

  switch (top) {
  case 0xe2:
    assert(disp < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    break;

  case 0x72:
    // generated by emit_jno_unlikely
    if (disp < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 2) | 0xF00;
    }
    else if (disp < 4096 && !(disp & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (disp >> 4) | 0xE00;
    }
    else {
      *insn = 0x7A000000 | ((disp << 6) >> 8);
    }
    break;

  default:
    assert((top & 0x0e) == 0xa);
    *insn = (*insn & 0xFF000000) | ((disp << 6) >> 8);
    break;
  }
}
162
// Variant of the branch patcher that can also copy the instruction found at
// the branch target into the word just before the branch and then skip it.
// Works, but the difference in speed is usually insignificant, so it is
// compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn = (u_int *)addr;
  u_char top = ((u_char *)addr)[3];

  // Copying is only allowed into a slot that still holds 0xe28dd000.
  assert(!copy || insn[-1] == 0xe28dd000);

  if (top == 0xe2) {
    assert(!copy);
    assert((target - (u_int)insn - 8) < 4096);
    *insn = (*insn & 0xFFFFF000) | (target - (u_int)insn - 8);
    return;
  }

  assert((top & 0x0e) == 0xa);
  u_int target_insn = *(u_int *)target;
  // Never copy: ALU op without immediate/flags, a load, or any word with
  // bit 27 set.
  if ((target_insn & 0x0e100000) == 0 ||
      (target_insn & 0x0c100000) == 0x04100000 ||
      (target_insn & 0x08000000))
    copy = 0;

  if (copy) {
    insn[-1] = target_insn;
    target += 4;
  }
  *insn = (*insn & 0xFF000000) | (((target - (u_int)insn - 8) << 6) >> 8);
}
#endif
57871462 197
198/* Literal pool */
e2b5e7aa 199static void add_literal(int addr,int val)
57871462 200{
15776b68 201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
9f51b4b9 204 literalcount++;
205}
57871462 206
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
//
// The second word of the stub must be `ldr rx, [pc, #ofs]`; the pointer
// stored at (that word's address + ofs + 8) is returned.
// Byte offsets are applied through `char *` so the code does not rely on
// the GNU extension that permits arithmetic on `void *`.
static void *find_extjump_insn(void *stub)
{
  char *ldr = (char *)stub + 4;
  int insn = *(int *)ldr;
  assert((insn & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  unsigned int offset = insn & 0xfff;
  return *(void **)(ldr + offset + 8);
}
217
f968d35d 218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
e2b5e7aa 222static int get_pointer(void *stub)
57871462 223{
224 //printf("get_pointer(%x)\n",(int)stub);
d148d265 225 int *i_ptr=find_extjump_insn(stub);
57871462 226 assert((*i_ptr&0x0f000000)==0x0a000000);
227 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
228}
229
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
//
// The `bl` is expected 4 words (6 with HAVE_ARMV7) into the dirty entry, or
// one word later. If the instruction right after the `bl` is an
// unconditional `b`, its target is returned; otherwise the address of that
// instruction is returned.
// The branch displacement is shifted as an unsigned value before being
// reinterpreted as signed, avoiding a signed left shift into the sign bit.
static void *get_clean_addr(void *addr)
{
  uint32_t *ptr = addr;
#ifndef HAVE_ARMV7
  ptr += 4;
#else
  ptr += 6;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  ptr++;
  if ((*ptr & 0xFF000000) == 0xea000000) {
    int disp = (int)(*ptr << 8) >> 6; // sign-extended word offset, in bytes
    return (char *)ptr + disp + 8;    // follow jump
  }
  return ptr;
}
248
#ifndef HAVE_ARMV7
// Return the literal-pool word addressed by the `ldr rX, [pc, #ofs]` at
// `ldr` (asserts that the word really is such a load).
static u_int verify_dirty_literal(const u_int *ldr)
{
  assert((*ldr & 0xFFFF0000) == 0xe59f0000);
  return *(const u_int *)((const char *)ldr + (*ldr & 0xfff) + 8);
}
#endif

// Decode the (source, copy, len) triple that the block prologue at `ptr`
// loads, and compare the two memory ranges.
// Returns 1 when the `len` bytes at `source` and `copy` are identical,
// 0 otherwise.
static int verify_dirty(u_int *ptr)
{
#ifndef HAVE_ARMV7
  // The three values come from the literal pool; a fourth word is skipped.
  u_int source = verify_dirty_literal(&ptr[0]);
  u_int copy   = verify_dirty_literal(&ptr[1]);
  u_int len    = verify_dirty_literal(&ptr[2]);
  ptr += 4;
#else
  // ARMv7 movw/movt: words 0/1 carry the low halves, words 2/3 the high
  // halves, word 4 the length.
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  u_int source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
               + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  u_int copy   = (ptr[1] & 0xFFF) + ((ptr[1] >> 4) & 0xF000)
               + ((ptr[3] << 16) & 0xFFF0000) + ((ptr[3] << 12) & 0xF0000000);
  u_int len    = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  return memcmp((void *)source, (void *)copy, len) == 0;
}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
df4dc2b1 283static int isclean(void *addr)
57871462 284{
665f33e1 285 #ifndef HAVE_ARMV7
581335b0 286 u_int *ptr=((u_int *)addr)+4;
57871462 287 #else
581335b0 288 u_int *ptr=((u_int *)addr)+6;
57871462 289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
#ifndef HAVE_ARMV7
// Return the literal-pool word addressed by the `ldr rX, [pc, #ofs]` at
// `ldr` (asserts that the word really is such a load).
static u_int get_bounds_literal(const u_int *ldr)
{
  assert((*ldr & 0xFFFF0000) == 0xe59f0000);
  return *(const u_int *)((const char *)ldr + (*ldr & 0xfff) + 8);
}
#endif

// Get the source range that the block at `addr` was compiled from
// (host pointers).
// addr:  address of the block prologue.
// start: receives the decoded source address.
// end:   receives source address + decoded length.
static void get_bounds(int addr,u_int *start,u_int *end)
{
  u_int *ptr = (u_int *)addr;
#ifndef HAVE_ARMV7
  // Values come from the literal pool. The second load (the copy address)
  // is not needed here and is skipped without being checked.
  u_int source = get_bounds_literal(&ptr[0]);
  u_int len    = get_bounds_literal(&ptr[2]);
  ptr += 4;
#else
  // ARMv7 movw/movt: word 0 and word 2 carry the source address halves,
  // word 4 the length.
  assert((*ptr & 0xFFF00000) == 0xe3000000);
  u_int source = (ptr[0] & 0xFFF) + ((ptr[0] >> 4) & 0xF000)
               + ((ptr[2] << 16) & 0xFFF0000) + ((ptr[2] << 12) & 0xF0000000);
  u_int len    = (ptr[4] & 0xFFF) + ((ptr[4] >> 4) & 0xF000);
  ptr += 6;
#endif
  if ((*ptr & 0xFF000000) != 0xeb000000)
    ptr++;
  assert((*ptr & 0xFF000000) == 0xeb000000); // bl instruction
  *start = source;
  *end = source + len;
}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 336static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 342
57871462 343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
9f51b4b9 345
57871462 346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
9f51b4b9 351
57871462 352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 354
57871462 355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 if(r<64&&((cur->u>>r)&1)) {
364 cur->regmap[preferred_reg]=reg;
365 cur->dirty&=~(1<<preferred_reg);
366 cur->isconst&=~(1<<preferred_reg);
367 return;
368 }
369 if(r>=64&&((cur->uu>>(r&63))&1)) {
370 cur->regmap[preferred_reg]=reg;
371 cur->dirty&=~(1<<preferred_reg);
372 cur->isconst&=~(1<<preferred_reg);
373 return;
374 }
9f51b4b9 375
57871462 376 // Clear any unneeded registers
377 // We try to keep the mapping consistent, if possible, because it
378 // makes branches easier (especially loops). So we try to allocate
379 // first (see above) before removing old mappings. If this is not
380 // possible then go ahead and clear out the registers that are no
381 // longer needed.
382 for(hr=0;hr<HOST_REGS;hr++)
383 {
384 r=cur->regmap[hr];
385 if(r>=0) {
386 if(r<64) {
387 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
388 }
389 else
390 {
391 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
392 }
393 }
394 }
395 // Try to allocate any available register, but prefer
396 // registers that have not been used recently.
397 if(i>0) {
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 // Try to allocate any available register
410 for(hr=0;hr<HOST_REGS;hr++) {
411 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
412 cur->regmap[hr]=reg;
413 cur->dirty&=~(1<<hr);
414 cur->isconst&=~(1<<hr);
415 return;
416 }
417 }
9f51b4b9 418
57871462 419 // Ok, now we have to evict someone
420 // Pick a register we hopefully won't need soon
421 u_char hsn[MAXREG+1];
422 memset(hsn,10,sizeof(hsn));
423 int j;
424 lsn(hsn,i,&preferred_reg);
425 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
426 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
427 if(i>0) {
428 // Don't evict the cycle count at entry points, otherwise the entry
429 // stub will have to write it.
430 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
431 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
432 for(j=10;j>=3;j--)
433 {
434 // Alloc preferred register if available
435 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
436 for(hr=0;hr<HOST_REGS;hr++) {
437 // Evict both parts of a 64-bit register
438 if((cur->regmap[hr]&63)==r) {
439 cur->regmap[hr]=-1;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 }
443 }
444 cur->regmap[preferred_reg]=reg;
445 return;
446 }
447 for(r=1;r<=MAXREG;r++)
448 {
449 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 for(hr=0;hr<HOST_REGS;hr++) {
461 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
462 if(cur->regmap[hr]==r) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 }
470 }
471 }
472 }
473 }
474 for(j=10;j>=0;j--)
475 {
476 for(r=1;r<=MAXREG;r++)
477 {
478 if(hsn[r]==j) {
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r+64) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 for(hr=0;hr<HOST_REGS;hr++) {
488 if(cur->regmap[hr]==r) {
489 cur->regmap[hr]=reg;
490 cur->dirty&=~(1<<hr);
491 cur->isconst&=~(1<<hr);
492 return;
493 }
494 }
495 }
496 }
497 }
c43b5311 498 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 499}
500
e2b5e7aa 501static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 502{
503 int preferred_reg = 8+(reg&1);
504 int r,hr;
9f51b4b9 505
57871462 506 // allocate the lower 32 bits
507 alloc_reg(cur,i,reg);
9f51b4b9 508
57871462 509 // Don't allocate unused registers
510 if((cur->uu>>reg)&1) return;
9f51b4b9 511
57871462 512 // see if the upper half is already allocated
513 for(hr=0;hr<HOST_REGS;hr++)
514 {
515 if(cur->regmap[hr]==reg+64) return;
516 }
9f51b4b9 517
57871462 518 // Keep the same mapping if the register was already allocated in a loop
519 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 520
57871462 521 // Try to allocate the preferred register
522 if(cur->regmap[preferred_reg]==-1) {
523 cur->regmap[preferred_reg]=reg|64;
524 cur->dirty&=~(1<<preferred_reg);
525 cur->isconst&=~(1<<preferred_reg);
526 return;
527 }
528 r=cur->regmap[preferred_reg];
529 if(r<64&&((cur->u>>r)&1)) {
530 cur->regmap[preferred_reg]=reg|64;
531 cur->dirty&=~(1<<preferred_reg);
532 cur->isconst&=~(1<<preferred_reg);
533 return;
534 }
535 if(r>=64&&((cur->uu>>(r&63))&1)) {
536 cur->regmap[preferred_reg]=reg|64;
537 cur->dirty&=~(1<<preferred_reg);
538 cur->isconst&=~(1<<preferred_reg);
539 return;
540 }
9f51b4b9 541
57871462 542 // Clear any unneeded registers
543 // We try to keep the mapping consistent, if possible, because it
544 // makes branches easier (especially loops). So we try to allocate
545 // first (see above) before removing old mappings. If this is not
546 // possible then go ahead and clear out the registers that are no
547 // longer needed.
548 for(hr=HOST_REGS-1;hr>=0;hr--)
549 {
550 r=cur->regmap[hr];
551 if(r>=0) {
552 if(r<64) {
553 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
558 }
559 }
560 }
561 // Try to allocate any available register, but prefer
562 // registers that have not been used recently.
563 if(i>0) {
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 // Try to allocate any available register
576 for(hr=0;hr<HOST_REGS;hr++) {
577 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
578 cur->regmap[hr]=reg|64;
579 cur->dirty&=~(1<<hr);
580 cur->isconst&=~(1<<hr);
581 return;
582 }
583 }
9f51b4b9 584
57871462 585 // Ok, now we have to evict someone
586 // Pick a register we hopefully won't need soon
587 u_char hsn[MAXREG+1];
588 memset(hsn,10,sizeof(hsn));
589 int j;
590 lsn(hsn,i,&preferred_reg);
591 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
592 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
593 if(i>0) {
594 // Don't evict the cycle count at entry points, otherwise the entry
595 // stub will have to write it.
596 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
597 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
598 for(j=10;j>=3;j--)
599 {
600 // Alloc preferred register if available
601 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
602 for(hr=0;hr<HOST_REGS;hr++) {
603 // Evict both parts of a 64-bit register
604 if((cur->regmap[hr]&63)==r) {
605 cur->regmap[hr]=-1;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 }
609 }
610 cur->regmap[preferred_reg]=reg|64;
611 return;
612 }
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 for(hr=0;hr<HOST_REGS;hr++) {
627 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
628 if(cur->regmap[hr]==r) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 }
636 }
637 }
638 }
639 }
640 for(j=10;j>=0;j--)
641 {
642 for(r=1;r<=MAXREG;r++)
643 {
644 if(hsn[r]==j) {
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r+64) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 for(hr=0;hr<HOST_REGS;hr++) {
654 if(cur->regmap[hr]==r) {
655 cur->regmap[hr]=reg|64;
656 cur->dirty&=~(1<<hr);
657 cur->isconst&=~(1<<hr);
658 return;
659 }
660 }
661 }
662 }
663 }
c43b5311 664 SysPrintf("This shouldn't happen");exit(1);
57871462 665}
666
667// Allocate a temporary register. This is done without regard to
668// dirty status or whether the register we request is on the unneeded list
669// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 670static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 671{
672 int r,hr;
673 int preferred_reg = -1;
9f51b4b9 674
57871462 675 // see if it's already allocated
676 for(hr=0;hr<HOST_REGS;hr++)
677 {
678 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
679 }
9f51b4b9 680
57871462 681 // Try to allocate any available register
682 for(hr=HOST_REGS-1;hr>=0;hr--) {
683 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
684 cur->regmap[hr]=reg;
685 cur->dirty&=~(1<<hr);
686 cur->isconst&=~(1<<hr);
687 return;
688 }
689 }
9f51b4b9 690
57871462 691 // Find an unneeded register
692 for(hr=HOST_REGS-1;hr>=0;hr--)
693 {
694 r=cur->regmap[hr];
695 if(r>=0) {
696 if(r<64) {
697 if((cur->u>>r)&1) {
698 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
699 cur->regmap[hr]=reg;
700 cur->dirty&=~(1<<hr);
701 cur->isconst&=~(1<<hr);
702 return;
703 }
704 }
705 }
706 else
707 {
708 if((cur->uu>>(r&63))&1) {
709 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
710 cur->regmap[hr]=reg;
711 cur->dirty&=~(1<<hr);
712 cur->isconst&=~(1<<hr);
713 return;
714 }
715 }
716 }
717 }
718 }
9f51b4b9 719
57871462 720 // Ok, now we have to evict someone
721 // Pick a register we hopefully won't need soon
722 // TODO: we might want to follow unconditional jumps here
723 // TODO: get rid of dupe code and make this into a function
724 u_char hsn[MAXREG+1];
725 memset(hsn,10,sizeof(hsn));
726 int j;
727 lsn(hsn,i,&preferred_reg);
728 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
729 if(i>0) {
730 // Don't evict the cycle count at entry points, otherwise the entry
731 // stub will have to write it.
732 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
733 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
734 for(j=10;j>=3;j--)
735 {
736 for(r=1;r<=MAXREG;r++)
737 {
738 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
739 for(hr=0;hr<HOST_REGS;hr++) {
740 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 }
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
751 if(cur->regmap[hr]==r) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 }
759 }
760 }
761 }
762 }
763 for(j=10;j>=0;j--)
764 {
765 for(r=1;r<=MAXREG;r++)
766 {
767 if(hsn[r]==j) {
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r+64) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 for(hr=0;hr<HOST_REGS;hr++) {
777 if(cur->regmap[hr]==r) {
778 cur->regmap[hr]=reg;
779 cur->dirty&=~(1<<hr);
780 cur->isconst&=~(1<<hr);
781 return;
782 }
783 }
784 }
785 }
786 }
c43b5311 787 SysPrintf("This shouldn't happen");exit(1);
57871462 788}
e2b5e7aa 789
57871462 790// Allocate a specific ARM register.
e2b5e7aa 791static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 792{
793 int n;
f776eb14 794 int dirty=0;
9f51b4b9 795
57871462 796 // see if it's already allocated (and dealloc it)
797 for(n=0;n<HOST_REGS;n++)
798 {
f776eb14 799 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
800 dirty=(cur->dirty>>n)&1;
801 cur->regmap[n]=-1;
802 }
57871462 803 }
9f51b4b9 804
57871462 805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
f776eb14 807 cur->dirty|=dirty<<hr;
57871462 808 cur->isconst&=~(1<<hr);
809}
810
811// Alloc cycle count into dedicated register
e2b5e7aa 812static void alloc_cc(struct regstat *cur,int i)
57871462 813{
814 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
815}
816
817/* Special alloc */
818
819
820/* Assembler */
821
e2b5e7aa 822static unused char regname[16][4] = {
57871462 823 "r0",
824 "r1",
825 "r2",
826 "r3",
827 "r4",
828 "r5",
829 "r6",
830 "r7",
831 "r8",
832 "r9",
833 "r10",
834 "fp",
835 "r12",
836 "sp",
837 "lr",
838 "pc"};
839
e2b5e7aa 840static void output_w32(u_int word)
57871462 841{
842 *((u_int *)out)=word;
843 out+=4;
844}
e2b5e7aa 845
// Build the register fields of an instruction word: rn in bits 19..16,
// rd in bits 15..12 and rm in bits 3..0. Each register number must be
// below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);

  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 853
// Build the rn/rd fields plus an 8-bit immediate with a rotate field.
// rd, rn: register numbers below 16 (bits 15..12 and 19..16).
// imm:    value below 256, placed in bits 7..0.
// shift:  even amount; ((32 - shift) & 30) is placed at bit 7 and up.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);

  const u_int rotate = (32 - shift) & 30;
  u_int fields = imm;
  fields |= rotate << 7;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 862
// Try to express `imm` as an 8-bit value plus an even rotation.
// On success returns 1 and stores the 12-bit field (rotate << 7 | value)
// in *encoded; zero encodes as 0. On failure returns 0 and leaves
// *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;

  *encoded = 0;
  if (imm == 0)
    return 1;

  // Rotate right two bits at a time until the value fits in eight bits.
  for (rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 878
879static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 880{
881 u_int ret=genimm(imm,encoded);
882 assert(ret);
581335b0 883 (void)ret;
cfbd3c6e 884}
e2b5e7aa 885
886static u_int genjmp(u_int addr)
57871462 887{
888 int offset=addr-(int)out-8;
e80343e2 889 if(offset<-33554432||offset>=33554432) {
890 if (addr>2) {
c43b5311 891 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 892 exit(1);
893 }
894 return 0;
895 }
57871462 896 return ((u_int)offset>>2)&0xffffff;
897}
898
e2b5e7aa 899static void emit_mov(int rs,int rt)
57871462 900{
901 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
903}
904
e2b5e7aa 905static void emit_movs(int rs,int rt)
57871462 906{
907 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
909}
910
e2b5e7aa 911static void emit_add(int rs1,int rs2,int rt)
57871462 912{
913 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
915}
916
e2b5e7aa 917static void emit_adds(int rs1,int rs2,int rt)
57871462 918{
919 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
921}
922
e2b5e7aa 923static void emit_adcs(int rs1,int rs2,int rt)
57871462 924{
925 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
926 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
927}
928
e2b5e7aa 929static void emit_sbc(int rs1,int rs2,int rt)
57871462 930{
931 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
932 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
933}
934
e2b5e7aa 935static void emit_sbcs(int rs1,int rs2,int rt)
57871462 936{
937 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
939}
940
e2b5e7aa 941static void emit_neg(int rs, int rt)
57871462 942{
943 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
944 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
945}
946
e2b5e7aa 947static void emit_negs(int rs, int rt)
57871462 948{
949 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
950 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
951}
952
e2b5e7aa 953static void emit_sub(int rs1,int rs2,int rt)
57871462 954{
955 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
956 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
957}
958
e2b5e7aa 959static void emit_subs(int rs1,int rs2,int rt)
57871462 960{
961 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
962 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
963}
964
e2b5e7aa 965static void emit_zeroreg(int rt)
57871462 966{
967 assem_debug("mov %s,#0\n",regname[rt]);
968 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
969}
970
e2b5e7aa 971static void emit_loadlp(u_int imm,u_int rt)
790ee18e 972{
973 add_literal((int)out,imm);
974 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
975 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
976}
e2b5e7aa 977
978static void emit_movw(u_int imm,u_int rt)
790ee18e 979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
e2b5e7aa 984
985static void emit_movt(u_int imm,u_int rt)
790ee18e 986{
987 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
988 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
989}
e2b5e7aa 990
991static void emit_movimm(u_int imm,u_int rt)
790ee18e 992{
993 u_int armval;
994 if(genimm(imm,&armval)) {
995 assem_debug("mov %s,#%d\n",regname[rt],imm);
996 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
997 }else if(genimm(~imm,&armval)) {
998 assem_debug("mvn %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(imm<65536) {
665f33e1 1001 #ifndef HAVE_ARMV7
790ee18e 1002 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1003 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1004 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1005 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1006 #else
1007 emit_movw(imm,rt);
1008 #endif
1009 }else{
665f33e1 1010 #ifndef HAVE_ARMV7
790ee18e 1011 emit_loadlp(imm,rt);
1012 #else
1013 emit_movw(imm&0x0000FFFF,rt);
1014 emit_movt(imm&0xFFFF0000,rt);
1015 #endif
1016 }
1017}
e2b5e7aa 1018
1019static void emit_pcreladdr(u_int rt)
790ee18e 1020{
1021 assem_debug("add %s,pc,#?\n",regname[rt]);
1022 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1023}
1024
e2b5e7aa 1025static void emit_loadreg(int r, int hr)
57871462 1026{
3d624f89 1027 if(r&64) {
c43b5311 1028 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1029 assert(0);
1030 return;
3d624f89 1031 }
57871462 1032 if((r&63)==0)
1033 emit_zeroreg(hr);
1034 else {
3d624f89 1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==CSREG) addr=(int)&Status;
1040 if(r==FSREG) addr=(int)&FCR31;
1041 if(r==INVCP) addr=(int)&invc_ptr;
1042 u_int offset = addr-(u_int)&dynarec_local;
1043 assert(offset<4096);
1044 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1045 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1046 }
1047}
e2b5e7aa 1048
1049static void emit_storereg(int r, int hr)
57871462 1050{
3d624f89 1051 if(r&64) {
c43b5311 1052 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1053 assert(0);
1054 return;
3d624f89 1055 }
3d624f89 1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
e2b5e7aa 1067static void emit_test(int rs, int rt)
57871462 1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
e2b5e7aa 1073static void emit_testimm(int rs,int imm)
57871462 1074{
1075 u_int armval;
5a05d80c 1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
57871462 1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
e2b5e7aa 1081static void emit_testeqimm(int rs,int imm)
b9b61529 1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
b9b61529 1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
e2b5e7aa 1089static void emit_not(int rs,int rt)
57871462 1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
e2b5e7aa 1095static void emit_mvnmi(int rs,int rt)
b9b61529 1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
e2b5e7aa 1101static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
e2b5e7aa 1107static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
e2b5e7aa 1112
1113static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1114{
1115 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1116 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1117}
1118
e2b5e7aa 1119static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1120{
1121 assert(rs<16);
1122 assert(rt<16);
1123 assert(imm<32);
1124 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1125 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1126}
1127
e2b5e7aa 1128static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
e2b5e7aa 1137static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1138{
1139 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1140 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1141}
1142
e2b5e7aa 1143static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 if(imm!=0) {
57871462 1148 u_int armval;
1149 if(genimm(imm,&armval)) {
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1151 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1152 }else if(genimm(-imm,&armval)) {
8a0a8423 1153 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1154 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1155 #ifdef HAVE_ARMV7
1156 }else if(rt!=rs&&(u_int)imm<65536) {
1157 emit_movw(imm&0x0000ffff,rt);
1158 emit_add(rs,rt,rt);
1159 }else if(rt!=rs&&(u_int)-imm<65536) {
1160 emit_movw(-imm&0x0000ffff,rt);
1161 emit_sub(rs,rt,rt);
1162 #endif
1163 }else if((u_int)-imm<65536) {
57871462 1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1168 }else {
1169 do {
1170 int shift = (ffs(imm) - 1) & ~1;
1171 int imm8 = imm & (0xff << shift);
1172 genimm_checked(imm8,&armval);
1173 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1174 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1175 rs = rt;
1176 imm &= ~imm8;
1177 }
1178 while (imm != 0);
57871462 1179 }
1180 }
1181 else if(rs!=rt) emit_mov(rs,rt);
1182}
1183
e2b5e7aa 1184static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1185{
1186 assert(imm>-65536&&imm<65536);
1187 u_int armval;
1188 if(genimm(imm,&armval)) {
1189 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(genimm(-imm,&armval)) {
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(imm<0) {
1195 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1196 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1197 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1198 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1199 }else{
1200 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1201 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1202 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1203 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
e2b5e7aa 1206
1207static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1208{
1209 emit_addimm(rt,imm,rt);
1210}
1211
e2b5e7aa 1212static void emit_addnop(u_int r)
57871462 1213{
1214 assert(r<16);
1215 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1216 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1217}
1218
e2b5e7aa 1219static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1220{
1221 u_int armval;
cfbd3c6e 1222 genimm_checked(imm,&armval);
57871462 1223 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1edfcc68 1226
e2b5e7aa 1227static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1228{
1229 assert(0);
1230 u_int armval;
cfbd3c6e 1231 genimm_checked(imm,&armval);
57871462 1232 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1234}
1235
e2b5e7aa 1236static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1237{
1238 // TODO: if(genimm(imm,&armval)) ...
1239 // else
1240 emit_movimm(imm,HOST_TEMPREG);
1241 emit_adds(HOST_TEMPREG,rsl,rtl);
1242 emit_adcimm(rsh,0,rth);
1243}
1244
e2b5e7aa 1245static void emit_andimm(int rs,int imm,int rt)
57871462 1246{
1247 u_int armval;
790ee18e 1248 if(imm==0) {
1249 emit_zeroreg(rt);
1250 }else if(genimm(imm,&armval)) {
57871462 1251 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1252 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1253 }else if(genimm(~imm,&armval)) {
1254 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(imm==65535) {
332a4533 1257 #ifndef HAVE_ARMV6
57871462 1258 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1260 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1262 #else
1263 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1265 #endif
1266 }else{
1267 assert(imm>0&&imm<65535);
665f33e1 1268 #ifndef HAVE_ARMV7
57871462 1269 assem_debug("mov r14,#%d\n",imm&0xFF00);
1270 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1271 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1272 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1273 #else
1274 emit_movw(imm,HOST_TEMPREG);
1275 #endif
1276 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1277 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1278 }
1279}
1280
e2b5e7aa 1281static void emit_orimm(int rs,int imm,int rt)
57871462 1282{
1283 u_int armval;
790ee18e 1284 if(imm==0) {
1285 if(rs!=rt) emit_mov(rs,rt);
1286 }else if(genimm(imm,&armval)) {
57871462 1287 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1289 }else{
1290 assert(imm>0&&imm<65536);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1294 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1295 }
1296}
1297
e2b5e7aa 1298static void emit_xorimm(int rs,int imm,int rt)
57871462 1299{
57871462 1300 u_int armval;
790ee18e 1301 if(imm==0) {
1302 if(rs!=rt) emit_mov(rs,rt);
1303 }else if(genimm(imm,&armval)) {
57871462 1304 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1305 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1306 }else{
514ed0d9 1307 assert(imm>0&&imm<65536);
57871462 1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1311 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1312 }
1313}
1314
e2b5e7aa 1315static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 //if(imm==1) ...
1320 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
e2b5e7aa 1324static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
e2b5e7aa 1332static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1338}
1339
e2b5e7aa 1340static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1346}
1347
e2b5e7aa 1348static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1354}
1355
e2b5e7aa 1356static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1362}
1363
e2b5e7aa 1364static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1365{
1366 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1367 assert(imm>0);
1368 assert(imm<32);
1369 //if(imm==1) ...
1370 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1371 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1372 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1373 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1374}
1375
e2b5e7aa 1376static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1377{
1378 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1379 assert(imm>0);
1380 assert(imm<32);
1381 //if(imm==1) ...
1382 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1383 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1384 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1385 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1386}
1387
/* Sign-extend the low 16 bits of rs into rt: "sxth" when HAVE_ARMV6 is
 * defined, otherwise a left shift by 16 followed by an arithmetic right
 * shift by 16. */
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
#endif
}
1398
/* Sign-extend the low 8 bits of rs into rt: "sxtb" when HAVE_ARMV6 is
 * defined, otherwise a left shift by 24 followed by an arithmetic right
 * shift by 24. */
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt,0,rs));
#else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
#endif
}
1409
e2b5e7aa 1410static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1411{
1412 assert(rs<16);
1413 assert(rt<16);
1414 assert(shift<16);
1415 //if(imm==1) ...
1416 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1417 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1418}
e2b5e7aa 1419
1420static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1421{
1422 assert(rs<16);
1423 assert(rt<16);
1424 assert(shift<16);
1425 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1426 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1427}
e2b5e7aa 1428
1429static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1430{
1431 assert(rs<16);
1432 assert(rt<16);
1433 assert(shift<16);
1434 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1435 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1436}
57871462 1437
e2b5e7aa 1438static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1445}
e2b5e7aa 1446
1447static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1448{
1449 assert(rs<16);
1450 assert(rt<16);
1451 assert(shift<16);
1452 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1453 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1454}
1455
e2b5e7aa 1456static void emit_cmpimm(int rs,int imm)
57871462 1457{
1458 u_int armval;
1459 if(genimm(imm,&armval)) {
5a05d80c 1460 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1461 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1462 }else if(genimm(-imm,&armval)) {
5a05d80c 1463 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1464 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1465 }else if(imm>0) {
1466 assert(imm<65536);
57871462 1467 emit_movimm(imm,HOST_TEMPREG);
57871462 1468 assem_debug("cmp %s,r14\n",regname[rs]);
1469 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1470 }else{
1471 assert(imm>-65536);
57871462 1472 emit_movimm(-imm,HOST_TEMPREG);
57871462 1473 assem_debug("cmn %s,r14\n",regname[rs]);
1474 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1475 }
1476}
1477
e2b5e7aa 1478static void emit_cmovne_imm(int imm,int rt)
57871462 1479{
1480 assem_debug("movne %s,#%d\n",regname[rt],imm);
1481 u_int armval;
cfbd3c6e 1482 genimm_checked(imm,&armval);
57871462 1483 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1484}
e2b5e7aa 1485
1486static void emit_cmovl_imm(int imm,int rt)
57871462 1487{
1488 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1489 u_int armval;
cfbd3c6e 1490 genimm_checked(imm,&armval);
57871462 1491 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1492}
e2b5e7aa 1493
1494static void emit_cmovb_imm(int imm,int rt)
57871462 1495{
1496 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1497 u_int armval;
cfbd3c6e 1498 genimm_checked(imm,&armval);
57871462 1499 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1500}
e2b5e7aa 1501
1502static void emit_cmovs_imm(int imm,int rt)
57871462 1503{
1504 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1505 u_int armval;
cfbd3c6e 1506 genimm_checked(imm,&armval);
57871462 1507 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1508}
e2b5e7aa 1509
1510static void emit_cmove_reg(int rs,int rt)
57871462 1511{
1512 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1513 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1514}
e2b5e7aa 1515
1516static void emit_cmovne_reg(int rs,int rt)
57871462 1517{
1518 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1519 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1520}
e2b5e7aa 1521
1522static void emit_cmovl_reg(int rs,int rt)
57871462 1523{
1524 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1525 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1526}
e2b5e7aa 1527
1528static void emit_cmovs_reg(int rs,int rt)
57871462 1529{
1530 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1531 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1532}
1533
/* rt = (rs < imm) ? 1 : 0, signed 32-bit compare.
 * rt is cleared before the compare when it is a different register, and
 * after it when rt aliases rs (so the compare still sees the source). */
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1541
/* rt = (rs < imm) ? 1 : 0, unsigned 32-bit compare (uses movcc).
 * rt is cleared before the compare when it is a different register, and
 * after it when rt aliases rs (so the compare still sees the source). */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if(aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1549
/* Signed "set if less than immediate" for a 64-bit value held in rsh:rsl.
 * The low-word result is computed first and then corrected from the high
 * word.  rt must not alias rsh. */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // immediate's high word is all ones
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // immediate's high word is zero
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
e2b5e7aa 1567
/* Unsigned "set if less than immediate" for a 64-bit value held in rsh:rsl.
 * The low-word result is computed first and then corrected from the high
 * word.  rt must not alias rsh. */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // immediate's high word is all ones: any other high word is smaller
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    // immediate's high word is zero: a non-zero high word is never smaller
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1583
e2b5e7aa 1584static void emit_cmp(int rs,int rt)
57871462 1585{
1586 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1587 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1588}
e2b5e7aa 1589
/* rt = (rs > 0) ? 1 : 0 for a signed 32-bit rs: compare against 1, preset
 * rt to 1 and overwrite it with 0 when rs < 1. */
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1597
/* rt = (rs != 0) ? 1 : 0.  When rs and rt differ, a flag-setting move
 * copies rs into rt; otherwise rs is tested in place.  A non-zero value is
 * then replaced by 1. */
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt)
    emit_test(rs,rs);
  else
    emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1605
/* rt = 1 when the 64-bit value in rsh:rsl is greater than zero, else the
 * low-word result adjusted by the high word: a non-zero high word forces
 * 1, a negative high word forces 0. */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);   // start from the low word
  emit_test(rsh, rsh);      // flags from the high word
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1614
/* rt = 1 when the 64-bit value in rsh:rsl is non-zero, else 0: the two
 * halves are OR-ed into rt with flags set, and a non-zero result is
 * replaced by 1. */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1621
/* rt = (rs1 < rs2) ? 1 : 0, signed.  rt is cleared before the compare
 * unless it aliases one of the sources, in which case it is cleared
 * afterwards. */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1630
/* rt = (rs1 < rs2) ? 1 : 0, unsigned (uses movcc).  rt is cleared before
 * the compare unless it aliases one of the sources, in which case it is
 * cleared afterwards. */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if(aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1639
1640static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1641{
1642 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1643 assert(u1!=rt);
1644 assert(u2!=rt);
1645 emit_cmp(l1,l2);
1646 emit_movimm(0,rt);
1647 emit_sbcs(u1,u2,HOST_TEMPREG);
1648 emit_cmovl_imm(1,rt);
1649}
e2b5e7aa 1650
1651static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1652{
1653 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1654 assert(u1!=rt);
1655 assert(u2!=rt);
1656 emit_cmp(l1,l2);
1657 emit_movimm(0,rt);
1658 emit_sbcs(u1,u2,HOST_TEMPREG);
1659 emit_cmovb_imm(1,rt);
1660}
1661
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
/* Builds one table entry: the function's address and its name prefixed
 * with a space, ready to be appended to a trace line. */
#define FUNCNAME(f) { (intptr_t)f, " " #f }
/* Known branch/call targets, used to annotate the assembler trace. */
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/* Return the table name for address a, or "" when a is not in the table. */
static const char *func_name(intptr_t a)
{
  const unsigned int count = sizeof(function_names)/sizeof(function_names[0]);
  unsigned int idx;

  for (idx = 0; idx < count; idx++) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
  }
  return "";
}
#else
/* Without DRC_DBG no names are available. */
#define func_name(x) ""
#endif
1702
e2b5e7aa 1703static void emit_call(int a)
57871462 1704{
dd114d7d 1705 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1706 u_int offset=genjmp(a);
1707 output_w32(0xeb000000|offset);
1708}
e2b5e7aa 1709
b14b6a8f 1710static void emit_jmp(const void *a_)
57871462 1711{
b14b6a8f 1712 int a = (int)a_;
dd114d7d 1713 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1714 u_int offset=genjmp(a);
1715 output_w32(0xea000000|offset);
1716}
e2b5e7aa 1717
/* Emit "bne a"; the branch field is produced by genjmp. */
static void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000 | genjmp(a));
}
e2b5e7aa 1724
/* Emit "beq a"; the branch field is produced by genjmp. */
static void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000 | genjmp(a));
}
e2b5e7aa 1731
/* Emit "bmi a"; the branch field is produced by genjmp. */
static void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000 | genjmp(a));
}
e2b5e7aa 1738
/* Emit "bpl a"; the branch field is produced by genjmp. */
static void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000 | genjmp(a));
}
e2b5e7aa 1745
/* Emit "blt a"; the branch field is produced by genjmp. */
static void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000 | genjmp(a));
}
e2b5e7aa 1752
/* Emit "bge a"; the branch field is produced by genjmp. */
static void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000 | genjmp(a));
}
e2b5e7aa 1759
/* Emit "bvc a"; the branch field is produced by genjmp. */
static void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000 | genjmp(a));
}
e2b5e7aa 1766
/* Emit "bcs a"; the branch field is produced by genjmp. */
static void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000 | genjmp(a));
}
e2b5e7aa 1773
/* Emit "bcc a_"; the branch field is produced by genjmp. */
static void emit_jcc(void *a_)
{
  const int a = (int)a_;

  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000 | genjmp(a));
}
1781
e2b5e7aa 1782static void emit_callreg(u_int r)
57871462 1783{
c6c3b1b3 1784 assert(r<15);
1785 assem_debug("blx %s\n",regname[r]);
1786 output_w32(0xe12fff30|r);
57871462 1787}
e2b5e7aa 1788
1789static void emit_jmpreg(u_int r)
57871462 1790{
1791 assem_debug("mov pc,%s\n",regname[r]);
1792 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1793}
1794
e2b5e7aa 1795static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1796{
1797 assert(offset>-4096&&offset<4096);
1798 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1799 if(offset>=0) {
1800 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1801 }else{
1802 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1803 }
1804}
e2b5e7aa 1805
1806static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1807{
1808 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1809 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1810}
e2b5e7aa 1811
1812static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1813{
1814 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1815 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1816}
e2b5e7aa 1817
1818static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1819{
1820 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1821 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1822}
e2b5e7aa 1823
1824static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1825{
1826 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1828}
e2b5e7aa 1829
1830static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1831{
1832 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1834}
e2b5e7aa 1835
1836static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1837{
1838 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1839 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1840}
e2b5e7aa 1841
/* Word load with an optional map register.  With map < 0 this is a plain
 * offset load; otherwise addr must be 0 and the load uses rs + (map << 2). */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1850
/* Load a doubleword as two word loads into rh (skipped when rh < 0) and rl.
 * Without a map register the words come from addr and addr+4.  With one,
 * map is incremented by 1 between the loads, so it is modified; rh must
 * not alias rs in that case. */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map<0) {
    if(rh>=0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }

  assert(rh!=rs);
  if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);  // map is scaled by 4, so +1 selects the next word
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1863
1864static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1865{
1866 assert(offset>-256&&offset<256);
1867 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1868 if(offset>=0) {
1869 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1870 }else{
1871 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1872 }
1873}
e2b5e7aa 1874
1875static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1876{
1877 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1878 else {
1879 if(addr==0) {
1880 emit_shlimm(map,2,map);
1881 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1882 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1883 }else{
1884 assert(addr>-256&&addr<256);
1885 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1886 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1887 emit_movsbl_indexed(addr, rt, rt);
1888 }
1889 }
1890}
e2b5e7aa 1891
1892static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1893{
1894 assert(offset>-256&&offset<256);
1895 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1896 if(offset>=0) {
1897 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1898 }else{
1899 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1900 }
1901}
e2b5e7aa 1902
1903static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1904{
1905 assert(offset>-4096&&offset<4096);
1906 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1907 if(offset>=0) {
1908 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1909 }else{
1910 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1911 }
1912}
e2b5e7aa 1913
1914static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1915{
1916 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1917 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1918}
e2b5e7aa 1919
/* Unsigned byte load with an optional map register.  With map < 0 this is
 * a plain offset load.  Otherwise the load uses base + (map << 2), where
 * the base is rs when addr == 0 and rt = rs + addr when it is not. */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  int base = rs;

  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
e2b5e7aa 1932
1933static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
e2b5e7aa 1943
054175e9 1944static void emit_ldrd(int offset, int rs, int rt)
1945{
1946 assert(offset>-256&&offset<256);
1947 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1948 if(offset>=0) {
1949 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1950 }else{
1951 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1952 }
1953}
e2b5e7aa 1954
1955static void emit_readword(int addr, int rt)
57871462 1956{
1957 u_int offset = addr-(u_int)&dynarec_local;
1958 assert(offset<4096);
1959 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1960 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1961}
e2b5e7aa 1962
1963static unused void emit_movsbl(int addr, int rt)
57871462 1964{
1965 u_int offset = addr-(u_int)&dynarec_local;
1966 assert(offset<256);
1967 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1968 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1969}
e2b5e7aa 1970
1971static unused void emit_movswl(int addr, int rt)
57871462 1972{
1973 u_int offset = addr-(u_int)&dynarec_local;
1974 assert(offset<256);
1975 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1976 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1977}
e2b5e7aa 1978
1979static unused void emit_movzbl(int addr, int rt)
57871462 1980{
1981 u_int offset = addr-(u_int)&dynarec_local;
1982 assert(offset<4096);
1983 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1984 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1985}
e2b5e7aa 1986
1987static unused void emit_movzwl(int addr, int rt)
57871462 1988{
1989 u_int offset = addr-(u_int)&dynarec_local;
1990 assert(offset<256);
1991 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1992 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1993}
57871462 1994
e2b5e7aa 1995static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1996{
1997 assert(offset>-4096&&offset<4096);
1998 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1999 if(offset>=0) {
2000 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2001 }else{
2002 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2003 }
2004}
e2b5e7aa 2005
2006static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 2007{
2008 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2009 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2010}
e2b5e7aa 2011
/* Word store with an optional map register.  With map < 0 this is a plain
 * offset store; otherwise addr must be 0 and the store uses
 * rs + (map << 2).  temp is not used by this function. */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 2020
/* Store a doubleword as two word stores from rh and rl.
 * Without a map register the words go to addr (skipped when rh < 0) and
 * addr+4.  With one, rh must be valid; the second word is reached either
 * through temp = map + 1, or, when temp aliases rs, by advancing rs by 4
 * (rs is modified in that case). */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }

  assert(rh>=0);
  if(temp==rs) {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }else{
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }
}
e2b5e7aa 2037
2038static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 2039{
2040 assert(offset>-256&&offset<256);
2041 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2044 }else{
2045 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2046 }
2047}
e2b5e7aa 2048
2049static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 2050{
2051 assert(offset>-4096&&offset<4096);
2052 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2053 if(offset>=0) {
2054 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2055 }else{
2056 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2057 }
2058}
e2b5e7aa 2059
2060static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 2061{
2062 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2063 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2064}
e2b5e7aa 2065
/*
 * Byte store that optionally goes through a map register.
 * With map < 0 this is a plain immediate-offset store. Otherwise the store is
 * indexed by map (scaled by 4); a non-zero addr is first added to rs into temp.
 */
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
e2b5e7aa 2078
2079static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2080{
2081 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2083}
e2b5e7aa 2084
2085static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2086{
2087 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2088 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2089}
e2b5e7aa 2090
2091static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2092{
2093 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2094 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2095}
e2b5e7aa 2096
2097static void emit_writeword(int rt, int addr)
57871462 2098{
2099 u_int offset = addr-(u_int)&dynarec_local;
2100 assert(offset<4096);
2101 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2102 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2103}
e2b5e7aa 2104
2105static unused void emit_writehword(int rt, int addr)
57871462 2106{
2107 u_int offset = addr-(u_int)&dynarec_local;
2108 assert(offset<256);
2109 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2110 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2111}
e2b5e7aa 2112
2113static unused void emit_writebyte(int rt, int addr)
57871462 2114{
2115 u_int offset = addr-(u_int)&dynarec_local;
2116 assert(offset<4096);
74426039 2117 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2118 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2119}
57871462 2120
e2b5e7aa 2121static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2122{
2123 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2124 assert(rs1<16);
2125 assert(rs2<16);
2126 assert(hi<16);
2127 assert(lo<16);
2128 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2129}
e2b5e7aa 2130
2131static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2132{
2133 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2134 assert(rs1<16);
2135 assert(rs2<16);
2136 assert(hi<16);
2137 assert(lo<16);
2138 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2139}
2140
e2b5e7aa 2141static void emit_clz(int rs,int rt)
57871462 2142{
2143 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2144 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2145}
2146
e2b5e7aa 2147static void emit_subcs(int rs1,int rs2,int rt)
57871462 2148{
2149 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2150 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2151}
2152
e2b5e7aa 2153static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2154{
2155 assert(imm>0);
2156 assert(imm<32);
2157 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2158 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2159}
2160
e2b5e7aa 2161static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2162{
2163 assert(imm>0);
2164 assert(imm<32);
2165 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2166 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2167}
2168
e2b5e7aa 2169static void emit_negmi(int rs, int rt)
57871462 2170{
2171 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2172 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2173}
2174
e2b5e7aa 2175static void emit_negsmi(int rs, int rt)
57871462 2176{
2177 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2178 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2179}
2180
e2b5e7aa 2181static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2182{
2183 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2184 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2185}
2186
e2b5e7aa 2187static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2188{
2189 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2190 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2191}
2192
e2b5e7aa 2193static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2194{
2195 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2196 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2197}
2198
e2b5e7aa 2199static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2200{
2201 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2202 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2203}
2204
e2b5e7aa 2205static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2206{
2207 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2208 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2209}
2210
e2b5e7aa 2211static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2212{
2213 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2214 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2215}
2216
e2b5e7aa 2217static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2218{
2219 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2220 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2221}
2222
e2b5e7aa 2223static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2224{
2225 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2226 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2227}
2228
e2b5e7aa 2229static void emit_teq(int rs, int rt)
57871462 2230{
2231 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2232 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2233}
2234
e2b5e7aa 2235static void emit_rsbimm(int rs, int imm, int rt)
57871462 2236{
2237 u_int armval;
cfbd3c6e 2238 genimm_checked(imm,&armval);
57871462 2239 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2240 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2241}
2242
2243// Load 2 immediates optimizing for small code size
e2b5e7aa 2244static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2245{
2246 emit_movimm(imm1,rt1);
2247 u_int armval;
2248 if(genimm(imm2-imm1,&armval)) {
2249 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2250 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2251 }else if(genimm(imm1-imm2,&armval)) {
2252 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2253 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2254 }
2255 else emit_movimm(imm2,rt2);
2256}
2257
2258// Conditionally select one of two immediates, optimizing for small code size
2259// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2260static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2261{
2262 u_int armval;
2263 if(genimm(imm2-imm1,&armval)) {
2264 emit_movimm(imm1,rt);
2265 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2266 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2267 }else if(genimm(imm1-imm2,&armval)) {
2268 emit_movimm(imm1,rt);
2269 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2270 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2271 }
2272 else {
665f33e1 2273 #ifndef HAVE_ARMV7
57871462 2274 emit_movimm(imm1,rt);
2275 add_literal((int)out,imm2);
2276 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2277 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2278 #else
2279 emit_movw(imm1&0x0000FFFF,rt);
2280 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2281 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2282 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2283 }
2284 emit_movt(imm1&0xFFFF0000,rt);
2285 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2286 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2287 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2288 }
2289 #endif
2290 }
2291}
2292
57871462 2293// special case for checking invalid_code
e2b5e7aa 2294static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2295{
2296 assert(imm<128&&imm>=0);
2297 assert(r>=0&&r<16);
2298 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2299 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2300 emit_cmpimm(HOST_TEMPREG,imm);
2301}
2302
e2b5e7aa 2303static void emit_callne(int a)
0bbd1454 2304{
2305 assem_debug("blne %x\n",a);
2306 u_int offset=genjmp(a);
2307 output_w32(0x1b000000|offset);
2308}
2309
57871462 2310// Used to preload hash table entries
e2b5e7aa 2311static unused void emit_prefetchreg(int r)
57871462 2312{
2313 assem_debug("pld %s\n",regname[r]);
2314 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2315}
2316
2317// Special case for mini_ht
e2b5e7aa 2318static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2319{
2320 assert(offset<4096);
2321 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2322 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2323}
2324
e2b5e7aa 2325static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2326{
2327 u_int armval;
cfbd3c6e 2328 genimm_checked(imm,&armval);
57871462 2329 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2330 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2331}
2332
e2b5e7aa 2333static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2334{
2335 u_int armval;
cfbd3c6e 2336 genimm_checked(imm,&armval);
57871462 2337 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2338 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2339}
2340
e2b5e7aa 2341static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2342{
2343 u_int armval;
cfbd3c6e 2344 genimm_checked(imm,&armval);
57871462 2345 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2346 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2347}
2348
e2b5e7aa 2349static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2350{
2351 u_int armval;
cfbd3c6e 2352 genimm_checked(imm,&armval);
57871462 2353 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2354 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2355}
2356
e2b5e7aa 2357static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2358{
2359 u_int armval;
cfbd3c6e 2360 genimm_checked(imm,&armval);
57871462 2361 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2362 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2363}
2364
e2b5e7aa 2365static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2366{
2367 u_int armval;
cfbd3c6e 2368 genimm_checked(imm,&armval);
b9b61529 2369 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2370 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2371}
2372
e2b5e7aa 2373static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2374{
2375 u_int armval;
cfbd3c6e 2376 genimm_checked(imm,&armval);
b9b61529 2377 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2378 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2379}
2380
e2b5e7aa 2381static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2382{
2383 u_int armval;
2384 genimm_checked(imm,&armval);
2385 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2386 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2387}
2388
e2b5e7aa 2389static void emit_jno_unlikely(int a)
57871462 2390{
2391 //emit_jno(a);
2392 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2393 output_w32(0x72800000|rd_rn_rm(15,15,0));
2394}
2395
054175e9 2396static void save_regs_all(u_int reglist)
57871462 2397{
054175e9 2398 int i;
57871462 2399 if(!reglist) return;
2400 assem_debug("stmia fp,{");
054175e9 2401 for(i=0;i<16;i++)
2402 if(reglist&(1<<i))
2403 assem_debug("r%d,",i);
57871462 2404 assem_debug("}\n");
2405 output_w32(0xe88b0000|reglist);
2406}
e2b5e7aa 2407
054175e9 2408static void restore_regs_all(u_int reglist)
57871462 2409{
054175e9 2410 int i;
57871462 2411 if(!reglist) return;
2412 assem_debug("ldmia fp,{");
054175e9 2413 for(i=0;i<16;i++)
2414 if(reglist&(1<<i))
2415 assem_debug("r%d,",i);
57871462 2416 assem_debug("}\n");
2417 output_w32(0xe89b0000|reglist);
2418}
e2b5e7aa 2419
054175e9 2420// Save registers before function call
2421static void save_regs(u_int reglist)
2422{
4d646738 2423 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2424 save_regs_all(reglist);
2425}
e2b5e7aa 2426
054175e9 2427// Restore registers after function call
2428static void restore_regs(u_int reglist)
2429{
4d646738 2430 reglist&=CALLER_SAVE_REGS;
054175e9 2431 restore_regs_all(reglist);
2432}
57871462 2433
57871462 2434/* Stubs/epilogue */
2435
e2b5e7aa 2436static void literal_pool(int n)
57871462 2437{
2438 if(!literalcount) return;
2439 if(n) {
2440 if((int)out-literals[0][0]<4096-n) return;
2441 }
2442 u_int *ptr;
2443 int i;
2444 for(i=0;i<literalcount;i++)
2445 {
77750690 2446 u_int l_addr=(u_int)out;
2447 int j;
2448 for(j=0;j<i;j++) {
2449 if(literals[j][1]==literals[i][1]) {
2450 //printf("dup %08x\n",literals[i][1]);
2451 l_addr=literals[j][0];
2452 break;
2453 }
2454 }
57871462 2455 ptr=(u_int *)literals[i][0];
77750690 2456 u_int offset=l_addr-(u_int)ptr-8;
57871462 2457 assert(offset<4096);
2458 assert(!(offset&3));
2459 *ptr|=offset;
77750690 2460 if(l_addr==(u_int)out) {
2461 literals[i][0]=l_addr; // remember for dupes
2462 output_w32(literals[i][1]);
2463 }
57871462 2464 }
2465 literalcount=0;
2466}
2467
e2b5e7aa 2468static void literal_pool_jumpover(int n)
57871462 2469{
2470 if(!literalcount) return;
2471 if(n) {
2472 if((int)out-literals[0][0]<4096-n) return;
2473 }
df4dc2b1 2474 void *jaddr = out;
57871462 2475 emit_jmp(0);
2476 literal_pool(0);
df4dc2b1 2477 set_jump_target(jaddr, out);
57871462 2478}
2479
b14b6a8f 2480static void emit_extjump2(u_int addr, int target, void *linker)
57871462 2481{
2482 u_char *ptr=(u_char *)addr;
2483 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2484 (void)ptr;
2485
57871462 2486 emit_loadlp(target,0);
2487 emit_loadlp(addr,1);
24385cae 2488 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2489 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2490//DEBUG >
2491#ifdef DEBUG_CYCLE_COUNT
2492 emit_readword((int)&last_count,ECX);
2493 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2494 emit_readword((int)&next_interupt,ECX);
2495 emit_writeword(HOST_CCREG,(int)&Count);
2496 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2497 emit_writeword(ECX,(int)&last_count);
2498#endif
2499//DEBUG <
2500 emit_jmp(linker);
2501}
2502
e2b5e7aa 2503static void emit_extjump(int addr, int target)
57871462 2504{
b14b6a8f 2505 emit_extjump2(addr, target, dyna_linker);
57871462 2506}
e2b5e7aa 2507
2508static void emit_extjump_ds(int addr, int target)
57871462 2509{
b14b6a8f 2510 emit_extjump2(addr, target, dyna_linker_ds);
57871462 2511}
2512
13e35c04 2513// put rt_val into rt, potentially making use of rs with value rs_val
2514static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2515{
8575a877 2516 u_int armval;
2517 int diff;
2518 if(genimm(rt_val,&armval)) {
2519 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2520 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2521 return;
2522 }
2523 if(genimm(~rt_val,&armval)) {
2524 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2525 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2526 return;
2527 }
2528 diff=rt_val-rs_val;
2529 if(genimm(diff,&armval)) {
2530 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2531 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2532 return;
2533 }else if(genimm(-diff,&armval)) {
2534 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2535 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2536 return;
2537 }
2538 emit_movimm(rt_val,rt);
2539}
2540
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. the two values
// are equal or their difference (in either direction) reduces to fewer than
// 8 significant bits after stripping trailing pairs of zero bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int diff = v2 - v1;
  u_int bits = diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  return bits < 0x100;
}
cbbab9cd 2556
// Move host registers a0 and a1 into r0 and r1 for a call; trashes r2.
// A negative register number means "nothing to pass" for that slot, except
// that a1 == 0 is always copied into r1.
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // r0 and r1 must be swapped, going through r2
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // a1 lives in r0: move it out before r0 is overwritten
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }

  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2573
b14b6a8f 2574static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 2575{
2576 switch(type) {
2577 case LOADB_STUB: emit_signextend8(rs,rt); break;
2578 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2579 case LOADH_STUB: emit_signextend16(rs,rt); break;
2580 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2581 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2582 default: assert(0);
2583 }
2584}
2585
b1be1eee 2586#include "pcsxmem.h"
2587#include "pcsxmem_inline.c"
b1be1eee 2588
e2b5e7aa 2589static void do_readstub(int n)
57871462 2590{
b14b6a8f 2591 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
57871462 2592 literal_pool(256);
b14b6a8f 2593 set_jump_target(stubs[n].addr, out);
2594 enum stub_type type=stubs[n].type;
2595 int i=stubs[n].a;
2596 int rs=stubs[n].b;
2597 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2598 u_int reglist=stubs[n].e;
57871462 2599 signed char *i_regmap=i_regs->regmap;
581335b0 2600 int rt;
b9b61529 2601 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2602 rt=get_reg(i_regmap,FTEMP);
2603 }else{
57871462 2604 rt=get_reg(i_regmap,rt1[i]);
2605 }
2606 assert(rs>=0);
df4dc2b1 2607 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2608 void *restore_jump = NULL;
c6c3b1b3 2609 reglist|=(1<<rs);
2610 for(r=0;r<=12;r++) {
2611 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2612 temp=r; break;
2613 }
2614 }
db829eeb 2615 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2616 reglist&=~(1<<rt);
2617 if(temp==-1) {
2618 save_regs(reglist);
2619 regs_saved=1;
2620 temp=(rs==0)?2:0;
2621 }
2622 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2623 temp2=1;
2624 emit_readword((int)&mem_rtab,temp);
2625 emit_shrimm(rs,12,temp2);
2626 emit_readword_dualindexedx4(temp,temp2,temp2);
2627 emit_lsls_imm(temp2,1,temp2);
2628 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2629 switch(type) {
2630 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2631 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2632 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2633 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2634 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
b14b6a8f 2635 default: assert(0);
c6c3b1b3 2636 }
2637 }
2638 if(regs_saved) {
df4dc2b1 2639 restore_jump=out;
c6c3b1b3 2640 emit_jcc(0); // jump to reg restore
2641 }
2642 else
b14b6a8f 2643 emit_jcc(stubs[n].retaddr); // return address
c6c3b1b3 2644
2645 if(!regs_saved)
2646 save_regs(reglist);
2647 int handler=0;
2648 if(type==LOADB_STUB||type==LOADBU_STUB)
2649 handler=(int)jump_handler_read8;
2650 if(type==LOADH_STUB||type==LOADHU_STUB)
2651 handler=(int)jump_handler_read16;
2652 if(type==LOADW_STUB)
2653 handler=(int)jump_handler_read32;
2654 assert(handler!=0);
b96d3df7 2655 pass_args(rs,temp2);
c6c3b1b3 2656 int cc=get_reg(i_regmap,CCREG);
2657 if(cc<0)
2658 emit_loadreg(CCREG,2);
b14b6a8f 2659 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
c6c3b1b3 2660 emit_call(handler);
2661 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2662 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2663 }
2664 if(restore_jump)
df4dc2b1 2665 set_jump_target(restore_jump, out);
c6c3b1b3 2666 restore_regs(reglist);
b14b6a8f 2667 emit_jmp(stubs[n].retaddr); // return address
57871462 2668}
2669
c6c3b1b3 2670// return memhandler, or get directly accessable address and return 0
b14b6a8f 2671static u_int get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
c6c3b1b3 2672{
2673 u_int l1,l2=0;
2674 l1=((u_int *)table)[addr>>12];
2675 if((l1&(1<<31))==0) {
2676 u_int v=l1<<1;
2677 *addr_host=v+addr;
2678 return 0;
2679 }
2680 else {
2681 l1<<=1;
2682 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2683 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2684 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2685 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2686 else
2687 l2=((u_int *)l1)[(addr&0xfff)/4];
2688 if((l2&(1<<31))==0) {
2689 u_int v=l2<<1;
2690 *addr_host=v+(addr&0xfff);
2691 return 0;
2692 }
2693 return l2<<1;
2694 }
2695}
c6c3b1b3 2696
b14b6a8f 2697static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2698{
2699 int rs=get_reg(regmap,target);
57871462 2700 int rt=get_reg(regmap,target);
535d208a 2701 if(rs<0) rs=get_reg(regmap,-1);
57871462 2702 assert(rs>=0);
b1be1eee 2703 u_int handler,host_addr=0,is_dynamic,far_call=0;
2704 int cc=get_reg(regmap,CCREG);
2705 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2706 return;
c6c3b1b3 2707 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2708 if (handler==0) {
db829eeb 2709 if(rt<0||rt1[i]==0)
c6c3b1b3 2710 return;
13e35c04 2711 if(addr!=host_addr)
2712 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2713 switch(type) {
2714 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2715 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2716 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2717 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2718 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2719 default: assert(0);
2720 }
2721 return;
2722 }
b1be1eee 2723 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2724 if(is_dynamic) {
2725 if(type==LOADB_STUB||type==LOADBU_STUB)
2726 handler=(int)jump_handler_read8;
2727 if(type==LOADH_STUB||type==LOADHU_STUB)
2728 handler=(int)jump_handler_read16;
2729 if(type==LOADW_STUB)
2730 handler=(int)jump_handler_read32;
2731 }
c6c3b1b3 2732
2733 // call a memhandler
db829eeb 2734 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2735 reglist&=~(1<<rt);
2736 save_regs(reglist);
2737 if(target==0)
2738 emit_movimm(addr,0);
2739 else if(rs!=0)
2740 emit_mov(rs,0);
c6c3b1b3 2741 int offset=(int)handler-(int)out-8;
2742 if(offset<-33554432||offset>=33554432) {
2743 // unreachable memhandler, a plugin func perhaps
b1be1eee 2744 emit_movimm(handler,12);
2745 far_call=1;
2746 }
2747 if(cc<0)
2748 emit_loadreg(CCREG,2);
2749 if(is_dynamic) {
2750 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2751 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2752 }
b1be1eee 2753 else {
2754 emit_readword((int)&last_count,3);
2755 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2756 emit_add(2,3,2);
2757 emit_writeword(2,(int)&Count);
2758 }
2759
2760 if(far_call)
2761 emit_callreg(12);
c6c3b1b3 2762 else
2763 emit_call(handler);
b1be1eee 2764
db829eeb 2765 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2766 switch(type) {
2767 case LOADB_STUB: emit_signextend8(0,rt); break;
2768 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2769 case LOADH_STUB: emit_signextend16(0,rt); break;
2770 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2771 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2772 default: assert(0);
2773 }
2774 }
2775 restore_regs(reglist);
57871462 2776}
2777
e2b5e7aa 2778static void do_writestub(int n)
57871462 2779{
b14b6a8f 2780 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
57871462 2781 literal_pool(256);
b14b6a8f 2782 set_jump_target(stubs[n].addr, out);
2783 enum stub_type type=stubs[n].type;
2784 int i=stubs[n].a;
2785 int rs=stubs[n].b;
2786 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2787 u_int reglist=stubs[n].e;
57871462 2788 signed char *i_regmap=i_regs->regmap;
581335b0 2789 int rt,r;
b9b61529 2790 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2791 rt=get_reg(i_regmap,r=FTEMP);
2792 }else{
57871462 2793 rt=get_reg(i_regmap,r=rs2[i]);
2794 }
2795 assert(rs>=0);
2796 assert(rt>=0);
b14b6a8f 2797 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
df4dc2b1 2798 void *restore_jump = NULL;
b96d3df7 2799 int reglist2=reglist|(1<<rs)|(1<<rt);
2800 for(rtmp=0;rtmp<=12;rtmp++) {
2801 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2802 temp=rtmp; break;
2803 }
2804 }
2805 if(temp==-1) {
2806 save_regs(reglist);
2807 regs_saved=1;
2808 for(rtmp=0;rtmp<=3;rtmp++)
2809 if(rtmp!=rs&&rtmp!=rt)
2810 {temp=rtmp;break;}
2811 }
2812 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2813 temp2=3;
2814 emit_readword((int)&mem_wtab,temp);
2815 emit_shrimm(rs,12,temp2);
2816 emit_readword_dualindexedx4(temp,temp2,temp2);
2817 emit_lsls_imm(temp2,1,temp2);
2818 switch(type) {
2819 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2820 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2821 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2822 default: assert(0);
2823 }
2824 if(regs_saved) {
df4dc2b1 2825 restore_jump=out;
b96d3df7 2826 emit_jcc(0); // jump to reg restore
2827 }
2828 else
b14b6a8f 2829 emit_jcc(stubs[n].retaddr); // return address (invcode check)
b96d3df7 2830
2831 if(!regs_saved)
2832 save_regs(reglist);
2833 int handler=0;
2834 switch(type) {
2835 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2836 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2837 case STOREW_STUB: handler=(int)jump_handler_write32; break;
b14b6a8f 2838 default: assert(0);
b96d3df7 2839 }
2840 assert(handler!=0);
2841 pass_args(rs,rt);
2842 if(temp2!=3)
2843 emit_mov(temp2,3);
2844 int cc=get_reg(i_regmap,CCREG);
2845 if(cc<0)
2846 emit_loadreg(CCREG,2);
b14b6a8f 2847 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
b96d3df7 2848 // returns new cycle_count
2849 emit_call(handler);
b14b6a8f 2850 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2851 if(cc<0)
2852 emit_storereg(CCREG,2);
2853 if(restore_jump)
df4dc2b1 2854 set_jump_target(restore_jump, out);
b96d3df7 2855 restore_regs(reglist);
b14b6a8f 2856 emit_jmp(stubs[n].retaddr);
57871462 2857}
2858
b14b6a8f 2859static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2860{
2861 int rs=get_reg(regmap,-1);
57871462 2862 int rt=get_reg(regmap,target);
2863 assert(rs>=0);
2864 assert(rt>=0);
b96d3df7 2865 u_int handler,host_addr=0;
b96d3df7 2866 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2867 if (handler==0) {
13e35c04 2868 if(addr!=host_addr)
2869 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2870 switch(type) {
2871 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2872 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2873 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2874 default: assert(0);
2875 }
2876 return;
2877 }
2878
2879 // call a memhandler
2880 save_regs(reglist);
13e35c04 2881 pass_args(rs,rt);
b96d3df7 2882 int cc=get_reg(regmap,CCREG);
2883 if(cc<0)
2884 emit_loadreg(CCREG,2);
2573466a 2885 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2886 emit_movimm(handler,3);
2887 // returns new cycle_count
2888 emit_call((int)jump_handler_write_h);
2573466a 2889 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2890 if(cc<0)
2891 emit_storereg(CCREG,2);
2892 restore_regs(reglist);
57871462 2893}
2894
e2b5e7aa 2895static void do_unalignedwritestub(int n)
57871462 2896{
b14b6a8f 2897 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
b7918751 2898 literal_pool(256);
b14b6a8f 2899 set_jump_target(stubs[n].addr, out);
b7918751 2900
b14b6a8f 2901 int i=stubs[n].a;
2902 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2903 int addr=stubs[n].b;
2904 u_int reglist=stubs[n].e;
b7918751 2905 signed char *i_regmap=i_regs->regmap;
2906 int temp2=get_reg(i_regmap,FTEMP);
2907 int rt;
b7918751 2908 rt=get_reg(i_regmap,rs2[i]);
2909 assert(rt>=0);
2910 assert(addr>=0);
2911 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2912 reglist|=(1<<addr);
2913 reglist&=~(1<<temp2);
2914
b96d3df7 2915#if 1
2916 // don't bother with it and call write handler
2917 save_regs(reglist);
2918 pass_args(addr,rt);
2919 int cc=get_reg(i_regmap,CCREG);
2920 if(cc<0)
2921 emit_loadreg(CCREG,2);
b14b6a8f 2922 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
b96d3df7 2923 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
b14b6a8f 2924 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
b96d3df7 2925 if(cc<0)
2926 emit_storereg(CCREG,2);
2927 restore_regs(reglist);
b14b6a8f 2928 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2929#else
b7918751 2930 emit_andimm(addr,0xfffffffc,temp2);
2931 emit_writeword(temp2,(int)&address);
2932
2933 save_regs(reglist);
b7918751 2934 emit_shrimm(addr,16,1);
2935 int cc=get_reg(i_regmap,CCREG);
2936 if(cc<0) {
2937 emit_loadreg(CCREG,2);
2938 }
2939 emit_movimm((u_int)readmem,0);
b14b6a8f 2940 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
b7918751 2941 emit_call((int)&indirect_jump_indexed);
2942 restore_regs(reglist);
2943
2944 emit_readword((int)&readmem_dword,temp2);
2945 int temp=addr; //hmh
2946 emit_shlimm(addr,3,temp);
2947 emit_andimm(temp,24,temp);
2948#ifdef BIG_ENDIAN_MIPS
2949 if (opcode[i]==0x2e) // SWR
2950#else
2951 if (opcode[i]==0x2a) // SWL
2952#endif
2953 emit_xorimm(temp,24,temp);
2954 emit_movimm(-1,HOST_TEMPREG);
55439448 2955 if (opcode[i]==0x2a) { // SWL
b7918751 2956 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2957 emit_orrshr(rt,temp,temp2);
2958 }else{
2959 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2960 emit_orrshl(rt,temp,temp2);
2961 }
2962 emit_readword((int)&address,addr);
2963 emit_writeword(temp2,(int)&word);
2964 //save_regs(reglist); // don't need to, no state changes
2965 emit_shrimm(addr,16,1);
2966 emit_movimm((u_int)writemem,0);
2967 //emit_call((int)&indirect_jump_indexed);
2968 emit_mov(15,14);
2969 emit_readword_dualindexedx4(0,1,15);
2970 emit_readword((int)&Count,HOST_TEMPREG);
2971 emit_readword((int)&next_interupt,2);
b14b6a8f 2972 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
b7918751 2973 emit_writeword(2,(int)&last_count);
2974 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2975 if(cc<0) {
2976 emit_storereg(CCREG,HOST_TEMPREG);
2977 }
2978 restore_regs(reglist);
b14b6a8f 2979 emit_jmp(stubs[n].retaddr); // return address
b96d3df7 2980#endif
57871462 2981}
2982
e2b5e7aa 2983static void do_invstub(int n)
57871462 2984{
2985 literal_pool(20);
b14b6a8f 2986 u_int reglist=stubs[n].a;
2987 set_jump_target(stubs[n].addr, out);
57871462 2988 save_regs(reglist);
b14b6a8f 2989 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
57871462 2990 emit_call((int)&invalidate_addr);
2991 restore_regs(reglist);
b14b6a8f 2992 emit_jmp(stubs[n].retaddr); // return address
57871462 2993}
2994
df4dc2b1 2995void *do_dirty_stub(int i)
57871462 2996{
2997 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2998 u_int addr=(u_int)source;
57871462 2999 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3000 #ifndef HAVE_ARMV7
ac545b3a 3001 emit_loadlp(addr,1);
57871462 3002 emit_loadlp((int)copy,2);
3003 emit_loadlp(slen*4,3);
3004 #else
ac545b3a 3005 emit_movw(addr&0x0000FFFF,1);
57871462 3006 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3007 emit_movt(addr&0xFFFF0000,1);
57871462 3008 emit_movt(((u_int)copy)&0xFFFF0000,2);
3009 emit_movw(slen*4,3);
3010 #endif
3011 emit_movimm(start+i*4,0);
3012 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
df4dc2b1 3013 void *entry = out;
57871462 3014 load_regs_entry(i);
df4dc2b1 3015 if (entry == out)
3016 entry = instr_addr[i];
57871462 3017 emit_jmp(instr_addr[i]);
3018 return entry;
3019}
3020
e2b5e7aa 3021static void do_dirty_stub_ds()
57871462 3022{
3023 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3024 #ifndef HAVE_ARMV7
57871462 3025 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3026 emit_loadlp((int)copy,2);
3027 emit_loadlp(slen*4,3);
3028 #else
3029 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3030 emit_movw(((u_int)copy)&0x0000FFFF,2);
3031 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3032 emit_movt(((u_int)copy)&0xFFFF0000,2);
3033 emit_movw(slen*4,3);
3034 #endif
3035 emit_movimm(start+1,0);
3036 emit_call((int)&verify_code_ds);
3037}
3038
e2b5e7aa 3039static void do_cop1stub(int n)
57871462 3040{
3041 literal_pool(256);
b14b6a8f 3042 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
3043 set_jump_target(stubs[n].addr, out);
3044 int i=stubs[n].a;
3045// int rs=stubs[n].b;
3046 struct regstat *i_regs=(struct regstat *)stubs[n].c;
3047 int ds=stubs[n].d;
57871462 3048 if(!ds) {
3049 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3050 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3051 }
3052 //else {printf("fp exception in delay slot\n");}
3053 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3054 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3055 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3056 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
b14b6a8f 3057 emit_jmp(ds?fp_exception_ds:fp_exception);
57871462 3058}
3059
57871462 3060/* Special assem */
3061
e2b5e7aa 3062static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 3063{
3064 if(rt1[i]) {
3065 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3066 {
3067 signed char s,t,shift;
3068 t=get_reg(i_regs->regmap,rt1[i]);
3069 s=get_reg(i_regs->regmap,rs1[i]);
3070 shift=get_reg(i_regs->regmap,rs2[i]);
3071 if(t>=0){
3072 if(rs1[i]==0)
3073 {
3074 emit_zeroreg(t);
3075 }
3076 else if(rs2[i]==0)
3077 {
3078 assert(s>=0);
3079 if(s!=t) emit_mov(s,t);
3080 }
3081 else
3082 {
3083 emit_andimm(shift,31,HOST_TEMPREG);
3084 if(opcode2[i]==4) // SLLV
3085 {
3086 emit_shl(s,HOST_TEMPREG,t);
3087 }
3088 if(opcode2[i]==6) // SRLV
3089 {
3090 emit_shr(s,HOST_TEMPREG,t);
3091 }
3092 if(opcode2[i]==7) // SRAV
3093 {
3094 emit_sar(s,HOST_TEMPREG,t);
3095 }
3096 }
3097 }
3098 } else { // DSLLV/DSRLV/DSRAV
3099 signed char sh,sl,th,tl,shift;
3100 th=get_reg(i_regs->regmap,rt1[i]|64);
3101 tl=get_reg(i_regs->regmap,rt1[i]);
3102 sh=get_reg(i_regs->regmap,rs1[i]|64);
3103 sl=get_reg(i_regs->regmap,rs1[i]);
3104 shift=get_reg(i_regs->regmap,rs2[i]);
3105 if(tl>=0){
3106 if(rs1[i]==0)
3107 {
3108 emit_zeroreg(tl);
3109 if(th>=0) emit_zeroreg(th);
3110 }
3111 else if(rs2[i]==0)
3112 {
3113 assert(sl>=0);
3114 if(sl!=tl) emit_mov(sl,tl);
3115 if(th>=0&&sh!=th) emit_mov(sh,th);
3116 }
3117 else
3118 {
3119 // FIXME: What if shift==tl ?
3120 assert(shift!=tl);
3121 int temp=get_reg(i_regs->regmap,-1);
3122 int real_th=th;
3123 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3124 assert(sl>=0);
3125 assert(sh>=0);
3126 emit_andimm(shift,31,HOST_TEMPREG);
3127 if(opcode2[i]==0x14) // DSLLV
3128 {
3129 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3130 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3131 emit_orrshr(sl,HOST_TEMPREG,th);
3132 emit_andimm(shift,31,HOST_TEMPREG);
3133 emit_testimm(shift,32);
3134 emit_shl(sl,HOST_TEMPREG,tl);
3135 if(th>=0) emit_cmovne_reg(tl,th);
3136 emit_cmovne_imm(0,tl);
3137 }
3138 if(opcode2[i]==0x16) // DSRLV
3139 {
3140 assert(th>=0);
3141 emit_shr(sl,HOST_TEMPREG,tl);
3142 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3143 emit_orrshl(sh,HOST_TEMPREG,tl);
3144 emit_andimm(shift,31,HOST_TEMPREG);
3145 emit_testimm(shift,32);
3146 emit_shr(sh,HOST_TEMPREG,th);
3147 emit_cmovne_reg(th,tl);
3148 if(real_th>=0) emit_cmovne_imm(0,th);
3149 }
3150 if(opcode2[i]==0x17) // DSRAV
3151 {
3152 assert(th>=0);
3153 emit_shr(sl,HOST_TEMPREG,tl);
3154 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3155 if(real_th>=0) {
3156 assert(temp>=0);
3157 emit_sarimm(th,31,temp);
3158 }
3159 emit_orrshl(sh,HOST_TEMPREG,tl);
3160 emit_andimm(shift,31,HOST_TEMPREG);
3161 emit_testimm(shift,32);
3162 emit_sar(sh,HOST_TEMPREG,th);
3163 emit_cmovne_reg(th,tl);
3164 if(real_th>=0) emit_cmovne_reg(temp,th);
3165 }
3166 }
3167 }
3168 }
3169 }
3170}
ffb0b9e0 3171
ffb0b9e0 3172static void speculate_mov(int rs,int rt)
3173{
3174 if(rt!=0) {
3175 smrv_strong_next|=1<<rt;
3176 smrv[rt]=smrv[rs];
3177 }
3178}
3179
3180static void speculate_mov_weak(int rs,int rt)
3181{
3182 if(rt!=0) {
3183 smrv_weak_next|=1<<rt;
3184 smrv[rt]=smrv[rs];
3185 }
3186}
3187
3188static void speculate_register_values(int i)
3189{
3190 if(i==0) {
3191 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3192 // gp,sp are likely to stay the same throughout the block
3193 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3194 smrv_weak_next=~smrv_strong_next;
3195 //printf(" llr %08x\n", smrv[4]);
3196 }
3197 smrv_strong=smrv_strong_next;
3198 smrv_weak=smrv_weak_next;
3199 switch(itype[i]) {
3200 case ALU:
3201 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3202 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3203 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3204 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3205 else {
3206 smrv_strong_next&=~(1<<rt1[i]);
3207 smrv_weak_next&=~(1<<rt1[i]);
3208 }
3209 break;
3210 case SHIFTIMM:
3211 smrv_strong_next&=~(1<<rt1[i]);
3212 smrv_weak_next&=~(1<<rt1[i]);
3213 // fallthrough
3214 case IMM16:
3215 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3216 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3217 if(hr>=0) {
3218 if(get_final_value(hr,i,&value))
3219 smrv[rt1[i]]=value;
3220 else smrv[rt1[i]]=constmap[i][hr];
3221 smrv_strong_next|=1<<rt1[i];
3222 }
3223 }
3224 else {
3225 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3226 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3227 }
3228 break;
3229 case LOAD:
3230 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3231 // special case for BIOS
3232 smrv[rt1[i]]=0xa0000000;
3233 smrv_strong_next|=1<<rt1[i];
3234 break;
3235 }
3236 // fallthrough
3237 case SHIFT:
3238 case LOADLR:
3239 case MOV:
3240 smrv_strong_next&=~(1<<rt1[i]);
3241 smrv_weak_next&=~(1<<rt1[i]);
3242 break;
3243 case COP0:
3244 case COP2:
3245 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3246 smrv_strong_next&=~(1<<rt1[i]);
3247 smrv_weak_next&=~(1<<rt1[i]);
3248 }
3249 break;
3250 case C2LS:
3251 if (opcode[i]==0x32) { // LWC2
3252 smrv_strong_next&=~(1<<rt1[i]);
3253 smrv_weak_next&=~(1<<rt1[i]);
3254 }
3255 break;
3256 }
3257#if 0
3258 int r=4;
3259 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3260 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3261#endif
3262}
3263
3264enum {
3265 MTYPE_8000 = 0,
3266 MTYPE_8020,
3267 MTYPE_0000,
3268 MTYPE_A000,
3269 MTYPE_1F80,
3270};
3271
3272static int get_ptr_mem_type(u_int a)
3273{
3274 if(a < 0x00200000) {
3275 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3276 // return wrong, must use memhandler for BIOS self-test to pass
3277 // 007 does similar stuff from a00 mirror, weird stuff
3278 return MTYPE_8000;
3279 return MTYPE_0000;
3280 }
3281 if(0x1f800000 <= a && a < 0x1f801000)
3282 return MTYPE_1F80;
3283 if(0x80200000 <= a && a < 0x80800000)
3284 return MTYPE_8020;
3285 if(0xa0000000 <= a && a < 0xa0200000)
3286 return MTYPE_A000;
3287 return MTYPE_8000;
3288}
ffb0b9e0 3289
b14b6a8f 3290static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
ffb0b9e0 3291{
b14b6a8f 3292 void *jaddr = NULL;
3293 int type=0;
ffb0b9e0 3294 int mr=rs1[i];
3295 if(((smrv_strong|smrv_weak)>>mr)&1) {
3296 type=get_ptr_mem_type(smrv[mr]);
3297 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3298 }
3299 else {
3300 // use the mirror we are running on
3301 type=get_ptr_mem_type(start);
3302 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3303 }
3304
3305 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3306 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3307 addr=*addr_reg_override=HOST_TEMPREG;
3308 type=0;
3309 }
3310 else if(type==MTYPE_0000) { // RAM 0 mirror
3311 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3312 addr=*addr_reg_override=HOST_TEMPREG;
3313 type=0;
3314 }
3315 else if(type==MTYPE_A000) { // RAM A mirror
3316 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3317 addr=*addr_reg_override=HOST_TEMPREG;
3318 type=0;
3319 }
3320 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3321 if (psxH == (void *)0x1f800000) {
3322 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3323 emit_cmpimm(HOST_TEMPREG,0x1000);
b14b6a8f 3324 jaddr=out;
6d760c92 3325 emit_jc(0);
3326 }
3327 else {
3328 // do usual RAM check, jump will go to the right handler
3329 type=0;
3330 }
ffb0b9e0 3331 }
ffb0b9e0 3332
3333 if(type==0)
3334 {
3335 emit_cmpimm(addr,RAM_SIZE);
b14b6a8f 3336 jaddr=out;
ffb0b9e0 3337 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3338 // Hint to branch predictor that the branch is unlikely to be taken
3339 if(rs1[i]>=28)
3340 emit_jno_unlikely(0);
3341 else
3342 #endif
3343 emit_jno(0);
a327ad27 3344 if(ram_offset!=0) {
3345 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3346 addr=*addr_reg_override=HOST_TEMPREG;
3347 }
ffb0b9e0 3348 }
3349
3350 return jaddr;
3351}
3352
57871462 3353#define shift_assemble shift_assemble_arm
3354
e2b5e7aa 3355static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3356{
3357 int s,th,tl,temp,temp2,addr,map=-1;
3358 int offset;
b14b6a8f 3359 void *jaddr=0;
af4ee1fe 3360 int memtarget=0,c=0;
ffb0b9e0 3361 int fastload_reg_override=0;
57871462 3362 u_int hr,reglist=0;
3363 th=get_reg(i_regs->regmap,rt1[i]|64);
3364 tl=get_reg(i_regs->regmap,rt1[i]);
3365 s=get_reg(i_regs->regmap,rs1[i]);
3366 temp=get_reg(i_regs->regmap,-1);
3367 temp2=get_reg(i_regs->regmap,FTEMP);
3368 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3369 assert(addr<0);
3370 offset=imm[i];
3371 for(hr=0;hr<HOST_REGS;hr++) {
3372 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3373 }
3374 reglist|=1<<temp;
3375 if(offset||s<0||c) addr=temp2;
3376 else addr=s;
3377 if(s>=0) {
3378 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3379 if(c) {
3380 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3381 }
57871462 3382 }
1edfcc68 3383 if(!c) {
3384 #ifdef RAM_OFFSET
3385 map=get_reg(i_regs->regmap,ROREG);
3386 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3387 #endif
3388 emit_shlimm(addr,3,temp);
3389 if (opcode[i]==0x22||opcode[i]==0x26) {
3390 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3391 }else{
3392 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3393 }
1edfcc68 3394 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3395 }
3396 else {
3397 if(ram_offset&&memtarget) {
3398 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3399 fastload_reg_override=HOST_TEMPREG;
57871462 3400 }
1edfcc68 3401 if (opcode[i]==0x22||opcode[i]==0x26) {
3402 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3403 }else{
1edfcc68 3404 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3405 }
535d208a 3406 }
3407 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3408 if(!c||memtarget) {
ffb0b9e0 3409 int a=temp2;
3410 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3411 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3412 emit_readword_indexed_tlb(0,a,map,temp2);
b14b6a8f 3413 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
535d208a 3414 }
3415 else
3416 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3417 if(rt1[i]) {
3418 assert(tl>=0);
57871462 3419 emit_andimm(temp,24,temp);
2002a1db 3420#ifdef BIG_ENDIAN_MIPS
3421 if (opcode[i]==0x26) // LWR
3422#else
3423 if (opcode[i]==0x22) // LWL
3424#endif
3425 emit_xorimm(temp,24,temp);
57871462 3426 emit_movimm(-1,HOST_TEMPREG);
3427 if (opcode[i]==0x26) {
3428 emit_shr(temp2,temp,temp2);
3429 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3430 }else{
3431 emit_shl(temp2,temp,temp2);
3432 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3433 }
3434 emit_or(temp2,tl,tl);
57871462 3435 }
535d208a 3436 //emit_storereg(rt1[i],tl); // DEBUG
3437 }
3438 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3439 // FIXME: little endian, fastload_reg_override
535d208a 3440 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3441 if(!c||memtarget) {
3442 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3443 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3444 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
b14b6a8f 3445 if(jaddr) add_stub_r(LOADD_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
535d208a 3446 }
3447 else
3448 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3449 if(rt1[i]) {
3450 assert(th>=0);
3451 assert(tl>=0);
57871462 3452 emit_testimm(temp,32);
3453 emit_andimm(temp,24,temp);
3454 if (opcode[i]==0x1A) { // LDL
3455 emit_rsbimm(temp,32,HOST_TEMPREG);
3456 emit_shl(temp2h,temp,temp2h);
3457 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3458 emit_movimm(-1,HOST_TEMPREG);
3459 emit_shl(temp2,temp,temp2);
3460 emit_cmove_reg(temp2h,th);
3461 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3462 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3463 emit_orreq(temp2,tl,tl);
3464 emit_orrne(temp2,th,th);
3465 }
3466 if (opcode[i]==0x1B) { // LDR
3467 emit_xorimm(temp,24,temp);
3468 emit_rsbimm(temp,32,HOST_TEMPREG);
3469 emit_shr(temp2,temp,temp2);
3470 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3471 emit_movimm(-1,HOST_TEMPREG);
3472 emit_shr(temp2h,temp,temp2h);
3473 emit_cmovne_reg(temp2,tl);
3474 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3475 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3476 emit_orrne(temp2h,th,th);
3477 emit_orreq(temp2h,tl,tl);
3478 }
3479 }
3480 }
3481}
3482#define loadlr_assemble loadlr_assemble_arm
3483
e2b5e7aa 3484static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3485{
3486 if(opcode2[i]==0) // MFC0
3487 {
3488 signed char t=get_reg(i_regs->regmap,rt1[i]);
3489 char copr=(source[i]>>11)&0x1f;
3490 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3491 if(t>=0&&rt1[i]!=0) {
7139f3c8 3492 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3493 }
3494 }
3495 else if(opcode2[i]==4) // MTC0
3496 {
3497 signed char s=get_reg(i_regs->regmap,rs1[i]);
3498 char copr=(source[i]>>11)&0x1f;
3499 assert(s>=0);
63cb0298 3500 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3501 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3502 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3503 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3504 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3505 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3506 emit_writeword(HOST_CCREG,(int)&Count);
3507 }
3508 // What a mess. The status register (12) can enable interrupts,
3509 // so needs a special case to handle a pending interrupt.
3510 // The interrupt must be taken immediately, because a subsequent
3511 // instruction might disable interrupts again.
7139f3c8 3512 if(copr==12||copr==13) {
fca1aef2 3513 if (is_delayslot) {
3514 // burn cycles to cause cc_interrupt, which will
3515 // reschedule next_interupt. Relies on CCREG from above.
3516 assem_debug("MTC0 DS %d\n", copr);
3517 emit_writeword(HOST_CCREG,(int)&last_count);
3518 emit_movimm(0,HOST_CCREG);
3519 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3520 emit_loadreg(rs1[i],1);
fca1aef2 3521 emit_movimm(copr,0);
3522 emit_call((int)pcsx_mtc0_ds);
042c7287 3523 emit_loadreg(rs1[i],s);
fca1aef2 3524 return;
3525 }
63cb0298 3526 emit_movimm(start+i*4+4,HOST_TEMPREG);
3527 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3528 emit_movimm(0,HOST_TEMPREG);
3529 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3530 }
3531 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3532 //else
caeefe31 3533 if(s==HOST_CCREG)
3534 emit_loadreg(rs1[i],1);
3535 else if(s!=1)
63cb0298 3536 emit_mov(s,1);
fca1aef2 3537 emit_movimm(copr,0);
3538 emit_call((int)pcsx_mtc0);
7139f3c8 3539 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3540 emit_readword((int)&Count,HOST_CCREG);
042c7287 3541 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3542 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3543 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3544 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3545 emit_storereg(CCREG,HOST_CCREG);
3546 }
7139f3c8 3547 if(copr==12||copr==13) {
57871462 3548 assert(!is_delayslot);
3549 emit_readword((int)&pending_exception,14);
042c7287 3550 emit_test(14,14);
3551 emit_jne((int)&do_interrupt);
57871462 3552 }
3553 emit_loadreg(rs1[i],s);
3554 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3555 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3556 cop1_usable=0;
3557 }
3558 else
3559 {
3560 assert(opcode2[i]==0x10);
576bbd8f 3561 if((source[i]&0x3f)==0x10) // RFE
3562 {
3563 emit_readword((int)&Status,0);
3564 emit_andimm(0,0x3c,1);
3565 emit_andimm(0,~0xf,0);
3566 emit_orrshr_imm(1,2,0);
3567 emit_writeword(0,(int)&Status);
3568 }
57871462 3569 }
3570}
3571
b9b61529 3572static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3573{
3574 switch (copr) {
3575 case 1:
3576 case 3:
3577 case 5:
3578 case 8:
3579 case 9:
3580 case 10:
3581 case 11:
3582 emit_readword((int)&reg_cop2d[copr],tl);
3583 emit_signextend16(tl,tl);
3584 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3585 break;
3586 case 7:
3587 case 16:
3588 case 17:
3589 case 18:
3590 case 19:
3591 emit_readword((int)&reg_cop2d[copr],tl);
3592 emit_andimm(tl,0xffff,tl);
3593 emit_writeword(tl,(int)&reg_cop2d[copr]);
3594 break;
3595 case 15:
3596 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3597 emit_writeword(tl,(int)&reg_cop2d[copr]);
3598 break;
3599 case 28:
b9b61529 3600 case 29:
3601 emit_readword((int)&reg_cop2d[9],temp);
3602 emit_testimm(temp,0x8000); // do we need this?
3603 emit_andimm(temp,0xf80,temp);
3604 emit_andne_imm(temp,0,temp);
f70d384d 3605 emit_shrimm(temp,7,tl);
b9b61529 3606 emit_readword((int)&reg_cop2d[10],temp);
3607 emit_testimm(temp,0x8000);
3608 emit_andimm(temp,0xf80,temp);
3609 emit_andne_imm(temp,0,temp);
f70d384d 3610 emit_orrshr_imm(temp,2,tl);
b9b61529 3611 emit_readword((int)&reg_cop2d[11],temp);
3612 emit_testimm(temp,0x8000);
3613 emit_andimm(temp,0xf80,temp);
3614 emit_andne_imm(temp,0,temp);
f70d384d 3615 emit_orrshl_imm(temp,3,tl);
b9b61529 3616 emit_writeword(tl,(int)&reg_cop2d[copr]);
3617 break;
3618 default:
3619 emit_readword((int)&reg_cop2d[copr],tl);
3620 break;
3621 }
3622}
3623
3624static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3625{
3626 switch (copr) {
3627 case 15:
3628 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3629 emit_writeword(sl,(int)&reg_cop2d[copr]);
3630 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3631 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3632 emit_writeword(sl,(int)&reg_cop2d[14]);
3633 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3634 break;
3635 case 28:
3636 emit_andimm(sl,0x001f,temp);
f70d384d 3637 emit_shlimm(temp,7,temp);
b9b61529 3638 emit_writeword(temp,(int)&reg_cop2d[9]);
3639 emit_andimm(sl,0x03e0,temp);
f70d384d 3640 emit_shlimm(temp,2,temp);
b9b61529 3641 emit_writeword(temp,(int)&reg_cop2d[10]);
3642 emit_andimm(sl,0x7c00,temp);
f70d384d 3643 emit_shrimm(temp,3,temp);
b9b61529 3644 emit_writeword(temp,(int)&reg_cop2d[11]);
3645 emit_writeword(sl,(int)&reg_cop2d[28]);
3646 break;
3647 case 30:
3648 emit_movs(sl,temp);
3649 emit_mvnmi(temp,temp);
665f33e1 3650#ifdef HAVE_ARMV5
b9b61529 3651 emit_clz(temp,temp);
665f33e1 3652#else
3653 emit_movs(temp,HOST_TEMPREG);
3654 emit_movimm(0,temp);
3655 emit_jeq((int)out+4*4);
3656 emit_addpl_imm(temp,1,temp);
3657 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3658 emit_jns((int)out-2*4);
3659#endif
b9b61529 3660 emit_writeword(sl,(int)&reg_cop2d[30]);
3661 emit_writeword(temp,(int)&reg_cop2d[31]);
3662 break;
b9b61529 3663 case 31:
3664 break;
3665 default:
3666 emit_writeword(sl,(int)&reg_cop2d[copr]);
3667 break;
3668 }
3669}
3670
e2b5e7aa 3671static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3672{
3673 u_int copr=(source[i]>>11)&0x1f;
3674 signed char temp=get_reg(i_regs->regmap,-1);
3675 if (opcode2[i]==0) { // MFC2
3676 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3677 if(tl>=0&&rt1[i]!=0)
b9b61529 3678 cop2_get_dreg(copr,tl,temp);
3679 }
3680 else if (opcode2[i]==4) { // MTC2
3681 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3682 cop2_put_dreg(copr,sl,temp);
3683 }
3684 else if (opcode2[i]==2) // CFC2
3685 {
3686 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3687 if(tl>=0&&rt1[i]!=0)
b9b61529 3688 emit_readword((int)&reg_cop2c[copr],tl);
3689 }
3690 else if (opcode2[i]==6) // CTC2
3691 {
3692 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3693 switch(copr) {
3694 case 4:
3695 case 12:
3696 case 20:
3697 case 26:
3698 case 27:
3699 case 29:
3700 case 30:
3701 emit_signextend16(sl,temp);
3702 break;
3703 case 31:
3704 //value = value & 0x7ffff000;
3705 //if (value & 0x7f87e000) value |= 0x80000000;
3706 emit_shrimm(sl,12,temp);
3707 emit_shlimm(temp,12,temp);
3708 emit_testimm(temp,0x7f000000);
3709 emit_testeqimm(temp,0x00870000);
3710 emit_testeqimm(temp,0x0000e000);
3711 emit_orrne_imm(temp,0x80000000,temp);
3712 break;
3713 default:
3714 temp=sl;
3715 break;
3716 }
3717 emit_writeword(temp,(int)&reg_cop2c[copr]);
3718 assert(sl>=0);
3719 }
3720}
3721
054175e9 3722static void c2op_prologue(u_int op,u_int reglist)
3723{
3724 save_regs_all(reglist);
82ed88eb 3725#ifdef PCNT
3726 emit_movimm(op,0);
3727 emit_call((int)pcnt_gte_start);
3728#endif
054175e9 3729 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3730}
3731
3732static void c2op_epilogue(u_int op,u_int reglist)
3733{
82ed88eb 3734#ifdef PCNT
3735 emit_movimm(op,0);
3736 emit_call((int)pcnt_gte_end);
3737#endif
054175e9 3738 restore_regs_all(reglist);
3739}
3740
6c0eefaf 3741static void c2op_call_MACtoIR(int lm,int need_flags)
3742{
3743 if(need_flags)
3744 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3745 else
3746 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3747}
3748
3749static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3750{
3751 emit_call((int)func);
3752 // func is C code and trashes r0
3753 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3754 if(need_flags||need_ir)
3755 c2op_call_MACtoIR(lm,need_flags);
3756 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3757}
3758
054175e9 3759static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3760{
b9b61529 3761 u_int c2op=source[i]&0x3f;
6c0eefaf 3762 u_int hr,reglist_full=0,reglist;
054175e9 3763 int need_flags,need_ir;
b9b61529 3764 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3765 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3766 }
4d646738 3767 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3768
3769 if (gte_handlers[c2op]!=NULL) {
bedfea38 3770 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3771 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3772 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3773 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3774 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3775 need_flags=0;
6c0eefaf 3776 int shift = (source[i] >> 19) & 1;
3777 int lm = (source[i] >> 10) & 1;
054175e9 3778 switch(c2op) {
19776aef 3779#ifndef DRC_DBG
054175e9 3780 case GTE_MVMVA: {
82336ba3 3781#ifdef HAVE_ARMV5
054175e9 3782 int v = (source[i] >> 15) & 3;
3783 int cv = (source[i] >> 13) & 3;
3784 int mx = (source[i] >> 17) & 3;
4d646738 3785 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3786 c2op_prologue(c2op,reglist);
3787 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3788 if(v<3)
3789 emit_ldrd(v*8,0,4);
3790 else {
3791 emit_movzwl_indexed(9*4,0,4); // gteIR
3792 emit_movzwl_indexed(10*4,0,6);
3793 emit_movzwl_indexed(11*4,0,5);
3794 emit_orrshl_imm(6,16,4);
3795 }
3796 if(mx<3)
3797 emit_addimm(0,32*4+mx*8*4,6);
3798 else
3799 emit_readword((int)&zeromem_ptr,6);
3800 if(cv<3)
3801 emit_addimm(0,32*4+(cv*8+5)*4,7);
3802 else
3803 emit_readword((int)&zeromem_ptr,7);
3804#ifdef __ARM_NEON__
3805 emit_movimm(source[i],1); // opcode
3806 emit_call((int)gteMVMVA_part_neon);
3807 if(need_flags) {
3808 emit_movimm(lm,1);
3809 emit_call((int)gteMACtoIR_flags_neon);
3810 }
3811#else
3812 if(cv==3&&shift)
3813 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3814 else {
3815 emit_movimm(shift,1);
3816 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3817 }
6c0eefaf 3818 if(need_flags||need_ir)
3819 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3820#endif
3821#else /* if not HAVE_ARMV5 */
3822 c2op_prologue(c2op,reglist);
3823 emit_movimm(source[i],1); // opcode
3824 emit_writeword(1,(int)&psxRegs.code);
3825 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3826#endif
3827 break;
3828 }
6c0eefaf 3829 case GTE_OP:
3830 c2op_prologue(c2op,reglist);
3831 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3832 if(need_flags||need_ir) {
3833 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3834 c2op_call_MACtoIR(lm,need_flags);
3835 }
3836 break;
3837 case GTE_DPCS:
3838 c2op_prologue(c2op,reglist);
3839 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3840 break;
3841 case GTE_INTPL:
3842 c2op_prologue(c2op,reglist);
3843 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3844 break;
3845 case GTE_SQR:
3846 c2op_prologue(c2op,reglist);
3847 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3848 if(need_flags||need_ir) {
3849 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3850 c2op_call_MACtoIR(lm,need_flags);
3851 }
3852 break;
3853 case GTE_DCPL:
3854 c2op_prologue(c2op,reglist);
3855 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3856 break;
3857 case GTE_GPF:
3858 c2op_prologue(c2op,reglist);
3859 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3860 break;
3861 case GTE_GPL:
3862 c2op_prologue(c2op,reglist);
3863 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3864 break;
19776aef 3865#endif
054175e9 3866 default:
054175e9 3867 c2op_prologue(c2op,reglist);
19776aef 3868#ifdef DRC_DBG
3869 emit_movimm(source[i],1); // opcode
3870 emit_writeword(1,(int)&psxRegs.code);
3871#endif
054175e9 3872 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3873 break;
3874 }
3875 c2op_epilogue(c2op,reglist);
3876 }
b9b61529 3877}
3878
e2b5e7aa 3879static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3880{
3881 // XXX: should just just do the exception instead
3882 if(!cop1_usable) {
b14b6a8f 3883 void *jaddr=out;
3d624f89 3884 emit_jmp(0);
b14b6a8f 3885 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3d624f89 3886 cop1_usable=1;
3887 }
3888}
3889
/* COP1 instructions are not assembled here; only the unusable stub is emitted. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3894
/* Floating-point conversions: only the COP1-unusable stub is emitted. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3900
/* Floating-point compares: only the COP1-unusable stub is emitted. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3905
/* Floating-point arithmetic: only the COP1-unusable stub is emitted. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3910
e2b5e7aa 3911static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3912{
3913 // case 0x18: MULT
3914 // case 0x19: MULTU
3915 // case 0x1A: DIV
3916 // case 0x1B: DIVU
3917 // case 0x1C: DMULT
3918 // case 0x1D: DMULTU
3919 // case 0x1E: DDIV
3920 // case 0x1F: DDIVU
3921 if(rs1[i]&&rs2[i])
3922 {
3923 if((opcode2[i]&4)==0) // 32-bit
3924 {
3925 if(opcode2[i]==0x18) // MULT
3926 {
3927 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3928 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3929 signed char hi=get_reg(i_regs->regmap,HIREG);
3930 signed char lo=get_reg(i_regs->regmap,LOREG);
3931 assert(m1>=0);
3932 assert(m2>=0);
3933 assert(hi>=0);
3934 assert(lo>=0);
3935 emit_smull(m1,m2,hi,lo);
3936 }
3937 if(opcode2[i]==0x19) // MULTU
3938 {
3939 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3940 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3941 signed char hi=get_reg(i_regs->regmap,HIREG);
3942 signed char lo=get_reg(i_regs->regmap,LOREG);
3943 assert(m1>=0);
3944 assert(m2>=0);
3945 assert(hi>=0);
3946 assert(lo>=0);
3947 emit_umull(m1,m2,hi,lo);
3948 }
3949 if(opcode2[i]==0x1A) // DIV
3950 {
3951 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3952 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3953 assert(d1>=0);
3954 assert(d2>=0);
3955 signed char quotient=get_reg(i_regs->regmap,LOREG);
3956 signed char remainder=get_reg(i_regs->regmap,HIREG);
3957 assert(quotient>=0);
3958 assert(remainder>=0);
3959 emit_movs(d1,remainder);
44a80f6a 3960 emit_movimm(0xffffffff,quotient);
3961 emit_negmi(quotient,quotient); // .. quotient and ..
3962 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3963 emit_movs(d2,HOST_TEMPREG);
3964 emit_jeq((int)out+52); // Division by zero
82336ba3 3965 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3966#ifdef HAVE_ARMV5
57871462 3967 emit_clz(HOST_TEMPREG,quotient);
3968 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3969#else
3970 emit_movimm(0,quotient);
3971 emit_addpl_imm(quotient,1,quotient);
3972 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3973 emit_jns((int)out-2*4);
3974#endif
57871462 3975 emit_orimm(quotient,1<<31,quotient);
3976 emit_shr(quotient,quotient,quotient);
3977 emit_cmp(remainder,HOST_TEMPREG);
3978 emit_subcs(remainder,HOST_TEMPREG,remainder);
3979 emit_adcs(quotient,quotient,quotient);
3980 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 3981 emit_jcc(out-16); // -4
57871462 3982 emit_teq(d1,d2);
3983 emit_negmi(quotient,quotient);
3984 emit_test(d1,d1);
3985 emit_negmi(remainder,remainder);
3986 }
3987 if(opcode2[i]==0x1B) // DIVU
3988 {
3989 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3990 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3991 assert(d1>=0);
3992 assert(d2>=0);
3993 signed char quotient=get_reg(i_regs->regmap,LOREG);
3994 signed char remainder=get_reg(i_regs->regmap,HIREG);
3995 assert(quotient>=0);
3996 assert(remainder>=0);
44a80f6a 3997 emit_mov(d1,remainder);
3998 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3999 emit_test(d2,d2);
44a80f6a 4000 emit_jeq((int)out+40); // Division by zero
665f33e1 4001#ifdef HAVE_ARMV5
57871462 4002 emit_clz(d2,HOST_TEMPREG);
4003 emit_movimm(1<<31,quotient);
4004 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 4005#else
4006 emit_movimm(0,HOST_TEMPREG);
82336ba3 4007 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4008 emit_lslpls_imm(d2,1,d2);
665f33e1 4009 emit_jns((int)out-2*4);
4010 emit_movimm(1<<31,quotient);
4011#endif
57871462 4012 emit_shr(quotient,HOST_TEMPREG,quotient);
4013 emit_cmp(remainder,d2);
4014 emit_subcs(remainder,d2,remainder);
4015 emit_adcs(quotient,quotient,quotient);
4016 emit_shrcc_imm(d2,1,d2);
b14b6a8f 4017 emit_jcc(out-16); // -4
57871462 4018 }
4019 }
4020 else // 64-bit
71e490c5 4021 assert(0);
57871462 4022 }
4023 else
4024 {
4025 // Multiply by zero is zero.
4026 // MIPS does not have a divide by zero exception.
4027 // The result is undefined, we return zero.
4028 signed char hr=get_reg(i_regs->regmap,HIREG);
4029 signed char lr=get_reg(i_regs->regmap,LOREG);
4030 if(hr>=0) emit_zeroreg(hr);
4031 if(lr>=0) emit_zeroreg(lr);
4032 }
4033}
4034#define multdiv_assemble multdiv_assemble_arm
4035
// Intentionally a no-op on ARM.
// The x86 backend uses this hook to put the value 0xf8 into a register;
// on ARM the hash is done with a single instruction (see do_rhash), so
// there is nothing to preload.
static void do_preload_rhash(int r) {
  (void)r; // parameter kept only for interface parity
}
4040
e2b5e7aa 4041static void do_preload_rhtbl(int ht) {
57871462 4042 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4043}
4044
// Emit the mini hash table hash: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
4048
e2b5e7aa 4049static void do_miniht_load(int ht,int rh) {
57871462 4050 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4051 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4052}
4053
e2b5e7aa 4054static void do_miniht_jump(int rs,int rh,int ht) {
57871462 4055 emit_cmp(rh,rs);
4056 emit_ldreq_indexed(ht,4,15);
4057 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4058 emit_mov(rs,7);
4059 emit_jmp(jump_vaddr_reg[7]);
4060 #else
4061 emit_jmp(jump_vaddr_reg[rs]);
4062 #endif
4063}
4064
e2b5e7aa 4065static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4066 #ifndef HAVE_ARMV7
57871462 4067 emit_movimm(return_address,rt); // PC into link register
4068 add_to_linker((int)out,return_address,1);
4069 emit_pcreladdr(temp);
4070 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4071 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4072 #else
4073 emit_movw(return_address&0x0000FFFF,rt);
4074 add_to_linker((int)out,return_address,1);
4075 emit_pcreladdr(temp);
4076 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4077 emit_movt(return_address&0xFFFF0000,rt);
4078 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4079 #endif
4080}
4081
e2b5e7aa 4082static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4083{
4084 //if(dirty_pre==dirty) return;
581335b0 4085 int hr,reg;
57871462 4086 for(hr=0;hr<HOST_REGS;hr++) {
4087 if(hr!=EXCLUDE_REG) {
4088 reg=pre[hr];
4089 if(((~u)>>(reg&63))&1) {
f776eb14 4090 if(reg>0) {
57871462 4091 if(((dirty_pre&~dirty)>>hr)&1) {
4092 if(reg>0&&reg<34) {
4093 emit_storereg(reg,hr);
4094 if( ((is32_pre&~uu)>>reg)&1 ) {
4095 emit_sarimm(hr,31,HOST_TEMPREG);
4096 emit_storereg(reg|64,HOST_TEMPREG);
4097 }
4098 }
4099 else if(reg>=64) {
4100 emit_storereg(reg,hr);
4101 }
4102 }
4103 }
57871462 4104 }
4105 }
4106 }
4107}
4108
4109
4110/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4111static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4112{
4113 int hr;
4114 int wrote=-1;
4115 for(hr=HOST_REGS-1;hr>=0;hr--) {
4116 if(hr!=EXCLUDE_REG) {
4117 if(pre[hr]!=entry[hr]) {
4118 if(pre[hr]>=0) {
4119 if((dirty>>hr)&1) {
4120 if(get_reg(entry,pre[hr])<0) {
4121 if(pre[hr]<64) {
4122 if(!((u>>pre[hr])&1)) {
4123 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4124 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4125 emit_sarimm(hr,31,hr+1);
4126 emit_strdreg(pre[hr],hr);
4127 }
4128 else
4129 emit_storereg(pre[hr],hr);
4130 }else{
4131 emit_storereg(pre[hr],hr);
4132 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4133 emit_sarimm(hr,31,hr);
4134 emit_storereg(pre[hr]|64,hr);
4135 }
4136 }
4137 }
4138 }else{
4139 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4140 emit_storereg(pre[hr],hr);
4141 }
4142 }
4143 wrote=hr;
4144 }
4145 }
4146 }
4147 }
4148 }
4149 }
4150 for(hr=0;hr<HOST_REGS;hr++) {
4151 if(hr!=EXCLUDE_REG) {
4152 if(pre[hr]!=entry[hr]) {
4153 if(pre[hr]>=0) {
4154 int nr;
4155 if((nr=get_reg(entry,pre[hr]))>=0) {
4156 emit_mov(hr,nr);
4157 }
4158 }
4159 }
4160 }
4161 }
4162}
4163#define wb_invalidate wb_invalidate_arm
4164*/
4165
d148d265 4166static void mark_clear_cache(void *target)
4167{
4168 u_long offset = (char *)target - (char *)BASE_ADDR;
4169 u_int mask = 1u << ((offset >> 12) & 31);
4170 if (!(needs_clear_cache[offset >> 17] & mask)) {
4171 char *start = (char *)((u_long)target & ~4095ul);
4172 start_tcache_write(start, start + 4096);
4173 needs_clear_cache[offset >> 17] |= mask;
4174 }
4175}
4176
dd3a91a1 4177// Clearing the cache is rather slow on ARM Linux, so mark the areas
4178// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4179static void do_clear_cache()
dd3a91a1 4180{
4181 int i,j;
4182 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4183 {
4184 u_int bitmap=needs_clear_cache[i];
4185 if(bitmap) {
4186 u_int start,end;
9f51b4b9 4187 for(j=0;j<32;j++)
dd3a91a1 4188 {
4189 if(bitmap&(1<<j)) {
bdeade46 4190 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4191 end=start+4095;
4192 j++;
4193 while(j<32) {
4194 if(bitmap&(1<<j)) {
4195 end+=4096;
4196 j++;
4197 }else{
d148d265 4198 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4199 break;
4200 }
4201 }
4202 }
4203 }
4204 needs_clear_cache[i]=0;
4205 }
4206 }
4207}
4208
// CPU-architecture-specific initialization hook.
// The ARM backend has nothing to set up here, so the body is empty.
static void arch_init()
{
}
b9b61529 4212
4213// vim:shiftwidth=2:expandtab