some drc debug patches
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
bdeade46 35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
4d646738 38#ifndef __MACH__
39#define CALLER_SAVE_REGS 0x100f
40#else
41#define CALLER_SAVE_REGS 0x120f
42#endif
43
e2b5e7aa 44#define unused __attribute__((unused))
45
dd114d7d 46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
61void indirect_jump_indexed();
62void indirect_jump();
63void do_interrupt();
64void jump_vaddr_r0();
65void jump_vaddr_r1();
66void jump_vaddr_r2();
67void jump_vaddr_r3();
68void jump_vaddr_r4();
69void jump_vaddr_r5();
70void jump_vaddr_r6();
71void jump_vaddr_r7();
72void jump_vaddr_r8();
73void jump_vaddr_r9();
74void jump_vaddr_r10();
75void jump_vaddr_r12();
76
77const u_int jump_vaddr_reg[16] = {
78 (int)jump_vaddr_r0,
79 (int)jump_vaddr_r1,
80 (int)jump_vaddr_r2,
81 (int)jump_vaddr_r3,
82 (int)jump_vaddr_r4,
83 (int)jump_vaddr_r5,
84 (int)jump_vaddr_r6,
85 (int)jump_vaddr_r7,
86 (int)jump_vaddr_r8,
87 (int)jump_vaddr_r9,
88 (int)jump_vaddr_r10,
89 0,
90 (int)jump_vaddr_r12,
91 0,
92 0,
93 0};
94
0bbd1454 95void invalidate_addr_r0();
96void invalidate_addr_r1();
97void invalidate_addr_r2();
98void invalidate_addr_r3();
99void invalidate_addr_r4();
100void invalidate_addr_r5();
101void invalidate_addr_r6();
102void invalidate_addr_r7();
103void invalidate_addr_r8();
104void invalidate_addr_r9();
105void invalidate_addr_r10();
106void invalidate_addr_r12();
107
108const u_int invalidate_addr_reg[16] = {
109 (int)invalidate_addr_r0,
110 (int)invalidate_addr_r1,
111 (int)invalidate_addr_r2,
112 (int)invalidate_addr_r3,
113 (int)invalidate_addr_r4,
114 (int)invalidate_addr_r5,
115 (int)invalidate_addr_r6,
116 (int)invalidate_addr_r7,
117 (int)invalidate_addr_r8,
118 (int)invalidate_addr_r9,
119 (int)invalidate_addr_r10,
120 0,
121 (int)invalidate_addr_r12,
122 0,
123 0,
124 0};
125
d148d265 126static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 127
57871462 128/* Linker */
129
e2b5e7aa 130static void set_jump_target(int addr,u_int target)
57871462 131{
132 u_char *ptr=(u_char *)addr;
133 u_int *ptr2=(u_int *)ptr;
134 if(ptr[3]==0xe2) {
135 assert((target-(u_int)ptr2-8)<1024);
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
139 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
140 }
141 else if(ptr[3]==0x72) {
142 // generated by emit_jno_unlikely
143 if((target-(u_int)ptr2-8)<1024) {
144 assert((addr&3)==0);
145 assert((target&3)==0);
146 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
147 }
148 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
149 assert((addr&3)==0);
150 assert((target&3)==0);
151 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
152 }
153 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
154 }
155 else {
156 assert((ptr[3]&0x0e)==0xa);
157 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
158 }
159}
160
161// This optionally copies the instruction from the target of the branch into
162// the space before the branch. Works, but the difference in speed is
163// usually insignificant.
e2b5e7aa 164#if 0
165static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 166{
167 u_char *ptr=(u_char *)addr;
168 u_int *ptr2=(u_int *)ptr;
169 assert(!copy||ptr2[-1]==0xe28dd000);
170 if(ptr[3]==0xe2) {
171 assert(!copy);
172 assert((target-(u_int)ptr2-8)<4096);
173 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
174 }
175 else {
176 assert((ptr[3]&0x0e)==0xa);
177 u_int target_insn=*(u_int *)target;
178 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
179 copy=0;
180 }
181 if((target_insn&0x0c100000)==0x04100000) { // Load
182 copy=0;
183 }
184 if(target_insn&0x08000000) {
185 copy=0;
186 }
187 if(copy) {
188 ptr2[-1]=target_insn;
189 target+=4;
190 }
191 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
192 }
193}
e2b5e7aa 194#endif
57871462 195
196/* Literal pool */
e2b5e7aa 197static void add_literal(int addr,int val)
57871462 198{
15776b68 199 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 200 literals[literalcount][0]=addr;
201 literals[literalcount][1]=val;
9f51b4b9 202 literalcount++;
203}
57871462 204
d148d265 205// from a pointer to external jump stub (which was produced by emit_extjump2)
206// find where the jumping insn is
207static void *find_extjump_insn(void *stub)
57871462 208{
209 int *ptr=(int *)(stub+4);
d148d265 210 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 211 u_int offset=*ptr&0xfff;
d148d265 212 void **l_ptr=(void *)ptr+offset+8;
213 return *l_ptr;
57871462 214}
215
f968d35d 216// find where external branch is liked to using addr of it's stub:
217// get address that insn one after stub loads (dyna_linker arg1),
218// treat it as a pointer to branch insn,
219// return addr where that branch jumps to
e2b5e7aa 220static int get_pointer(void *stub)
57871462 221{
222 //printf("get_pointer(%x)\n",(int)stub);
d148d265 223 int *i_ptr=find_extjump_insn(stub);
57871462 224 assert((*i_ptr&0x0f000000)==0x0a000000);
225 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
226}
227
228// Find the "clean" entry point from a "dirty" entry point
229// by skipping past the call to verify_code
e2b5e7aa 230static u_int get_clean_addr(int addr)
57871462 231{
232 int *ptr=(int *)addr;
665f33e1 233 #ifndef HAVE_ARMV7
57871462 234 ptr+=4;
235 #else
236 ptr+=6;
237 #endif
238 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
239 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
240 ptr++;
241 if((*ptr&0xFF000000)==0xea000000) {
242 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
243 }
244 return (u_int)ptr;
245}
246
e2b5e7aa 247static int verify_dirty(u_int *ptr)
57871462 248{
665f33e1 249 #ifndef HAVE_ARMV7
16c8be17 250 u_int offset;
57871462 251 // get from literal pool
15776b68 252 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 253 offset=*ptr&0xfff;
254 u_int source=*(u_int*)((void *)ptr+offset+8);
255 ptr++;
256 assert((*ptr&0xFFFF0000)==0xe59f0000);
257 offset=*ptr&0xfff;
258 u_int copy=*(u_int*)((void *)ptr+offset+8);
259 ptr++;
260 assert((*ptr&0xFFFF0000)==0xe59f0000);
261 offset=*ptr&0xfff;
262 u_int len=*(u_int*)((void *)ptr+offset+8);
263 ptr++;
264 ptr++;
57871462 265 #else
266 // ARMv7 movw/movt
267 assert((*ptr&0xFFF00000)==0xe3000000);
268 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
269 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
270 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
271 ptr+=6;
272 #endif
273 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
274 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 275 //printf("verify_dirty: %x %x %x\n",source,copy,len);
276 return !memcmp((void *)source,(void *)copy,len);
277}
278
279// This doesn't necessarily find all clean entry points, just
280// guarantees that it's not dirty
e2b5e7aa 281static int isclean(int addr)
57871462 282{
665f33e1 283 #ifndef HAVE_ARMV7
581335b0 284 u_int *ptr=((u_int *)addr)+4;
57871462 285 #else
581335b0 286 u_int *ptr=((u_int *)addr)+6;
57871462 287 #endif
288 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
289 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
293 return 1;
294}
295
4a35de07 296// get source that block at addr was compiled from (host pointers)
e2b5e7aa 297static void get_bounds(int addr,u_int *start,u_int *end)
57871462 298{
299 u_int *ptr=(u_int *)addr;
665f33e1 300 #ifndef HAVE_ARMV7
16c8be17 301 u_int offset;
57871462 302 // get from literal pool
15776b68 303 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 304 offset=*ptr&0xfff;
305 u_int source=*(u_int*)((void *)ptr+offset+8);
306 ptr++;
307 //assert((*ptr&0xFFFF0000)==0xe59f0000);
308 //offset=*ptr&0xfff;
309 //u_int copy=*(u_int*)((void *)ptr+offset+8);
310 ptr++;
311 assert((*ptr&0xFFFF0000)==0xe59f0000);
312 offset=*ptr&0xfff;
313 u_int len=*(u_int*)((void *)ptr+offset+8);
314 ptr++;
315 ptr++;
57871462 316 #else
317 // ARMv7 movw/movt
318 assert((*ptr&0xFFF00000)==0xe3000000);
319 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
320 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
321 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
322 ptr+=6;
323 #endif
324 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
325 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 326 *start=source;
327 *end=source+len;
328}
329
330/* Register allocation */
331
332// Note: registers are allocated clean (unmodified state)
333// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 334static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 335{
336 int r,hr;
337 int preferred_reg = (reg&7);
338 if(reg==CCREG) preferred_reg=HOST_CCREG;
339 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 340
57871462 341 // Don't allocate unused registers
342 if((cur->u>>reg)&1) return;
9f51b4b9 343
57871462 344 // see if it's already allocated
345 for(hr=0;hr<HOST_REGS;hr++)
346 {
347 if(cur->regmap[hr]==reg) return;
348 }
9f51b4b9 349
57871462 350 // Keep the same mapping if the register was already allocated in a loop
351 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 352
57871462 353 // Try to allocate the preferred register
354 if(cur->regmap[preferred_reg]==-1) {
355 cur->regmap[preferred_reg]=reg;
356 cur->dirty&=~(1<<preferred_reg);
357 cur->isconst&=~(1<<preferred_reg);
358 return;
359 }
360 r=cur->regmap[preferred_reg];
361 if(r<64&&((cur->u>>r)&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367 if(r>=64&&((cur->uu>>(r&63))&1)) {
368 cur->regmap[preferred_reg]=reg;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
371 return;
372 }
9f51b4b9 373
57871462 374 // Clear any unneeded registers
375 // We try to keep the mapping consistent, if possible, because it
376 // makes branches easier (especially loops). So we try to allocate
377 // first (see above) before removing old mappings. If this is not
378 // possible then go ahead and clear out the registers that are no
379 // longer needed.
380 for(hr=0;hr<HOST_REGS;hr++)
381 {
382 r=cur->regmap[hr];
383 if(r>=0) {
384 if(r<64) {
385 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
386 }
387 else
388 {
389 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
390 }
391 }
392 }
393 // Try to allocate any available register, but prefer
394 // registers that have not been used recently.
395 if(i>0) {
396 for(hr=0;hr<HOST_REGS;hr++) {
397 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
398 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
399 cur->regmap[hr]=reg;
400 cur->dirty&=~(1<<hr);
401 cur->isconst&=~(1<<hr);
402 return;
403 }
404 }
405 }
406 }
407 // Try to allocate any available register
408 for(hr=0;hr<HOST_REGS;hr++) {
409 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
410 cur->regmap[hr]=reg;
411 cur->dirty&=~(1<<hr);
412 cur->isconst&=~(1<<hr);
413 return;
414 }
415 }
9f51b4b9 416
57871462 417 // Ok, now we have to evict someone
418 // Pick a register we hopefully won't need soon
419 u_char hsn[MAXREG+1];
420 memset(hsn,10,sizeof(hsn));
421 int j;
422 lsn(hsn,i,&preferred_reg);
423 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
424 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
425 if(i>0) {
426 // Don't evict the cycle count at entry points, otherwise the entry
427 // stub will have to write it.
428 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
429 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
430 for(j=10;j>=3;j--)
431 {
432 // Alloc preferred register if available
433 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
434 for(hr=0;hr<HOST_REGS;hr++) {
435 // Evict both parts of a 64-bit register
436 if((cur->regmap[hr]&63)==r) {
437 cur->regmap[hr]=-1;
438 cur->dirty&=~(1<<hr);
439 cur->isconst&=~(1<<hr);
440 }
441 }
442 cur->regmap[preferred_reg]=reg;
443 return;
444 }
445 for(r=1;r<=MAXREG;r++)
446 {
447 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r+64) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 for(hr=0;hr<HOST_REGS;hr++) {
459 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 }
471 }
472 for(j=10;j>=0;j--)
473 {
474 for(r=1;r<=MAXREG;r++)
475 {
476 if(hsn[r]==j) {
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r+64) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 for(hr=0;hr<HOST_REGS;hr++) {
486 if(cur->regmap[hr]==r) {
487 cur->regmap[hr]=reg;
488 cur->dirty&=~(1<<hr);
489 cur->isconst&=~(1<<hr);
490 return;
491 }
492 }
493 }
494 }
495 }
c43b5311 496 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 497}
498
e2b5e7aa 499static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 500{
501 int preferred_reg = 8+(reg&1);
502 int r,hr;
9f51b4b9 503
57871462 504 // allocate the lower 32 bits
505 alloc_reg(cur,i,reg);
9f51b4b9 506
57871462 507 // Don't allocate unused registers
508 if((cur->uu>>reg)&1) return;
9f51b4b9 509
57871462 510 // see if the upper half is already allocated
511 for(hr=0;hr<HOST_REGS;hr++)
512 {
513 if(cur->regmap[hr]==reg+64) return;
514 }
9f51b4b9 515
57871462 516 // Keep the same mapping if the register was already allocated in a loop
517 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 518
57871462 519 // Try to allocate the preferred register
520 if(cur->regmap[preferred_reg]==-1) {
521 cur->regmap[preferred_reg]=reg|64;
522 cur->dirty&=~(1<<preferred_reg);
523 cur->isconst&=~(1<<preferred_reg);
524 return;
525 }
526 r=cur->regmap[preferred_reg];
527 if(r<64&&((cur->u>>r)&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533 if(r>=64&&((cur->uu>>(r&63))&1)) {
534 cur->regmap[preferred_reg]=reg|64;
535 cur->dirty&=~(1<<preferred_reg);
536 cur->isconst&=~(1<<preferred_reg);
537 return;
538 }
9f51b4b9 539
57871462 540 // Clear any unneeded registers
541 // We try to keep the mapping consistent, if possible, because it
542 // makes branches easier (especially loops). So we try to allocate
543 // first (see above) before removing old mappings. If this is not
544 // possible then go ahead and clear out the registers that are no
545 // longer needed.
546 for(hr=HOST_REGS-1;hr>=0;hr--)
547 {
548 r=cur->regmap[hr];
549 if(r>=0) {
550 if(r<64) {
551 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
552 }
553 else
554 {
555 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
556 }
557 }
558 }
559 // Try to allocate any available register, but prefer
560 // registers that have not been used recently.
561 if(i>0) {
562 for(hr=0;hr<HOST_REGS;hr++) {
563 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
564 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
565 cur->regmap[hr]=reg|64;
566 cur->dirty&=~(1<<hr);
567 cur->isconst&=~(1<<hr);
568 return;
569 }
570 }
571 }
572 }
573 // Try to allocate any available register
574 for(hr=0;hr<HOST_REGS;hr++) {
575 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
576 cur->regmap[hr]=reg|64;
577 cur->dirty&=~(1<<hr);
578 cur->isconst&=~(1<<hr);
579 return;
580 }
581 }
9f51b4b9 582
57871462 583 // Ok, now we have to evict someone
584 // Pick a register we hopefully won't need soon
585 u_char hsn[MAXREG+1];
586 memset(hsn,10,sizeof(hsn));
587 int j;
588 lsn(hsn,i,&preferred_reg);
589 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
590 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
591 if(i>0) {
592 // Don't evict the cycle count at entry points, otherwise the entry
593 // stub will have to write it.
594 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
595 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
596 for(j=10;j>=3;j--)
597 {
598 // Alloc preferred register if available
599 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
600 for(hr=0;hr<HOST_REGS;hr++) {
601 // Evict both parts of a 64-bit register
602 if((cur->regmap[hr]&63)==r) {
603 cur->regmap[hr]=-1;
604 cur->dirty&=~(1<<hr);
605 cur->isconst&=~(1<<hr);
606 }
607 }
608 cur->regmap[preferred_reg]=reg|64;
609 return;
610 }
611 for(r=1;r<=MAXREG;r++)
612 {
613 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r+64) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 }
637 }
638 for(j=10;j>=0;j--)
639 {
640 for(r=1;r<=MAXREG;r++)
641 {
642 if(hsn[r]==j) {
643 for(hr=0;hr<HOST_REGS;hr++) {
644 if(cur->regmap[hr]==r+64) {
645 cur->regmap[hr]=reg|64;
646 cur->dirty&=~(1<<hr);
647 cur->isconst&=~(1<<hr);
648 return;
649 }
650 }
651 for(hr=0;hr<HOST_REGS;hr++) {
652 if(cur->regmap[hr]==r) {
653 cur->regmap[hr]=reg|64;
654 cur->dirty&=~(1<<hr);
655 cur->isconst&=~(1<<hr);
656 return;
657 }
658 }
659 }
660 }
661 }
c43b5311 662 SysPrintf("This shouldn't happen");exit(1);
57871462 663}
664
665// Allocate a temporary register. This is done without regard to
666// dirty status or whether the register we request is on the unneeded list
667// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 668static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 669{
670 int r,hr;
671 int preferred_reg = -1;
9f51b4b9 672
57871462 673 // see if it's already allocated
674 for(hr=0;hr<HOST_REGS;hr++)
675 {
676 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
677 }
9f51b4b9 678
57871462 679 // Try to allocate any available register
680 for(hr=HOST_REGS-1;hr>=0;hr--) {
681 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
9f51b4b9 688
57871462 689 // Find an unneeded register
690 for(hr=HOST_REGS-1;hr>=0;hr--)
691 {
692 r=cur->regmap[hr];
693 if(r>=0) {
694 if(r<64) {
695 if((cur->u>>r)&1) {
696 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
697 cur->regmap[hr]=reg;
698 cur->dirty&=~(1<<hr);
699 cur->isconst&=~(1<<hr);
700 return;
701 }
702 }
703 }
704 else
705 {
706 if((cur->uu>>(r&63))&1) {
707 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 }
715 }
716 }
9f51b4b9 717
57871462 718 // Ok, now we have to evict someone
719 // Pick a register we hopefully won't need soon
720 // TODO: we might want to follow unconditional jumps here
721 // TODO: get rid of dupe code and make this into a function
722 u_char hsn[MAXREG+1];
723 memset(hsn,10,sizeof(hsn));
724 int j;
725 lsn(hsn,i,&preferred_reg);
726 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
727 if(i>0) {
728 // Don't evict the cycle count at entry points, otherwise the entry
729 // stub will have to write it.
730 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
731 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
732 for(j=10;j>=3;j--)
733 {
734 for(r=1;r<=MAXREG;r++)
735 {
736 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r+64) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 for(hr=0;hr<HOST_REGS;hr++) {
748 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 }
760 }
761 for(j=10;j>=0;j--)
762 {
763 for(r=1;r<=MAXREG;r++)
764 {
765 if(hsn[r]==j) {
766 for(hr=0;hr<HOST_REGS;hr++) {
767 if(cur->regmap[hr]==r+64) {
768 cur->regmap[hr]=reg;
769 cur->dirty&=~(1<<hr);
770 cur->isconst&=~(1<<hr);
771 return;
772 }
773 }
774 for(hr=0;hr<HOST_REGS;hr++) {
775 if(cur->regmap[hr]==r) {
776 cur->regmap[hr]=reg;
777 cur->dirty&=~(1<<hr);
778 cur->isconst&=~(1<<hr);
779 return;
780 }
781 }
782 }
783 }
784 }
c43b5311 785 SysPrintf("This shouldn't happen");exit(1);
57871462 786}
e2b5e7aa 787
57871462 788// Allocate a specific ARM register.
e2b5e7aa 789static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 790{
791 int n;
f776eb14 792 int dirty=0;
9f51b4b9 793
57871462 794 // see if it's already allocated (and dealloc it)
795 for(n=0;n<HOST_REGS;n++)
796 {
f776eb14 797 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
798 dirty=(cur->dirty>>n)&1;
799 cur->regmap[n]=-1;
800 }
57871462 801 }
9f51b4b9 802
57871462 803 cur->regmap[hr]=reg;
804 cur->dirty&=~(1<<hr);
f776eb14 805 cur->dirty|=dirty<<hr;
57871462 806 cur->isconst&=~(1<<hr);
807}
808
809// Alloc cycle count into dedicated register
e2b5e7aa 810static void alloc_cc(struct regstat *cur,int i)
57871462 811{
812 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
813}
814
815/* Special alloc */
816
817
818/* Assembler */
819
e2b5e7aa 820static unused char regname[16][4] = {
57871462 821 "r0",
822 "r1",
823 "r2",
824 "r3",
825 "r4",
826 "r5",
827 "r6",
828 "r7",
829 "r8",
830 "r9",
831 "r10",
832 "fp",
833 "r12",
834 "sp",
835 "lr",
836 "pc"};
837
e2b5e7aa 838static void output_w32(u_int word)
57871462 839{
840 *((u_int *)out)=word;
841 out+=4;
842}
e2b5e7aa 843
844static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
57871462 845{
846 assert(rd<16);
847 assert(rn<16);
848 assert(rm<16);
849 return((rn<<16)|(rd<<12)|rm);
850}
e2b5e7aa 851
852static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
57871462 853{
854 assert(rd<16);
855 assert(rn<16);
856 assert(imm<256);
857 assert((shift&1)==0);
858 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
859}
e2b5e7aa 860
861static u_int genimm(u_int imm,u_int *encoded)
57871462 862{
c2e3bd42 863 *encoded=0;
864 if(imm==0) return 1;
57871462 865 int i=32;
866 while(i>0)
867 {
868 if(imm<256) {
869 *encoded=((i&30)<<7)|imm;
870 return 1;
871 }
872 imm=(imm>>2)|(imm<<30);i-=2;
873 }
874 return 0;
875}
e2b5e7aa 876
877static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 878{
879 u_int ret=genimm(imm,encoded);
880 assert(ret);
581335b0 881 (void)ret;
cfbd3c6e 882}
e2b5e7aa 883
884static u_int genjmp(u_int addr)
57871462 885{
886 int offset=addr-(int)out-8;
e80343e2 887 if(offset<-33554432||offset>=33554432) {
888 if (addr>2) {
c43b5311 889 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 890 exit(1);
891 }
892 return 0;
893 }
57871462 894 return ((u_int)offset>>2)&0xffffff;
895}
896
e2b5e7aa 897static void emit_mov(int rs,int rt)
57871462 898{
899 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
900 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
901}
902
e2b5e7aa 903static void emit_movs(int rs,int rt)
57871462 904{
905 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
906 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
907}
908
e2b5e7aa 909static void emit_add(int rs1,int rs2,int rt)
57871462 910{
911 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
912 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
913}
914
e2b5e7aa 915static void emit_adds(int rs1,int rs2,int rt)
57871462 916{
917 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
918 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
919}
920
e2b5e7aa 921static void emit_adcs(int rs1,int rs2,int rt)
57871462 922{
923 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
924 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
925}
926
e2b5e7aa 927static void emit_sbc(int rs1,int rs2,int rt)
57871462 928{
929 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
930 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
931}
932
e2b5e7aa 933static void emit_sbcs(int rs1,int rs2,int rt)
57871462 934{
935 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
936 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
937}
938
e2b5e7aa 939static void emit_neg(int rs, int rt)
57871462 940{
941 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
942 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
943}
944
e2b5e7aa 945static void emit_negs(int rs, int rt)
57871462 946{
947 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
948 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
949}
950
e2b5e7aa 951static void emit_sub(int rs1,int rs2,int rt)
57871462 952{
953 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
954 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
955}
956
e2b5e7aa 957static void emit_subs(int rs1,int rs2,int rt)
57871462 958{
959 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
960 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
961}
962
e2b5e7aa 963static void emit_zeroreg(int rt)
57871462 964{
965 assem_debug("mov %s,#0\n",regname[rt]);
966 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
967}
968
e2b5e7aa 969static void emit_loadlp(u_int imm,u_int rt)
790ee18e 970{
971 add_literal((int)out,imm);
972 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
973 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
974}
e2b5e7aa 975
976static void emit_movw(u_int imm,u_int rt)
790ee18e 977{
978 assert(imm<65536);
979 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
980 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
981}
e2b5e7aa 982
983static void emit_movt(u_int imm,u_int rt)
790ee18e 984{
985 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
986 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
987}
e2b5e7aa 988
989static void emit_movimm(u_int imm,u_int rt)
790ee18e 990{
991 u_int armval;
992 if(genimm(imm,&armval)) {
993 assem_debug("mov %s,#%d\n",regname[rt],imm);
994 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
995 }else if(genimm(~imm,&armval)) {
996 assem_debug("mvn %s,#%d\n",regname[rt],imm);
997 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
998 }else if(imm<65536) {
665f33e1 999 #ifndef HAVE_ARMV7
790ee18e 1000 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1001 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1002 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1003 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1004 #else
1005 emit_movw(imm,rt);
1006 #endif
1007 }else{
665f33e1 1008 #ifndef HAVE_ARMV7
790ee18e 1009 emit_loadlp(imm,rt);
1010 #else
1011 emit_movw(imm&0x0000FFFF,rt);
1012 emit_movt(imm&0xFFFF0000,rt);
1013 #endif
1014 }
1015}
e2b5e7aa 1016
1017static void emit_pcreladdr(u_int rt)
790ee18e 1018{
1019 assem_debug("add %s,pc,#?\n",regname[rt]);
1020 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1021}
1022
e2b5e7aa 1023static void emit_loadreg(int r, int hr)
57871462 1024{
3d624f89 1025 if(r&64) {
c43b5311 1026 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1027 assert(0);
1028 return;
3d624f89 1029 }
57871462 1030 if((r&63)==0)
1031 emit_zeroreg(hr);
1032 else {
3d624f89 1033 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1034 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1035 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1036 if(r==CCREG) addr=(int)&cycle_count;
1037 if(r==CSREG) addr=(int)&Status;
1038 if(r==FSREG) addr=(int)&FCR31;
1039 if(r==INVCP) addr=(int)&invc_ptr;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1044 }
1045}
e2b5e7aa 1046
1047static void emit_storereg(int r, int hr)
57871462 1048{
3d624f89 1049 if(r&64) {
c43b5311 1050 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1051 assert(0);
1052 return;
3d624f89 1053 }
3d624f89 1054 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1055 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1056 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1057 if(r==CCREG) addr=(int)&cycle_count;
1058 if(r==FSREG) addr=(int)&FCR31;
1059 u_int offset = addr-(u_int)&dynarec_local;
1060 assert(offset<4096);
1061 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1062 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1063}
1064
e2b5e7aa 1065static void emit_test(int rs, int rt)
57871462 1066{
1067 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1068 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1069}
1070
e2b5e7aa 1071static void emit_testimm(int rs,int imm)
57871462 1072{
1073 u_int armval;
5a05d80c 1074 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1075 genimm_checked(imm,&armval);
57871462 1076 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1077}
1078
e2b5e7aa 1079static void emit_testeqimm(int rs,int imm)
b9b61529 1080{
1081 u_int armval;
1082 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1083 genimm_checked(imm,&armval);
b9b61529 1084 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1085}
1086
e2b5e7aa 1087static void emit_not(int rs,int rt)
57871462 1088{
1089 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1090 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1091}
1092
e2b5e7aa 1093static void emit_mvnmi(int rs,int rt)
b9b61529 1094{
1095 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1096 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1097}
1098
e2b5e7aa 1099static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1100{
1101 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1102 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1103}
1104
e2b5e7aa 1105static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1106{
1107 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1109}
e2b5e7aa 1110
1111static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1112{
1113 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1115}
1116
e2b5e7aa 1117static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1118{
1119 assert(rs<16);
1120 assert(rt<16);
1121 assert(imm<32);
1122 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1123 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1124}
1125
e2b5e7aa 1126static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1127{
1128 assert(rs<16);
1129 assert(rt<16);
1130 assert(imm<32);
1131 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1132 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1133}
1134
e2b5e7aa 1135static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1136{
1137 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1138 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1139}
1140
e2b5e7aa 1141static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1142{
1143 assert(rs<16);
1144 assert(rt<16);
1145 if(imm!=0) {
57871462 1146 u_int armval;
1147 if(genimm(imm,&armval)) {
1148 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1149 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1150 }else if(genimm(-imm,&armval)) {
8a0a8423 1151 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1152 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1153 #ifdef HAVE_ARMV7
1154 }else if(rt!=rs&&(u_int)imm<65536) {
1155 emit_movw(imm&0x0000ffff,rt);
1156 emit_add(rs,rt,rt);
1157 }else if(rt!=rs&&(u_int)-imm<65536) {
1158 emit_movw(-imm&0x0000ffff,rt);
1159 emit_sub(rs,rt,rt);
1160 #endif
1161 }else if((u_int)-imm<65536) {
57871462 1162 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1163 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1164 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1165 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1166 }else {
1167 do {
1168 int shift = (ffs(imm) - 1) & ~1;
1169 int imm8 = imm & (0xff << shift);
1170 genimm_checked(imm8,&armval);
1171 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1172 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1173 rs = rt;
1174 imm &= ~imm8;
1175 }
1176 while (imm != 0);
57871462 1177 }
1178 }
1179 else if(rs!=rt) emit_mov(rs,rt);
1180}
1181
e2b5e7aa 1182static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1183{
1184 assert(imm>-65536&&imm<65536);
1185 u_int armval;
1186 if(genimm(imm,&armval)) {
1187 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1188 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1189 }else if(genimm(-imm,&armval)) {
1190 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1191 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1192 }else if(imm<0) {
1193 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1194 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1195 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1196 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1197 }else{
1198 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1199 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1200 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1201 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1202 }
1203}
e2b5e7aa 1204
1205static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1206{
1207 emit_addimm(rt,imm,rt);
1208}
1209
e2b5e7aa 1210static void emit_addnop(u_int r)
57871462 1211{
1212 assert(r<16);
1213 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1214 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1215}
1216
e2b5e7aa 1217static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1218{
1219 u_int armval;
cfbd3c6e 1220 genimm_checked(imm,&armval);
57871462 1221 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1222 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1223}
1edfcc68 1224
e2b5e7aa 1225static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1226{
1227 assert(0);
1228 u_int armval;
cfbd3c6e 1229 genimm_checked(imm,&armval);
57871462 1230 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1232}
1233
e2b5e7aa 1234static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1235{
1236 // TODO: if(genimm(imm,&armval)) ...
1237 // else
1238 emit_movimm(imm,HOST_TEMPREG);
1239 emit_adds(HOST_TEMPREG,rsl,rtl);
1240 emit_adcimm(rsh,0,rth);
1241}
1242
e2b5e7aa 1243static void emit_andimm(int rs,int imm,int rt)
57871462 1244{
1245 u_int armval;
790ee18e 1246 if(imm==0) {
1247 emit_zeroreg(rt);
1248 }else if(genimm(imm,&armval)) {
57871462 1249 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1250 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1251 }else if(genimm(~imm,&armval)) {
1252 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1253 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1254 }else if(imm==65535) {
332a4533 1255 #ifndef HAVE_ARMV6
57871462 1256 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1257 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1258 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1260 #else
1261 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1262 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1263 #endif
1264 }else{
1265 assert(imm>0&&imm<65535);
665f33e1 1266 #ifndef HAVE_ARMV7
57871462 1267 assem_debug("mov r14,#%d\n",imm&0xFF00);
1268 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1269 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1270 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1271 #else
1272 emit_movw(imm,HOST_TEMPREG);
1273 #endif
1274 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1275 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1276 }
1277}
1278
e2b5e7aa 1279static void emit_orimm(int rs,int imm,int rt)
57871462 1280{
1281 u_int armval;
790ee18e 1282 if(imm==0) {
1283 if(rs!=rt) emit_mov(rs,rt);
1284 }else if(genimm(imm,&armval)) {
57871462 1285 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1286 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1287 }else{
1288 assert(imm>0&&imm<65536);
1289 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1291 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1293 }
1294}
1295
e2b5e7aa 1296static void emit_xorimm(int rs,int imm,int rt)
57871462 1297{
57871462 1298 u_int armval;
790ee18e 1299 if(imm==0) {
1300 if(rs!=rt) emit_mov(rs,rt);
1301 }else if(genimm(imm,&armval)) {
57871462 1302 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1303 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1304 }else{
514ed0d9 1305 assert(imm>0&&imm<65536);
57871462 1306 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1308 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1310 }
1311}
1312
e2b5e7aa 1313static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1314{
1315 assert(imm>0);
1316 assert(imm<32);
1317 //if(imm==1) ...
1318 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1319 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1320}
1321
e2b5e7aa 1322static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1323{
1324 assert(imm>0);
1325 assert(imm<32);
1326 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1327 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1328}
1329
e2b5e7aa 1330static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1331{
1332 assert(imm>0);
1333 assert(imm<32);
1334 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1335 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1336}
1337
e2b5e7aa 1338static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1339{
1340 assert(imm>0);
1341 assert(imm<32);
1342 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1343 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1344}
1345
e2b5e7aa 1346static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1347{
1348 assert(imm>0);
1349 assert(imm<32);
1350 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1351 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1352}
1353
e2b5e7aa 1354static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1355{
1356 assert(imm>0);
1357 assert(imm<32);
1358 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1359 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1360}
1361
e2b5e7aa 1362static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1363{
1364 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1365 assert(imm>0);
1366 assert(imm<32);
1367 //if(imm==1) ...
1368 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1369 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1370 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1371 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1372}
1373
e2b5e7aa 1374static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1375{
1376 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1377 assert(imm>0);
1378 assert(imm<32);
1379 //if(imm==1) ...
1380 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1381 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1382 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1383 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1384}
1385
/*
 * Sign-extend the low 16 bits of rs into rt.  With HAVE_ARMV6 a single
 * `sxth` is emitted; otherwise the value is shifted left by 16 and then
 * arithmetically right by 16.
 */
static void emit_signextend16(int rs,int rt)
{
#ifdef HAVE_ARMV6
  const u_int sxth_opcode = 0xe6bf0070;

  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(sxth_opcode | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1396
/*
 * Sign-extend the low 8 bits of rs into rt.  With HAVE_ARMV6 a single
 * `sxtb` is emitted; otherwise the value is shifted left by 24 and then
 * arithmetically right by 24.
 */
static void emit_signextend8(int rs,int rt)
{
#ifdef HAVE_ARMV6
  const u_int sxtb_opcode = 0xe6af0070;

  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(sxtb_opcode | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1407
e2b5e7aa 1408static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1409{
1410 assert(rs<16);
1411 assert(rt<16);
1412 assert(shift<16);
1413 //if(imm==1) ...
1414 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1415 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1416}
e2b5e7aa 1417
1418static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1419{
1420 assert(rs<16);
1421 assert(rt<16);
1422 assert(shift<16);
1423 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1424 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1425}
e2b5e7aa 1426
1427static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1428{
1429 assert(rs<16);
1430 assert(rt<16);
1431 assert(shift<16);
1432 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1433 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1434}
57871462 1435
e2b5e7aa 1436static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1437{
1438 assert(rs<16);
1439 assert(rt<16);
1440 assert(shift<16);
1441 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1442 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1443}
e2b5e7aa 1444
1445static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1446{
1447 assert(rs<16);
1448 assert(rt<16);
1449 assert(shift<16);
1450 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1451 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1452}
1453
e2b5e7aa 1454static void emit_cmpimm(int rs,int imm)
57871462 1455{
1456 u_int armval;
1457 if(genimm(imm,&armval)) {
5a05d80c 1458 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1459 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1460 }else if(genimm(-imm,&armval)) {
5a05d80c 1461 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1462 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1463 }else if(imm>0) {
1464 assert(imm<65536);
57871462 1465 emit_movimm(imm,HOST_TEMPREG);
57871462 1466 assem_debug("cmp %s,r14\n",regname[rs]);
1467 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1468 }else{
1469 assert(imm>-65536);
57871462 1470 emit_movimm(-imm,HOST_TEMPREG);
57871462 1471 assem_debug("cmn %s,r14\n",regname[rs]);
1472 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1473 }
1474}
1475
e2b5e7aa 1476static void emit_cmovne_imm(int imm,int rt)
57871462 1477{
1478 assem_debug("movne %s,#%d\n",regname[rt],imm);
1479 u_int armval;
cfbd3c6e 1480 genimm_checked(imm,&armval);
57871462 1481 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1482}
e2b5e7aa 1483
1484static void emit_cmovl_imm(int imm,int rt)
57871462 1485{
1486 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1487 u_int armval;
cfbd3c6e 1488 genimm_checked(imm,&armval);
57871462 1489 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1490}
e2b5e7aa 1491
1492static void emit_cmovb_imm(int imm,int rt)
57871462 1493{
1494 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1495 u_int armval;
cfbd3c6e 1496 genimm_checked(imm,&armval);
57871462 1497 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1498}
e2b5e7aa 1499
1500static void emit_cmovs_imm(int imm,int rt)
57871462 1501{
1502 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1503 u_int armval;
cfbd3c6e 1504 genimm_checked(imm,&armval);
57871462 1505 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1506}
e2b5e7aa 1507
1508static void emit_cmove_reg(int rs,int rt)
57871462 1509{
1510 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1511 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1512}
e2b5e7aa 1513
1514static void emit_cmovne_reg(int rs,int rt)
57871462 1515{
1516 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1517 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1518}
e2b5e7aa 1519
1520static void emit_cmovl_reg(int rs,int rt)
57871462 1521{
1522 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1523 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1524}
e2b5e7aa 1525
1526static void emit_cmovs_reg(int rs,int rt)
57871462 1527{
1528 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1529 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1530}
1531
/*
 * Set rt to 1 when rs is less than imm under the LT condition (movlt),
 * otherwise 0.  When rt is a different register it is zeroed before the
 * compare; when rt aliases rs the zero is written only after the compare
 * has read rs.
 */
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1539
/*
 * Set rt to 1 when the compare of rs against imm leaves the CC condition
 * true (movcc), otherwise 0.  When rt is a different register it is zeroed
 * before the compare; when rt aliases rs the zero is written only after
 * the compare has read rs.
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1547
/*
 * 64-bit "set if less than immediate" for the pair rsh:rsl, result in rt.
 * The low word is compared first via emit_slti32(); the high word then
 * overrides that result: for a non-negative imm a non-zero high word
 * forces 0 and a negative one forces 1; for a negative imm a high word
 * other than -1 forces 0 and one below -1 forces 1.
 * rsh must not be the same register as rt.
 */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }

  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
e2b5e7aa 1565
/*
 * 64-bit unsigned "set if less than immediate" for the pair rsh:rsl,
 * result in rt.  The low word is compared first via emit_sltiu32(); the
 * high word then overrides that result: for a non-negative imm a non-zero
 * high word forces 0; for a negative imm a high word other than -1
 * forces 1.  rsh must not be the same register as rt.
 */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);

  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }

  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1581
e2b5e7aa 1582static void emit_cmp(int rs,int rt)
57871462 1583{
1584 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1585 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1586}
e2b5e7aa 1587
/*
 * Set rt to 1 when rs is greater than zero, otherwise 0: compare rs with
 * 1, preload rt with 1, then overwrite it with 0 under the LT condition.
 */
static void emit_set_gz32(int rs, int rt)
{
  const int threshold = 1;

  emit_cmpimm(rs, threshold);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1595
/*
 * Turn rs into a boolean in rt: when rs is non-zero rt becomes 1.  For
 * distinct registers rs is first copied into rt with emit_movs(); when
 * they alias, rs is only tested against itself.
 */
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1603
/*
 * 64-bit "greater than zero" for the pair rsh:rsl, result in rt.  The low
 * word is evaluated with emit_set_gz32(); the high word then overrides the
 * result: non-zero forces 1 and negative forces 0.
 *
 * NOTE(review): emit_set_gz32() decides with a signed LT condition, so a
 * low word with bit 31 set and a zero high word yields 0 here - confirm
 * whether that is intended for a 64-bit value.
 */
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  /* Low word first. */
  emit_set_gz32(rsl, rt);

  /* High word overrides. */
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1612
/*
 * 64-bit "non-zero" for the pair rsh:rsl: OR both halves into rt with the
 * flags updated, then write 1 into rt when the result was non-zero.
 */
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  const int result = rt;

  emit_or_and_set_flags(rsh, rsl, result);
  emit_cmovne_imm(1, result);
}
e2b5e7aa 1619
/*
 * Set rt to 1 when `cmp rs1,rs2` leaves the LT condition true, otherwise
 * 0.  If rt aliases neither source it is zeroed before the compare; if it
 * aliases one of them the zero is written only after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliases_source = (rs1 == rt || rs2 == rt);

  if (!aliases_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_source)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1628
/*
 * Set rt to 1 when `cmp rs1,rs2` leaves the CC condition true, otherwise
 * 0.  If rt aliases neither source it is zeroed before the compare; if it
 * aliases one of them the zero is written only after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliases_source = (rs1 == rt || rs2 == rt);

  if (!aliases_source)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliases_source)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1637
1638static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1639{
1640 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1641 assert(u1!=rt);
1642 assert(u2!=rt);
1643 emit_cmp(l1,l2);
1644 emit_movimm(0,rt);
1645 emit_sbcs(u1,u2,HOST_TEMPREG);
1646 emit_cmovl_imm(1,rt);
1647}
e2b5e7aa 1648
1649static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1650{
1651 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1652 assert(u1!=rt);
1653 assert(u2!=rt);
1654 emit_cmp(l1,l2);
1655 emit_movimm(0,rt);
1656 emit_sbcs(u1,u2,HOST_TEMPREG);
1657 emit_cmovb_imm(1,rt);
1658}
1659
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();

/* Builds one table entry: the function's address and its name with a
 * leading space, ready to be appended to a debug line. */
#define FUNCNAME(f) { (intptr_t)f, " " #f }

/* Branch targets that the debug trace can label by name. */
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

/*
 * Return the table name registered for address a, or an empty string when
 * the address is not in function_names[].
 */
static const char *func_name(intptr_t a)
{
  const int count = (int)(sizeof(function_names) / sizeof(function_names[0]));
  int idx = 0;

  while (idx < count) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
    idx++;
  }
  return "";
}
#else
/* Without DRC_DBG no names are available; always yields "". */
#define func_name(x) ""
#endif
1700
e2b5e7aa 1701static void emit_call(int a)
57871462 1702{
dd114d7d 1703 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1704 u_int offset=genjmp(a);
1705 output_w32(0xeb000000|offset);
1706}
e2b5e7aa 1707
1708static void emit_jmp(int a)
57871462 1709{
dd114d7d 1710 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1711 u_int offset=genjmp(a);
1712 output_w32(0xea000000|offset);
1713}
e2b5e7aa 1714
1715static void emit_jne(int a)
57871462 1716{
1717 assem_debug("bne %x\n",a);
1718 u_int offset=genjmp(a);
1719 output_w32(0x1a000000|offset);
1720}
e2b5e7aa 1721
1722static void emit_jeq(int a)
57871462 1723{
1724 assem_debug("beq %x\n",a);
1725 u_int offset=genjmp(a);
1726 output_w32(0x0a000000|offset);
1727}
e2b5e7aa 1728
1729static void emit_js(int a)
57871462 1730{
1731 assem_debug("bmi %x\n",a);
1732 u_int offset=genjmp(a);
1733 output_w32(0x4a000000|offset);
1734}
e2b5e7aa 1735
1736static void emit_jns(int a)
57871462 1737{
1738 assem_debug("bpl %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0x5a000000|offset);
1741}
e2b5e7aa 1742
1743static void emit_jl(int a)
57871462 1744{
1745 assem_debug("blt %x\n",a);
1746 u_int offset=genjmp(a);
1747 output_w32(0xba000000|offset);
1748}
e2b5e7aa 1749
1750static void emit_jge(int a)
57871462 1751{
1752 assem_debug("bge %x\n",a);
1753 u_int offset=genjmp(a);
1754 output_w32(0xaa000000|offset);
1755}
e2b5e7aa 1756
1757static void emit_jno(int a)
57871462 1758{
1759 assem_debug("bvc %x\n",a);
1760 u_int offset=genjmp(a);
1761 output_w32(0x7a000000|offset);
1762}
e2b5e7aa 1763
1764static void emit_jc(int a)
57871462 1765{
1766 assem_debug("bcs %x\n",a);
1767 u_int offset=genjmp(a);
1768 output_w32(0x2a000000|offset);
1769}
e2b5e7aa 1770
1771static void emit_jcc(int a)
57871462 1772{
1773 assem_debug("bcc %x\n",a);
1774 u_int offset=genjmp(a);
1775 output_w32(0x3a000000|offset);
1776}
1777
e2b5e7aa 1778static void emit_callreg(u_int r)
57871462 1779{
c6c3b1b3 1780 assert(r<15);
1781 assem_debug("blx %s\n",regname[r]);
1782 output_w32(0xe12fff30|r);
57871462 1783}
e2b5e7aa 1784
1785static void emit_jmpreg(u_int r)
57871462 1786{
1787 assem_debug("mov pc,%s\n",regname[r]);
1788 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1789}
1790
e2b5e7aa 1791static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1792{
1793 assert(offset>-4096&&offset<4096);
1794 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1795 if(offset>=0) {
1796 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1797 }else{
1798 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1799 }
1800}
e2b5e7aa 1801
1802static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1803{
1804 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1805 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1806}
e2b5e7aa 1807
1808static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1809{
1810 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1812}
e2b5e7aa 1813
1814static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1815{
1816 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1818}
e2b5e7aa 1819
1820static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1821{
1822 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1824}
e2b5e7aa 1825
1826static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1827{
1828 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1829 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1830}
e2b5e7aa 1831
1832static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1833{
1834 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1835 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1836}
e2b5e7aa 1837
/*
 * Word load that optionally goes through a map register.  A negative map
 * means "no map": a plain offset load is emitted.  Otherwise addr must be
 * 0 and the load uses rs indexed by map scaled by four.
 */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1846
/*
 * Load a 64-bit value as two words: rh from addr and rl from the next
 * word.  The rh load is skipped when rh is negative.
 *
 * Without a map (map < 0) both loads are plain offset loads at addr and
 * addr+4.  With a map, rh is loaded through it, map is then incremented by
 * one in place, and rl is loaded through the incremented map; rh must not
 * be the same register as rs in that case.
 */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high)
      emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }

  assert(rh != rs);
  if (want_high)
    emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1859
1860static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1861{
1862 assert(offset>-256&&offset<256);
1863 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1864 if(offset>=0) {
1865 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1866 }else{
1867 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1868 }
1869}
e2b5e7aa 1870
1871static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1872{
1873 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1874 else {
1875 if(addr==0) {
1876 emit_shlimm(map,2,map);
1877 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1878 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1879 }else{
1880 assert(addr>-256&&addr<256);
1881 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1882 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1883 emit_movsbl_indexed(addr, rt, rt);
1884 }
1885 }
1886}
e2b5e7aa 1887
1888static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1889{
1890 assert(offset>-256&&offset<256);
1891 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1892 if(offset>=0) {
1893 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1894 }else{
1895 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1896 }
1897}
e2b5e7aa 1898
1899static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1900{
1901 assert(offset>-4096&&offset<4096);
1902 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1903 if(offset>=0) {
1904 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1905 }else{
1906 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1907 }
1908}
e2b5e7aa 1909
1910static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1911{
1912 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1914}
e2b5e7aa 1915
/*
 * Unsigned byte load that optionally goes through a map register.  With
 * map < 0 a plain offset load is emitted.  Otherwise the load is indexed
 * by map scaled by four; a non-zero addr is first added to rs, with the
 * sum placed in rt and used as the base.
 */
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    base = rt;
  }
  emit_movzbl_dualindexedx4(base, map, rt);
}
e2b5e7aa 1928
1929static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1930{
1931 assert(offset>-256&&offset<256);
1932 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1933 if(offset>=0) {
1934 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1935 }else{
1936 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1937 }
1938}
e2b5e7aa 1939
054175e9 1940static void emit_ldrd(int offset, int rs, int rt)
1941{
1942 assert(offset>-256&&offset<256);
1943 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1944 if(offset>=0) {
1945 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1946 }else{
1947 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1948 }
1949}
e2b5e7aa 1950
1951static void emit_readword(int addr, int rt)
57871462 1952{
1953 u_int offset = addr-(u_int)&dynarec_local;
1954 assert(offset<4096);
1955 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1956 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1957}
e2b5e7aa 1958
1959static unused void emit_movsbl(int addr, int rt)
57871462 1960{
1961 u_int offset = addr-(u_int)&dynarec_local;
1962 assert(offset<256);
1963 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1964 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1965}
e2b5e7aa 1966
1967static unused void emit_movswl(int addr, int rt)
57871462 1968{
1969 u_int offset = addr-(u_int)&dynarec_local;
1970 assert(offset<256);
1971 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1972 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1973}
e2b5e7aa 1974
1975static unused void emit_movzbl(int addr, int rt)
57871462 1976{
1977 u_int offset = addr-(u_int)&dynarec_local;
1978 assert(offset<4096);
1979 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1980 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1981}
e2b5e7aa 1982
1983static unused void emit_movzwl(int addr, int rt)
57871462 1984{
1985 u_int offset = addr-(u_int)&dynarec_local;
1986 assert(offset<256);
1987 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1988 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1989}
57871462 1990
e2b5e7aa 1991static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1992{
1993 assert(offset>-4096&&offset<4096);
1994 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1995 if(offset>=0) {
1996 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1997 }else{
1998 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1999 }
2000}
e2b5e7aa 2001
2002static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 2003{
2004 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2005 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2006}
e2b5e7aa 2007
/*
 * Word store that optionally goes through a map register.  A negative map
 * means "no map": a plain offset store is emitted.  Otherwise addr must be
 * 0 and the store uses rs indexed by map scaled by four.  The temp
 * argument is not used by this function.
 */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 2016
/*
 * Store a 64-bit value as two words: rh at addr and rl at the next word.
 *
 * Without a map (map < 0) two plain offset stores are emitted at addr and
 * addr+4; the rh store is skipped when rh is negative.
 *
 * With a map, rh must be non-negative.  If temp is a different register
 * from rs, temp receives map+1 and serves as the map for the rl store.
 * If temp is rs itself, rh is stored first, rs is advanced by 4 in place,
 * and rl is stored through the unchanged map.
 */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  if (temp != rs) {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 2033
2034static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 2035{
2036 assert(offset>-256&&offset<256);
2037 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2038 if(offset>=0) {
2039 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2040 }else{
2041 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2042 }
2043}
e2b5e7aa 2044
2045static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 2046{
2047 assert(offset>-4096&&offset<4096);
2048 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2049 if(offset>=0) {
2050 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2051 }else{
2052 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2053 }
2054}
e2b5e7aa 2055
2056static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 2057{
2058 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2059 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2060}
e2b5e7aa 2061
// Store byte rt. Without a map register (map < 0) this is [rs + addr];
// with one it is [base + map*4], where base is rs itself when addr is zero
// and temp = rs + addr otherwise.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base = rs;
  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    base = temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
e2b5e7aa 2074
2075static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2076{
2077 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2079}
e2b5e7aa 2080
2081static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2082{
2083 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2084 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2085}
e2b5e7aa 2086
2087static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2088{
2089 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2090 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2091}
e2b5e7aa 2092
2093static void emit_writeword(int rt, int addr)
57871462 2094{
2095 u_int offset = addr-(u_int)&dynarec_local;
2096 assert(offset<4096);
2097 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2098 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2099}
e2b5e7aa 2100
2101static unused void emit_writehword(int rt, int addr)
57871462 2102{
2103 u_int offset = addr-(u_int)&dynarec_local;
2104 assert(offset<256);
2105 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2106 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2107}
e2b5e7aa 2108
2109static unused void emit_writebyte(int rt, int addr)
57871462 2110{
2111 u_int offset = addr-(u_int)&dynarec_local;
2112 assert(offset<4096);
74426039 2113 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2114 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2115}
57871462 2116
e2b5e7aa 2117static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2118{
2119 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2120 assert(rs1<16);
2121 assert(rs2<16);
2122 assert(hi<16);
2123 assert(lo<16);
2124 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2125}
e2b5e7aa 2126
2127static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2128{
2129 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2130 assert(rs1<16);
2131 assert(rs2<16);
2132 assert(hi<16);
2133 assert(lo<16);
2134 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2135}
2136
e2b5e7aa 2137static void emit_clz(int rs,int rt)
57871462 2138{
2139 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2140 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2141}
2142
e2b5e7aa 2143static void emit_subcs(int rs1,int rs2,int rt)
57871462 2144{
2145 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2146 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2147}
2148
e2b5e7aa 2149static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2150{
2151 assert(imm>0);
2152 assert(imm<32);
2153 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2154 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2155}
2156
e2b5e7aa 2157static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2158{
2159 assert(imm>0);
2160 assert(imm<32);
2161 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2162 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2163}
2164
e2b5e7aa 2165static void emit_negmi(int rs, int rt)
57871462 2166{
2167 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2168 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2169}
2170
e2b5e7aa 2171static void emit_negsmi(int rs, int rt)
57871462 2172{
2173 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2174 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2175}
2176
e2b5e7aa 2177static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2178{
2179 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2180 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2181}
2182
e2b5e7aa 2183static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2184{
2185 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2186 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2187}
2188
e2b5e7aa 2189static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2190{
2191 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2192 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2193}
2194
e2b5e7aa 2195static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2196{
2197 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2198 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2199}
2200
e2b5e7aa 2201static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2202{
2203 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2204 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2205}
2206
e2b5e7aa 2207static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2208{
2209 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2210 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2211}
2212
e2b5e7aa 2213static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2214{
2215 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2217}
2218
e2b5e7aa 2219static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2220{
2221 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2223}
2224
e2b5e7aa 2225static void emit_teq(int rs, int rt)
57871462 2226{
2227 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2228 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2229}
2230
e2b5e7aa 2231static void emit_rsbimm(int rs, int imm, int rt)
57871462 2232{
2233 u_int armval;
cfbd3c6e 2234 genimm_checked(imm,&armval);
57871462 2235 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2236 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2237}
2238
2239// Load 2 immediates optimizing for small code size
e2b5e7aa 2240static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2241{
2242 emit_movimm(imm1,rt1);
2243 u_int armval;
2244 if(genimm(imm2-imm1,&armval)) {
2245 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2246 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2247 }else if(genimm(imm1-imm2,&armval)) {
2248 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2249 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2250 }
2251 else emit_movimm(imm2,rt2);
2252}
2253
2254// Conditionally select one of two immediates, optimizing for small code size
2255// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2256static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2257{
2258 u_int armval;
2259 if(genimm(imm2-imm1,&armval)) {
2260 emit_movimm(imm1,rt);
2261 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2262 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2263 }else if(genimm(imm1-imm2,&armval)) {
2264 emit_movimm(imm1,rt);
2265 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2266 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2267 }
2268 else {
665f33e1 2269 #ifndef HAVE_ARMV7
57871462 2270 emit_movimm(imm1,rt);
2271 add_literal((int)out,imm2);
2272 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2273 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2274 #else
2275 emit_movw(imm1&0x0000FFFF,rt);
2276 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2277 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2278 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2279 }
2280 emit_movt(imm1&0xFFFF0000,rt);
2281 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2282 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2283 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2284 }
2285 #endif
2286 }
2287}
2288
57871462 2289// special case for checking invalid_code
e2b5e7aa 2290static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2291{
2292 assert(imm<128&&imm>=0);
2293 assert(r>=0&&r<16);
2294 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2295 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2296 emit_cmpimm(HOST_TEMPREG,imm);
2297}
2298
e2b5e7aa 2299static void emit_callne(int a)
0bbd1454 2300{
2301 assem_debug("blne %x\n",a);
2302 u_int offset=genjmp(a);
2303 output_w32(0x1b000000|offset);
2304}
2305
57871462 2306// Used to preload hash table entries
e2b5e7aa 2307static unused void emit_prefetchreg(int r)
57871462 2308{
2309 assem_debug("pld %s\n",regname[r]);
2310 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2311}
2312
2313// Special case for mini_ht
e2b5e7aa 2314static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2315{
2316 assert(offset<4096);
2317 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2318 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2319}
2320
e2b5e7aa 2321static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2322{
2323 u_int armval;
cfbd3c6e 2324 genimm_checked(imm,&armval);
57871462 2325 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2326 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2327}
2328
e2b5e7aa 2329static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2330{
2331 u_int armval;
cfbd3c6e 2332 genimm_checked(imm,&armval);
57871462 2333 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2334 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2335}
2336
e2b5e7aa 2337static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2338{
2339 u_int armval;
cfbd3c6e 2340 genimm_checked(imm,&armval);
57871462 2341 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2342 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2343}
2344
e2b5e7aa 2345static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2346{
2347 u_int armval;
cfbd3c6e 2348 genimm_checked(imm,&armval);
57871462 2349 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2350 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2351}
2352
e2b5e7aa 2353static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2354{
2355 u_int armval;
cfbd3c6e 2356 genimm_checked(imm,&armval);
57871462 2357 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2358 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2359}
2360
e2b5e7aa 2361static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2362{
2363 u_int armval;
cfbd3c6e 2364 genimm_checked(imm,&armval);
b9b61529 2365 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2366 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2367}
2368
e2b5e7aa 2369static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2370{
2371 u_int armval;
cfbd3c6e 2372 genimm_checked(imm,&armval);
b9b61529 2373 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2374 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2375}
2376
e2b5e7aa 2377static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2378{
2379 u_int armval;
2380 genimm_checked(imm,&armval);
2381 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2382 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2383}
2384
// Emit "addvc pc,pc,#?" with a zero immediate field. The target a is only
// printed in the debug trace, not encoded here.
// NOTE(review): the immediate is presumably patched in later — confirm.
static void emit_jno_unlikely(int a)
{
  assem_debug("addvc pc,pc,#? (%x)\n",a);
  output_w32(0x72800000 | rd_rn_rm(15, 15, 0));
}
2391
054175e9 2392static void save_regs_all(u_int reglist)
57871462 2393{
054175e9 2394 int i;
57871462 2395 if(!reglist) return;
2396 assem_debug("stmia fp,{");
054175e9 2397 for(i=0;i<16;i++)
2398 if(reglist&(1<<i))
2399 assem_debug("r%d,",i);
57871462 2400 assem_debug("}\n");
2401 output_w32(0xe88b0000|reglist);
2402}
e2b5e7aa 2403
054175e9 2404static void restore_regs_all(u_int reglist)
57871462 2405{
054175e9 2406 int i;
57871462 2407 if(!reglist) return;
2408 assem_debug("ldmia fp,{");
054175e9 2409 for(i=0;i<16;i++)
2410 if(reglist&(1<<i))
2411 assem_debug("r%d,",i);
57871462 2412 assem_debug("}\n");
2413 output_w32(0xe89b0000|reglist);
2414}
e2b5e7aa 2415
054175e9 2416// Save registers before function call
2417static void save_regs(u_int reglist)
2418{
4d646738 2419 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2420 save_regs_all(reglist);
2421}
e2b5e7aa 2422
054175e9 2423// Restore registers after function call
2424static void restore_regs(u_int reglist)
2425{
4d646738 2426 reglist&=CALLER_SAVE_REGS;
054175e9 2427 restore_regs_all(reglist);
2428}
57871462 2429
57871462 2430/* Stubs/epilogue */
2431
e2b5e7aa 2432static void literal_pool(int n)
57871462 2433{
2434 if(!literalcount) return;
2435 if(n) {
2436 if((int)out-literals[0][0]<4096-n) return;
2437 }
2438 u_int *ptr;
2439 int i;
2440 for(i=0;i<literalcount;i++)
2441 {
77750690 2442 u_int l_addr=(u_int)out;
2443 int j;
2444 for(j=0;j<i;j++) {
2445 if(literals[j][1]==literals[i][1]) {
2446 //printf("dup %08x\n",literals[i][1]);
2447 l_addr=literals[j][0];
2448 break;
2449 }
2450 }
57871462 2451 ptr=(u_int *)literals[i][0];
77750690 2452 u_int offset=l_addr-(u_int)ptr-8;
57871462 2453 assert(offset<4096);
2454 assert(!(offset&3));
2455 *ptr|=offset;
77750690 2456 if(l_addr==(u_int)out) {
2457 literals[i][0]=l_addr; // remember for dupes
2458 output_w32(literals[i][1]);
2459 }
57871462 2460 }
2461 literalcount=0;
2462}
2463
e2b5e7aa 2464static void literal_pool_jumpover(int n)
57871462 2465{
2466 if(!literalcount) return;
2467 if(n) {
2468 if((int)out-literals[0][0]<4096-n) return;
2469 }
2470 int jaddr=(int)out;
2471 emit_jmp(0);
2472 literal_pool(0);
2473 set_jump_target(jaddr,(int)out);
2474}
2475
e2b5e7aa 2476static void emit_extjump2(u_int addr, int target, int linker)
57871462 2477{
2478 u_char *ptr=(u_char *)addr;
2479 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2480 (void)ptr;
2481
57871462 2482 emit_loadlp(target,0);
2483 emit_loadlp(addr,1);
24385cae 2484 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2485 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2486//DEBUG >
2487#ifdef DEBUG_CYCLE_COUNT
2488 emit_readword((int)&last_count,ECX);
2489 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2490 emit_readword((int)&next_interupt,ECX);
2491 emit_writeword(HOST_CCREG,(int)&Count);
2492 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2493 emit_writeword(ECX,(int)&last_count);
2494#endif
2495//DEBUG <
2496 emit_jmp(linker);
2497}
2498
e2b5e7aa 2499static void emit_extjump(int addr, int target)
57871462 2500{
2501 emit_extjump2(addr, target, (int)dyna_linker);
2502}
e2b5e7aa 2503
2504static void emit_extjump_ds(int addr, int target)
57871462 2505{
2506 emit_extjump2(addr, target, (int)dyna_linker_ds);
2507}
2508
13e35c04 2509// put rt_val into rt, potentially making use of rs with value rs_val
2510static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2511{
8575a877 2512 u_int armval;
2513 int diff;
2514 if(genimm(rt_val,&armval)) {
2515 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2516 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2517 return;
2518 }
2519 if(genimm(~rt_val,&armval)) {
2520 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2521 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2522 return;
2523 }
2524 diff=rt_val-rs_val;
2525 if(genimm(diff,&armval)) {
2526 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2527 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2528 return;
2529 }else if(genimm(-diff,&armval)) {
2530 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2531 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2532 return;
2533 }
2534 emit_movimm(rt_val,rt);
2535}
2536
// Return 1 if emit_movimm_from can do its job cheaply for these two values:
// they are equal, or their difference (in either direction) collapses to
// fewer than 9 significant bits once trailing pairs of zero bits are dropped.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int deltas[2];
  int k;

  if (v1 == v2)
    return 1;

  deltas[0] = v2 - v1; // forward difference
  deltas[1] = v1 - v2; // negated difference
  for (k = 0; k < 2; k++) {
    u_int bits = deltas[k];
    while (bits != 0 && (bits & 3) == 0)
      bits >>= 2;
    if (bits < 0x100)
      return 1;
  }
  return 0;
}
cbbab9cd 2552
// Move host registers a0 and a1 into r0 and r1 respectively.
// Negative values are skipped, except that a1 == 0 is always moved.
// Trashes r2 when the two must be swapped (a0 == 1 and a1 == 0).
static void pass_args(int a0, int a1)
{
  if (a1 == 0) {
    if (a0 == 1) {
      // must swap: go through r2
      emit_mov(a0, 2);
      emit_mov(a1, 1);
      emit_mov(2, 0);
      return;
    }
    if (a0 != 0) {
      // a1 currently lives in r0: move it out before r0 is overwritten
      emit_mov(a1, 1);
      if (a0 >= 0)
        emit_mov(a0, 0);
      return;
    }
  }
  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2569
b1be1eee 2570static void mov_loadtype_adj(int type,int rs,int rt)
2571{
2572 switch(type) {
2573 case LOADB_STUB: emit_signextend8(rs,rt); break;
2574 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2575 case LOADH_STUB: emit_signextend16(rs,rt); break;
2576 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2577 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2578 default: assert(0);
2579 }
2580}
2581
b1be1eee 2582#include "pcsxmem.h"
2583#include "pcsxmem_inline.c"
b1be1eee 2584
e2b5e7aa 2585static void do_readstub(int n)
57871462 2586{
2587 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2588 literal_pool(256);
2589 set_jump_target(stubs[n][1],(int)out);
2590 int type=stubs[n][0];
2591 int i=stubs[n][3];
2592 int rs=stubs[n][4];
2593 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2594 u_int reglist=stubs[n][7];
2595 signed char *i_regmap=i_regs->regmap;
581335b0 2596 int rt;
b9b61529 2597 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2598 rt=get_reg(i_regmap,FTEMP);
2599 }else{
57871462 2600 rt=get_reg(i_regmap,rt1[i]);
2601 }
2602 assert(rs>=0);
c6c3b1b3 2603 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2604 reglist|=(1<<rs);
2605 for(r=0;r<=12;r++) {
2606 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2607 temp=r; break;
2608 }
2609 }
db829eeb 2610 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2611 reglist&=~(1<<rt);
2612 if(temp==-1) {
2613 save_regs(reglist);
2614 regs_saved=1;
2615 temp=(rs==0)?2:0;
2616 }
2617 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2618 temp2=1;
2619 emit_readword((int)&mem_rtab,temp);
2620 emit_shrimm(rs,12,temp2);
2621 emit_readword_dualindexedx4(temp,temp2,temp2);
2622 emit_lsls_imm(temp2,1,temp2);
2623 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2624 switch(type) {
2625 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2626 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2627 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2628 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2629 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2630 }
2631 }
2632 if(regs_saved) {
2633 restore_jump=(int)out;
2634 emit_jcc(0); // jump to reg restore
2635 }
2636 else
2637 emit_jcc(stubs[n][2]); // return address
2638
2639 if(!regs_saved)
2640 save_regs(reglist);
2641 int handler=0;
2642 if(type==LOADB_STUB||type==LOADBU_STUB)
2643 handler=(int)jump_handler_read8;
2644 if(type==LOADH_STUB||type==LOADHU_STUB)
2645 handler=(int)jump_handler_read16;
2646 if(type==LOADW_STUB)
2647 handler=(int)jump_handler_read32;
2648 assert(handler!=0);
b96d3df7 2649 pass_args(rs,temp2);
c6c3b1b3 2650 int cc=get_reg(i_regmap,CCREG);
2651 if(cc<0)
2652 emit_loadreg(CCREG,2);
2573466a 2653 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2654 emit_call(handler);
2655 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2656 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2657 }
2658 if(restore_jump)
2659 set_jump_target(restore_jump,(int)out);
2660 restore_regs(reglist);
2661 emit_jmp(stubs[n][2]); // return address
57871462 2662}
2663
c6c3b1b3 2664// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2665static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2666{
2667 u_int l1,l2=0;
2668 l1=((u_int *)table)[addr>>12];
2669 if((l1&(1<<31))==0) {
2670 u_int v=l1<<1;
2671 *addr_host=v+addr;
2672 return 0;
2673 }
2674 else {
2675 l1<<=1;
2676 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2677 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2678 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2679 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2680 else
2681 l2=((u_int *)l1)[(addr&0xfff)/4];
2682 if((l2&(1<<31))==0) {
2683 u_int v=l2<<1;
2684 *addr_host=v+(addr&0xfff);
2685 return 0;
2686 }
2687 return l2<<1;
2688 }
2689}
c6c3b1b3 2690
e2b5e7aa 2691static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2692{
2693 int rs=get_reg(regmap,target);
57871462 2694 int rt=get_reg(regmap,target);
535d208a 2695 if(rs<0) rs=get_reg(regmap,-1);
57871462 2696 assert(rs>=0);
b1be1eee 2697 u_int handler,host_addr=0,is_dynamic,far_call=0;
2698 int cc=get_reg(regmap,CCREG);
2699 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2700 return;
c6c3b1b3 2701 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2702 if (handler==0) {
db829eeb 2703 if(rt<0||rt1[i]==0)
c6c3b1b3 2704 return;
13e35c04 2705 if(addr!=host_addr)
2706 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2707 switch(type) {
2708 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2709 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2710 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2711 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2712 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2713 default: assert(0);
2714 }
2715 return;
2716 }
b1be1eee 2717 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2718 if(is_dynamic) {
2719 if(type==LOADB_STUB||type==LOADBU_STUB)
2720 handler=(int)jump_handler_read8;
2721 if(type==LOADH_STUB||type==LOADHU_STUB)
2722 handler=(int)jump_handler_read16;
2723 if(type==LOADW_STUB)
2724 handler=(int)jump_handler_read32;
2725 }
c6c3b1b3 2726
2727 // call a memhandler
db829eeb 2728 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2729 reglist&=~(1<<rt);
2730 save_regs(reglist);
2731 if(target==0)
2732 emit_movimm(addr,0);
2733 else if(rs!=0)
2734 emit_mov(rs,0);
c6c3b1b3 2735 int offset=(int)handler-(int)out-8;
2736 if(offset<-33554432||offset>=33554432) {
2737 // unreachable memhandler, a plugin func perhaps
b1be1eee 2738 emit_movimm(handler,12);
2739 far_call=1;
2740 }
2741 if(cc<0)
2742 emit_loadreg(CCREG,2);
2743 if(is_dynamic) {
2744 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2745 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2746 }
b1be1eee 2747 else {
2748 emit_readword((int)&last_count,3);
2749 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2750 emit_add(2,3,2);
2751 emit_writeword(2,(int)&Count);
2752 }
2753
2754 if(far_call)
2755 emit_callreg(12);
c6c3b1b3 2756 else
2757 emit_call(handler);
b1be1eee 2758
db829eeb 2759 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2760 switch(type) {
2761 case LOADB_STUB: emit_signextend8(0,rt); break;
2762 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2763 case LOADH_STUB: emit_signextend16(0,rt); break;
2764 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2765 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2766 default: assert(0);
2767 }
2768 }
2769 restore_regs(reglist);
57871462 2770}
2771
e2b5e7aa 2772static void do_writestub(int n)
57871462 2773{
2774 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2775 literal_pool(256);
2776 set_jump_target(stubs[n][1],(int)out);
2777 int type=stubs[n][0];
2778 int i=stubs[n][3];
2779 int rs=stubs[n][4];
2780 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2781 u_int reglist=stubs[n][7];
2782 signed char *i_regmap=i_regs->regmap;
581335b0 2783 int rt,r;
b9b61529 2784 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2785 rt=get_reg(i_regmap,r=FTEMP);
2786 }else{
57871462 2787 rt=get_reg(i_regmap,r=rs2[i]);
2788 }
2789 assert(rs>=0);
2790 assert(rt>=0);
b96d3df7 2791 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2792 int reglist2=reglist|(1<<rs)|(1<<rt);
2793 for(rtmp=0;rtmp<=12;rtmp++) {
2794 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2795 temp=rtmp; break;
2796 }
2797 }
2798 if(temp==-1) {
2799 save_regs(reglist);
2800 regs_saved=1;
2801 for(rtmp=0;rtmp<=3;rtmp++)
2802 if(rtmp!=rs&&rtmp!=rt)
2803 {temp=rtmp;break;}
2804 }
2805 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2806 temp2=3;
2807 emit_readword((int)&mem_wtab,temp);
2808 emit_shrimm(rs,12,temp2);
2809 emit_readword_dualindexedx4(temp,temp2,temp2);
2810 emit_lsls_imm(temp2,1,temp2);
2811 switch(type) {
2812 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2813 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2814 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2815 default: assert(0);
2816 }
2817 if(regs_saved) {
2818 restore_jump=(int)out;
2819 emit_jcc(0); // jump to reg restore
2820 }
2821 else
2822 emit_jcc(stubs[n][2]); // return address (invcode check)
2823
2824 if(!regs_saved)
2825 save_regs(reglist);
2826 int handler=0;
2827 switch(type) {
2828 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2829 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2830 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2831 }
2832 assert(handler!=0);
2833 pass_args(rs,rt);
2834 if(temp2!=3)
2835 emit_mov(temp2,3);
2836 int cc=get_reg(i_regmap,CCREG);
2837 if(cc<0)
2838 emit_loadreg(CCREG,2);
2573466a 2839 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2840 // returns new cycle_count
2841 emit_call(handler);
2573466a 2842 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2843 if(cc<0)
2844 emit_storereg(CCREG,2);
2845 if(restore_jump)
2846 set_jump_target(restore_jump,(int)out);
2847 restore_regs(reglist);
2848 ra=stubs[n][2];
b96d3df7 2849 emit_jmp(ra);
57871462 2850}
2851
e2b5e7aa 2852static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2853{
2854 int rs=get_reg(regmap,-1);
57871462 2855 int rt=get_reg(regmap,target);
2856 assert(rs>=0);
2857 assert(rt>=0);
b96d3df7 2858 u_int handler,host_addr=0;
b96d3df7 2859 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2860 if (handler==0) {
13e35c04 2861 if(addr!=host_addr)
2862 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2863 switch(type) {
2864 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2865 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2866 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2867 default: assert(0);
2868 }
2869 return;
2870 }
2871
2872 // call a memhandler
2873 save_regs(reglist);
13e35c04 2874 pass_args(rs,rt);
b96d3df7 2875 int cc=get_reg(regmap,CCREG);
2876 if(cc<0)
2877 emit_loadreg(CCREG,2);
2573466a 2878 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2879 emit_movimm(handler,3);
2880 // returns new cycle_count
2881 emit_call((int)jump_handler_write_h);
2573466a 2882 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2883 if(cc<0)
2884 emit_storereg(CCREG,2);
2885 restore_regs(reglist);
57871462 2886}
2887
e2b5e7aa 2888static void do_unalignedwritestub(int n)
57871462 2889{
b7918751 2890 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2891 literal_pool(256);
57871462 2892 set_jump_target(stubs[n][1],(int)out);
b7918751 2893
2894 int i=stubs[n][3];
2895 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2896 int addr=stubs[n][5];
2897 u_int reglist=stubs[n][7];
2898 signed char *i_regmap=i_regs->regmap;
2899 int temp2=get_reg(i_regmap,FTEMP);
2900 int rt;
b7918751 2901 rt=get_reg(i_regmap,rs2[i]);
2902 assert(rt>=0);
2903 assert(addr>=0);
2904 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2905 reglist|=(1<<addr);
2906 reglist&=~(1<<temp2);
2907
b96d3df7 2908#if 1
2909 // don't bother with it and call write handler
2910 save_regs(reglist);
2911 pass_args(addr,rt);
2912 int cc=get_reg(i_regmap,CCREG);
2913 if(cc<0)
2914 emit_loadreg(CCREG,2);
2573466a 2915 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2916 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 2917 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2918 if(cc<0)
2919 emit_storereg(CCREG,2);
2920 restore_regs(reglist);
2921 emit_jmp(stubs[n][2]); // return address
2922#else
b7918751 2923 emit_andimm(addr,0xfffffffc,temp2);
2924 emit_writeword(temp2,(int)&address);
2925
2926 save_regs(reglist);
b7918751 2927 emit_shrimm(addr,16,1);
2928 int cc=get_reg(i_regmap,CCREG);
2929 if(cc<0) {
2930 emit_loadreg(CCREG,2);
2931 }
2932 emit_movimm((u_int)readmem,0);
2933 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2934 emit_call((int)&indirect_jump_indexed);
2935 restore_regs(reglist);
2936
2937 emit_readword((int)&readmem_dword,temp2);
2938 int temp=addr; //hmh
2939 emit_shlimm(addr,3,temp);
2940 emit_andimm(temp,24,temp);
2941#ifdef BIG_ENDIAN_MIPS
2942 if (opcode[i]==0x2e) // SWR
2943#else
2944 if (opcode[i]==0x2a) // SWL
2945#endif
2946 emit_xorimm(temp,24,temp);
2947 emit_movimm(-1,HOST_TEMPREG);
55439448 2948 if (opcode[i]==0x2a) { // SWL
b7918751 2949 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2950 emit_orrshr(rt,temp,temp2);
2951 }else{
2952 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshl(rt,temp,temp2);
2954 }
2955 emit_readword((int)&address,addr);
2956 emit_writeword(temp2,(int)&word);
2957 //save_regs(reglist); // don't need to, no state changes
2958 emit_shrimm(addr,16,1);
2959 emit_movimm((u_int)writemem,0);
2960 //emit_call((int)&indirect_jump_indexed);
2961 emit_mov(15,14);
2962 emit_readword_dualindexedx4(0,1,15);
2963 emit_readword((int)&Count,HOST_TEMPREG);
2964 emit_readword((int)&next_interupt,2);
2965 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2966 emit_writeword(2,(int)&last_count);
2967 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2968 if(cc<0) {
2969 emit_storereg(CCREG,HOST_TEMPREG);
2970 }
2971 restore_regs(reglist);
57871462 2972 emit_jmp(stubs[n][2]); // return address
b96d3df7 2973#endif
57871462 2974}
2975
e2b5e7aa 2976static void do_invstub(int n)
57871462 2977{
2978 literal_pool(20);
2979 u_int reglist=stubs[n][3];
2980 set_jump_target(stubs[n][1],(int)out);
2981 save_regs(reglist);
2982 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2983 emit_call((int)&invalidate_addr);
2984 restore_regs(reglist);
2985 emit_jmp(stubs[n][2]); // return address
2986}
2987
2988int do_dirty_stub(int i)
2989{
2990 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2991 u_int addr=(u_int)source;
57871462 2992 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2993 #ifndef HAVE_ARMV7
ac545b3a 2994 emit_loadlp(addr,1);
57871462 2995 emit_loadlp((int)copy,2);
2996 emit_loadlp(slen*4,3);
2997 #else
ac545b3a 2998 emit_movw(addr&0x0000FFFF,1);
57871462 2999 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3000 emit_movt(addr&0xFFFF0000,1);
57871462 3001 emit_movt(((u_int)copy)&0xFFFF0000,2);
3002 emit_movw(slen*4,3);
3003 #endif
3004 emit_movimm(start+i*4,0);
3005 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3006 int entry=(int)out;
3007 load_regs_entry(i);
3008 if(entry==(int)out) entry=instr_addr[i];
3009 emit_jmp(instr_addr[i]);
3010 return entry;
3011}
3012
e2b5e7aa 3013static void do_dirty_stub_ds()
57871462 3014{
3015 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3016 #ifndef HAVE_ARMV7
57871462 3017 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3018 emit_loadlp((int)copy,2);
3019 emit_loadlp(slen*4,3);
3020 #else
3021 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3022 emit_movw(((u_int)copy)&0x0000FFFF,2);
3023 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3024 emit_movt(((u_int)copy)&0xFFFF0000,2);
3025 emit_movw(slen*4,3);
3026 #endif
3027 emit_movimm(start+1,0);
3028 emit_call((int)&verify_code_ds);
3029}
3030
e2b5e7aa 3031static void do_cop1stub(int n)
57871462 3032{
3033 literal_pool(256);
3034 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3035 set_jump_target(stubs[n][1],(int)out);
3036 int i=stubs[n][3];
3d624f89 3037// int rs=stubs[n][4];
57871462 3038 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3039 int ds=stubs[n][6];
3040 if(!ds) {
3041 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3042 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3043 }
3044 //else {printf("fp exception in delay slot\n");}
3045 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3046 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3047 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3048 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3049 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3050}
3051
57871462 3052/* Special assem */
3053
e2b5e7aa 3054static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 3055{
3056 if(rt1[i]) {
3057 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3058 {
3059 signed char s,t,shift;
3060 t=get_reg(i_regs->regmap,rt1[i]);
3061 s=get_reg(i_regs->regmap,rs1[i]);
3062 shift=get_reg(i_regs->regmap,rs2[i]);
3063 if(t>=0){
3064 if(rs1[i]==0)
3065 {
3066 emit_zeroreg(t);
3067 }
3068 else if(rs2[i]==0)
3069 {
3070 assert(s>=0);
3071 if(s!=t) emit_mov(s,t);
3072 }
3073 else
3074 {
3075 emit_andimm(shift,31,HOST_TEMPREG);
3076 if(opcode2[i]==4) // SLLV
3077 {
3078 emit_shl(s,HOST_TEMPREG,t);
3079 }
3080 if(opcode2[i]==6) // SRLV
3081 {
3082 emit_shr(s,HOST_TEMPREG,t);
3083 }
3084 if(opcode2[i]==7) // SRAV
3085 {
3086 emit_sar(s,HOST_TEMPREG,t);
3087 }
3088 }
3089 }
3090 } else { // DSLLV/DSRLV/DSRAV
3091 signed char sh,sl,th,tl,shift;
3092 th=get_reg(i_regs->regmap,rt1[i]|64);
3093 tl=get_reg(i_regs->regmap,rt1[i]);
3094 sh=get_reg(i_regs->regmap,rs1[i]|64);
3095 sl=get_reg(i_regs->regmap,rs1[i]);
3096 shift=get_reg(i_regs->regmap,rs2[i]);
3097 if(tl>=0){
3098 if(rs1[i]==0)
3099 {
3100 emit_zeroreg(tl);
3101 if(th>=0) emit_zeroreg(th);
3102 }
3103 else if(rs2[i]==0)
3104 {
3105 assert(sl>=0);
3106 if(sl!=tl) emit_mov(sl,tl);
3107 if(th>=0&&sh!=th) emit_mov(sh,th);
3108 }
3109 else
3110 {
3111 // FIXME: What if shift==tl ?
3112 assert(shift!=tl);
3113 int temp=get_reg(i_regs->regmap,-1);
3114 int real_th=th;
3115 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3116 assert(sl>=0);
3117 assert(sh>=0);
3118 emit_andimm(shift,31,HOST_TEMPREG);
3119 if(opcode2[i]==0x14) // DSLLV
3120 {
3121 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3122 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3123 emit_orrshr(sl,HOST_TEMPREG,th);
3124 emit_andimm(shift,31,HOST_TEMPREG);
3125 emit_testimm(shift,32);
3126 emit_shl(sl,HOST_TEMPREG,tl);
3127 if(th>=0) emit_cmovne_reg(tl,th);
3128 emit_cmovne_imm(0,tl);
3129 }
3130 if(opcode2[i]==0x16) // DSRLV
3131 {
3132 assert(th>=0);
3133 emit_shr(sl,HOST_TEMPREG,tl);
3134 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3135 emit_orrshl(sh,HOST_TEMPREG,tl);
3136 emit_andimm(shift,31,HOST_TEMPREG);
3137 emit_testimm(shift,32);
3138 emit_shr(sh,HOST_TEMPREG,th);
3139 emit_cmovne_reg(th,tl);
3140 if(real_th>=0) emit_cmovne_imm(0,th);
3141 }
3142 if(opcode2[i]==0x17) // DSRAV
3143 {
3144 assert(th>=0);
3145 emit_shr(sl,HOST_TEMPREG,tl);
3146 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3147 if(real_th>=0) {
3148 assert(temp>=0);
3149 emit_sarimm(th,31,temp);
3150 }
3151 emit_orrshl(sh,HOST_TEMPREG,tl);
3152 emit_andimm(shift,31,HOST_TEMPREG);
3153 emit_testimm(shift,32);
3154 emit_sar(sh,HOST_TEMPREG,th);
3155 emit_cmovne_reg(th,tl);
3156 if(real_th>=0) emit_cmovne_reg(temp,th);
3157 }
3158 }
3159 }
3160 }
3161 }
3162}
ffb0b9e0 3163
ffb0b9e0 3164static void speculate_mov(int rs,int rt)
3165{
3166 if(rt!=0) {
3167 smrv_strong_next|=1<<rt;
3168 smrv[rt]=smrv[rs];
3169 }
3170}
3171
3172static void speculate_mov_weak(int rs,int rt)
3173{
3174 if(rt!=0) {
3175 smrv_weak_next|=1<<rt;
3176 smrv[rt]=smrv[rs];
3177 }
3178}
3179
3180static void speculate_register_values(int i)
3181{
3182 if(i==0) {
3183 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3184 // gp,sp are likely to stay the same throughout the block
3185 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3186 smrv_weak_next=~smrv_strong_next;
3187 //printf(" llr %08x\n", smrv[4]);
3188 }
3189 smrv_strong=smrv_strong_next;
3190 smrv_weak=smrv_weak_next;
3191 switch(itype[i]) {
3192 case ALU:
3193 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3194 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3195 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3196 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3197 else {
3198 smrv_strong_next&=~(1<<rt1[i]);
3199 smrv_weak_next&=~(1<<rt1[i]);
3200 }
3201 break;
3202 case SHIFTIMM:
3203 smrv_strong_next&=~(1<<rt1[i]);
3204 smrv_weak_next&=~(1<<rt1[i]);
3205 // fallthrough
3206 case IMM16:
3207 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3208 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3209 if(hr>=0) {
3210 if(get_final_value(hr,i,&value))
3211 smrv[rt1[i]]=value;
3212 else smrv[rt1[i]]=constmap[i][hr];
3213 smrv_strong_next|=1<<rt1[i];
3214 }
3215 }
3216 else {
3217 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3218 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3219 }
3220 break;
3221 case LOAD:
3222 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3223 // special case for BIOS
3224 smrv[rt1[i]]=0xa0000000;
3225 smrv_strong_next|=1<<rt1[i];
3226 break;
3227 }
3228 // fallthrough
3229 case SHIFT:
3230 case LOADLR:
3231 case MOV:
3232 smrv_strong_next&=~(1<<rt1[i]);
3233 smrv_weak_next&=~(1<<rt1[i]);
3234 break;
3235 case COP0:
3236 case COP2:
3237 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3238 smrv_strong_next&=~(1<<rt1[i]);
3239 smrv_weak_next&=~(1<<rt1[i]);
3240 }
3241 break;
3242 case C2LS:
3243 if (opcode[i]==0x32) { // LWC2
3244 smrv_strong_next&=~(1<<rt1[i]);
3245 smrv_weak_next&=~(1<<rt1[i]);
3246 }
3247 break;
3248 }
3249#if 0
3250 int r=4;
3251 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3252 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3253#endif
3254}
3255
/* Memory region classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default: no address fix-up before the RAM range check */
  MTYPE_8020 = 1, /* RAM mirror at 0x80200000 and above */
  MTYPE_0000 = 2, /* RAM mirror at address 0 */
  MTYPE_A000 = 3, /* RAM mirror at 0xa0000000 */
  MTYPE_1F80 = 4, /* scratchpad at 0x1f800000 */
};
3263
3264static int get_ptr_mem_type(u_int a)
3265{
3266 if(a < 0x00200000) {
3267 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3268 // return wrong, must use memhandler for BIOS self-test to pass
3269 // 007 does similar stuff from a00 mirror, weird stuff
3270 return MTYPE_8000;
3271 return MTYPE_0000;
3272 }
3273 if(0x1f800000 <= a && a < 0x1f801000)
3274 return MTYPE_1F80;
3275 if(0x80200000 <= a && a < 0x80800000)
3276 return MTYPE_8020;
3277 if(0xa0000000 <= a && a < 0xa0200000)
3278 return MTYPE_A000;
3279 return MTYPE_8000;
3280}
ffb0b9e0 3281
3282static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3283{
581335b0 3284 int jaddr=0,type=0;
ffb0b9e0 3285 int mr=rs1[i];
3286 if(((smrv_strong|smrv_weak)>>mr)&1) {
3287 type=get_ptr_mem_type(smrv[mr]);
3288 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3289 }
3290 else {
3291 // use the mirror we are running on
3292 type=get_ptr_mem_type(start);
3293 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3294 }
3295
3296 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3297 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3298 addr=*addr_reg_override=HOST_TEMPREG;
3299 type=0;
3300 }
3301 else if(type==MTYPE_0000) { // RAM 0 mirror
3302 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3303 addr=*addr_reg_override=HOST_TEMPREG;
3304 type=0;
3305 }
3306 else if(type==MTYPE_A000) { // RAM A mirror
3307 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3308 addr=*addr_reg_override=HOST_TEMPREG;
3309 type=0;
3310 }
3311 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3312 if (psxH == (void *)0x1f800000) {
3313 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3314 emit_cmpimm(HOST_TEMPREG,0x1000);
3315 jaddr=(int)out;
3316 emit_jc(0);
3317 }
3318 else {
3319 // do usual RAM check, jump will go to the right handler
3320 type=0;
3321 }
ffb0b9e0 3322 }
ffb0b9e0 3323
3324 if(type==0)
3325 {
3326 emit_cmpimm(addr,RAM_SIZE);
3327 jaddr=(int)out;
3328 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3329 // Hint to branch predictor that the branch is unlikely to be taken
3330 if(rs1[i]>=28)
3331 emit_jno_unlikely(0);
3332 else
3333 #endif
3334 emit_jno(0);
a327ad27 3335 if(ram_offset!=0) {
3336 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3337 addr=*addr_reg_override=HOST_TEMPREG;
3338 }
ffb0b9e0 3339 }
3340
3341 return jaddr;
3342}
3343
57871462 3344#define shift_assemble shift_assemble_arm
3345
e2b5e7aa 3346static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3347{
3348 int s,th,tl,temp,temp2,addr,map=-1;
3349 int offset;
3350 int jaddr=0;
af4ee1fe 3351 int memtarget=0,c=0;
ffb0b9e0 3352 int fastload_reg_override=0;
57871462 3353 u_int hr,reglist=0;
3354 th=get_reg(i_regs->regmap,rt1[i]|64);
3355 tl=get_reg(i_regs->regmap,rt1[i]);
3356 s=get_reg(i_regs->regmap,rs1[i]);
3357 temp=get_reg(i_regs->regmap,-1);
3358 temp2=get_reg(i_regs->regmap,FTEMP);
3359 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3360 assert(addr<0);
3361 offset=imm[i];
3362 for(hr=0;hr<HOST_REGS;hr++) {
3363 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3364 }
3365 reglist|=1<<temp;
3366 if(offset||s<0||c) addr=temp2;
3367 else addr=s;
3368 if(s>=0) {
3369 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3370 if(c) {
3371 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3372 }
57871462 3373 }
1edfcc68 3374 if(!c) {
3375 #ifdef RAM_OFFSET
3376 map=get_reg(i_regs->regmap,ROREG);
3377 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3378 #endif
3379 emit_shlimm(addr,3,temp);
3380 if (opcode[i]==0x22||opcode[i]==0x26) {
3381 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3382 }else{
3383 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3384 }
1edfcc68 3385 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3386 }
3387 else {
3388 if(ram_offset&&memtarget) {
3389 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3390 fastload_reg_override=HOST_TEMPREG;
57871462 3391 }
1edfcc68 3392 if (opcode[i]==0x22||opcode[i]==0x26) {
3393 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3394 }else{
1edfcc68 3395 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3396 }
535d208a 3397 }
3398 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3399 if(!c||memtarget) {
ffb0b9e0 3400 int a=temp2;
3401 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3402 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3403 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3404 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3405 }
3406 else
3407 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3408 if(rt1[i]) {
3409 assert(tl>=0);
57871462 3410 emit_andimm(temp,24,temp);
2002a1db 3411#ifdef BIG_ENDIAN_MIPS
3412 if (opcode[i]==0x26) // LWR
3413#else
3414 if (opcode[i]==0x22) // LWL
3415#endif
3416 emit_xorimm(temp,24,temp);
57871462 3417 emit_movimm(-1,HOST_TEMPREG);
3418 if (opcode[i]==0x26) {
3419 emit_shr(temp2,temp,temp2);
3420 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3421 }else{
3422 emit_shl(temp2,temp,temp2);
3423 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3424 }
3425 emit_or(temp2,tl,tl);
57871462 3426 }
535d208a 3427 //emit_storereg(rt1[i],tl); // DEBUG
3428 }
3429 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3430 // FIXME: little endian, fastload_reg_override
535d208a 3431 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3432 if(!c||memtarget) {
3433 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3434 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3435 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3436 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3437 }
3438 else
3439 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3440 if(rt1[i]) {
3441 assert(th>=0);
3442 assert(tl>=0);
57871462 3443 emit_testimm(temp,32);
3444 emit_andimm(temp,24,temp);
3445 if (opcode[i]==0x1A) { // LDL
3446 emit_rsbimm(temp,32,HOST_TEMPREG);
3447 emit_shl(temp2h,temp,temp2h);
3448 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3449 emit_movimm(-1,HOST_TEMPREG);
3450 emit_shl(temp2,temp,temp2);
3451 emit_cmove_reg(temp2h,th);
3452 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3453 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3454 emit_orreq(temp2,tl,tl);
3455 emit_orrne(temp2,th,th);
3456 }
3457 if (opcode[i]==0x1B) { // LDR
3458 emit_xorimm(temp,24,temp);
3459 emit_rsbimm(temp,32,HOST_TEMPREG);
3460 emit_shr(temp2,temp,temp2);
3461 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3462 emit_movimm(-1,HOST_TEMPREG);
3463 emit_shr(temp2h,temp,temp2h);
3464 emit_cmovne_reg(temp2,tl);
3465 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3466 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3467 emit_orrne(temp2h,th,th);
3468 emit_orreq(temp2h,tl,tl);
3469 }
3470 }
3471 }
3472}
3473#define loadlr_assemble loadlr_assemble_arm
3474
e2b5e7aa 3475static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3476{
3477 if(opcode2[i]==0) // MFC0
3478 {
3479 signed char t=get_reg(i_regs->regmap,rt1[i]);
3480 char copr=(source[i]>>11)&0x1f;
3481 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3482 if(t>=0&&rt1[i]!=0) {
7139f3c8 3483 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3484 }
3485 }
3486 else if(opcode2[i]==4) // MTC0
3487 {
3488 signed char s=get_reg(i_regs->regmap,rs1[i]);
3489 char copr=(source[i]>>11)&0x1f;
3490 assert(s>=0);
63cb0298 3491 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3492 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3493 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3494 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3495 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3496 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3497 emit_writeword(HOST_CCREG,(int)&Count);
3498 }
3499 // What a mess. The status register (12) can enable interrupts,
3500 // so needs a special case to handle a pending interrupt.
3501 // The interrupt must be taken immediately, because a subsequent
3502 // instruction might disable interrupts again.
7139f3c8 3503 if(copr==12||copr==13) {
fca1aef2 3504 if (is_delayslot) {
3505 // burn cycles to cause cc_interrupt, which will
3506 // reschedule next_interupt. Relies on CCREG from above.
3507 assem_debug("MTC0 DS %d\n", copr);
3508 emit_writeword(HOST_CCREG,(int)&last_count);
3509 emit_movimm(0,HOST_CCREG);
3510 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3511 emit_loadreg(rs1[i],1);
fca1aef2 3512 emit_movimm(copr,0);
3513 emit_call((int)pcsx_mtc0_ds);
042c7287 3514 emit_loadreg(rs1[i],s);
fca1aef2 3515 return;
3516 }
63cb0298 3517 emit_movimm(start+i*4+4,HOST_TEMPREG);
3518 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3519 emit_movimm(0,HOST_TEMPREG);
3520 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3521 }
3522 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3523 //else
caeefe31 3524 if(s==HOST_CCREG)
3525 emit_loadreg(rs1[i],1);
3526 else if(s!=1)
63cb0298 3527 emit_mov(s,1);
fca1aef2 3528 emit_movimm(copr,0);
3529 emit_call((int)pcsx_mtc0);
7139f3c8 3530 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3531 emit_readword((int)&Count,HOST_CCREG);
042c7287 3532 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3533 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3534 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3535 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3536 emit_storereg(CCREG,HOST_CCREG);
3537 }
7139f3c8 3538 if(copr==12||copr==13) {
57871462 3539 assert(!is_delayslot);
3540 emit_readword((int)&pending_exception,14);
042c7287 3541 emit_test(14,14);
3542 emit_jne((int)&do_interrupt);
57871462 3543 }
3544 emit_loadreg(rs1[i],s);
3545 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3546 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3547 cop1_usable=0;
3548 }
3549 else
3550 {
3551 assert(opcode2[i]==0x10);
576bbd8f 3552 if((source[i]&0x3f)==0x10) // RFE
3553 {
3554 emit_readword((int)&Status,0);
3555 emit_andimm(0,0x3c,1);
3556 emit_andimm(0,~0xf,0);
3557 emit_orrshr_imm(1,2,0);
3558 emit_writeword(0,(int)&Status);
3559 }
57871462 3560 }
3561}
3562
b9b61529 3563static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3564{
3565 switch (copr) {
3566 case 1:
3567 case 3:
3568 case 5:
3569 case 8:
3570 case 9:
3571 case 10:
3572 case 11:
3573 emit_readword((int)&reg_cop2d[copr],tl);
3574 emit_signextend16(tl,tl);
3575 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3576 break;
3577 case 7:
3578 case 16:
3579 case 17:
3580 case 18:
3581 case 19:
3582 emit_readword((int)&reg_cop2d[copr],tl);
3583 emit_andimm(tl,0xffff,tl);
3584 emit_writeword(tl,(int)&reg_cop2d[copr]);
3585 break;
3586 case 15:
3587 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3588 emit_writeword(tl,(int)&reg_cop2d[copr]);
3589 break;
3590 case 28:
b9b61529 3591 case 29:
3592 emit_readword((int)&reg_cop2d[9],temp);
3593 emit_testimm(temp,0x8000); // do we need this?
3594 emit_andimm(temp,0xf80,temp);
3595 emit_andne_imm(temp,0,temp);
f70d384d 3596 emit_shrimm(temp,7,tl);
b9b61529 3597 emit_readword((int)&reg_cop2d[10],temp);
3598 emit_testimm(temp,0x8000);
3599 emit_andimm(temp,0xf80,temp);
3600 emit_andne_imm(temp,0,temp);
f70d384d 3601 emit_orrshr_imm(temp,2,tl);
b9b61529 3602 emit_readword((int)&reg_cop2d[11],temp);
3603 emit_testimm(temp,0x8000);
3604 emit_andimm(temp,0xf80,temp);
3605 emit_andne_imm(temp,0,temp);
f70d384d 3606 emit_orrshl_imm(temp,3,tl);
b9b61529 3607 emit_writeword(tl,(int)&reg_cop2d[copr]);
3608 break;
3609 default:
3610 emit_readword((int)&reg_cop2d[copr],tl);
3611 break;
3612 }
3613}
3614
3615static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3616{
3617 switch (copr) {
3618 case 15:
3619 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3620 emit_writeword(sl,(int)&reg_cop2d[copr]);
3621 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3622 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3623 emit_writeword(sl,(int)&reg_cop2d[14]);
3624 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3625 break;
3626 case 28:
3627 emit_andimm(sl,0x001f,temp);
f70d384d 3628 emit_shlimm(temp,7,temp);
b9b61529 3629 emit_writeword(temp,(int)&reg_cop2d[9]);
3630 emit_andimm(sl,0x03e0,temp);
f70d384d 3631 emit_shlimm(temp,2,temp);
b9b61529 3632 emit_writeword(temp,(int)&reg_cop2d[10]);
3633 emit_andimm(sl,0x7c00,temp);
f70d384d 3634 emit_shrimm(temp,3,temp);
b9b61529 3635 emit_writeword(temp,(int)&reg_cop2d[11]);
3636 emit_writeword(sl,(int)&reg_cop2d[28]);
3637 break;
3638 case 30:
3639 emit_movs(sl,temp);
3640 emit_mvnmi(temp,temp);
665f33e1 3641#ifdef HAVE_ARMV5
b9b61529 3642 emit_clz(temp,temp);
665f33e1 3643#else
3644 emit_movs(temp,HOST_TEMPREG);
3645 emit_movimm(0,temp);
3646 emit_jeq((int)out+4*4);
3647 emit_addpl_imm(temp,1,temp);
3648 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3649 emit_jns((int)out-2*4);
3650#endif
b9b61529 3651 emit_writeword(sl,(int)&reg_cop2d[30]);
3652 emit_writeword(temp,(int)&reg_cop2d[31]);
3653 break;
b9b61529 3654 case 31:
3655 break;
3656 default:
3657 emit_writeword(sl,(int)&reg_cop2d[copr]);
3658 break;
3659 }
3660}
3661
e2b5e7aa 3662static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3663{
3664 u_int copr=(source[i]>>11)&0x1f;
3665 signed char temp=get_reg(i_regs->regmap,-1);
3666 if (opcode2[i]==0) { // MFC2
3667 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3668 if(tl>=0&&rt1[i]!=0)
b9b61529 3669 cop2_get_dreg(copr,tl,temp);
3670 }
3671 else if (opcode2[i]==4) { // MTC2
3672 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3673 cop2_put_dreg(copr,sl,temp);
3674 }
3675 else if (opcode2[i]==2) // CFC2
3676 {
3677 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3678 if(tl>=0&&rt1[i]!=0)
b9b61529 3679 emit_readword((int)&reg_cop2c[copr],tl);
3680 }
3681 else if (opcode2[i]==6) // CTC2
3682 {
3683 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3684 switch(copr) {
3685 case 4:
3686 case 12:
3687 case 20:
3688 case 26:
3689 case 27:
3690 case 29:
3691 case 30:
3692 emit_signextend16(sl,temp);
3693 break;
3694 case 31:
3695 //value = value & 0x7ffff000;
3696 //if (value & 0x7f87e000) value |= 0x80000000;
3697 emit_shrimm(sl,12,temp);
3698 emit_shlimm(temp,12,temp);
3699 emit_testimm(temp,0x7f000000);
3700 emit_testeqimm(temp,0x00870000);
3701 emit_testeqimm(temp,0x0000e000);
3702 emit_orrne_imm(temp,0x80000000,temp);
3703 break;
3704 default:
3705 temp=sl;
3706 break;
3707 }
3708 emit_writeword(temp,(int)&reg_cop2c[copr]);
3709 assert(sl>=0);
3710 }
3711}
3712
054175e9 3713static void c2op_prologue(u_int op,u_int reglist)
3714{
3715 save_regs_all(reglist);
82ed88eb 3716#ifdef PCNT
3717 emit_movimm(op,0);
3718 emit_call((int)pcnt_gte_start);
3719#endif
054175e9 3720 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3721}
3722
3723static void c2op_epilogue(u_int op,u_int reglist)
3724{
82ed88eb 3725#ifdef PCNT
3726 emit_movimm(op,0);
3727 emit_call((int)pcnt_gte_end);
3728#endif
054175e9 3729 restore_regs_all(reglist);
3730}
3731
6c0eefaf 3732static void c2op_call_MACtoIR(int lm,int need_flags)
3733{
3734 if(need_flags)
3735 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3736 else
3737 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3738}
3739
3740static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3741{
3742 emit_call((int)func);
3743 // func is C code and trashes r0
3744 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3745 if(need_flags||need_ir)
3746 c2op_call_MACtoIR(lm,need_flags);
3747 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3748}
3749
054175e9 3750static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3751{
b9b61529 3752 u_int c2op=source[i]&0x3f;
6c0eefaf 3753 u_int hr,reglist_full=0,reglist;
054175e9 3754 int need_flags,need_ir;
b9b61529 3755 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3756 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3757 }
4d646738 3758 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3759
3760 if (gte_handlers[c2op]!=NULL) {
bedfea38 3761 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3762 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3763 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3764 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3765 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3766 need_flags=0;
6c0eefaf 3767 int shift = (source[i] >> 19) & 1;
3768 int lm = (source[i] >> 10) & 1;
054175e9 3769 switch(c2op) {
19776aef 3770#ifndef DRC_DBG
054175e9 3771 case GTE_MVMVA: {
82336ba3 3772#ifdef HAVE_ARMV5
054175e9 3773 int v = (source[i] >> 15) & 3;
3774 int cv = (source[i] >> 13) & 3;
3775 int mx = (source[i] >> 17) & 3;
4d646738 3776 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3777 c2op_prologue(c2op,reglist);
3778 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3779 if(v<3)
3780 emit_ldrd(v*8,0,4);
3781 else {
3782 emit_movzwl_indexed(9*4,0,4); // gteIR
3783 emit_movzwl_indexed(10*4,0,6);
3784 emit_movzwl_indexed(11*4,0,5);
3785 emit_orrshl_imm(6,16,4);
3786 }
3787 if(mx<3)
3788 emit_addimm(0,32*4+mx*8*4,6);
3789 else
3790 emit_readword((int)&zeromem_ptr,6);
3791 if(cv<3)
3792 emit_addimm(0,32*4+(cv*8+5)*4,7);
3793 else
3794 emit_readword((int)&zeromem_ptr,7);
3795#ifdef __ARM_NEON__
3796 emit_movimm(source[i],1); // opcode
3797 emit_call((int)gteMVMVA_part_neon);
3798 if(need_flags) {
3799 emit_movimm(lm,1);
3800 emit_call((int)gteMACtoIR_flags_neon);
3801 }
3802#else
3803 if(cv==3&&shift)
3804 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3805 else {
3806 emit_movimm(shift,1);
3807 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3808 }
6c0eefaf 3809 if(need_flags||need_ir)
3810 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3811#endif
3812#else /* if not HAVE_ARMV5 */
3813 c2op_prologue(c2op,reglist);
3814 emit_movimm(source[i],1); // opcode
3815 emit_writeword(1,(int)&psxRegs.code);
3816 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3817#endif
3818 break;
3819 }
6c0eefaf 3820 case GTE_OP:
3821 c2op_prologue(c2op,reglist);
3822 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3823 if(need_flags||need_ir) {
3824 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3825 c2op_call_MACtoIR(lm,need_flags);
3826 }
3827 break;
3828 case GTE_DPCS:
3829 c2op_prologue(c2op,reglist);
3830 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3831 break;
3832 case GTE_INTPL:
3833 c2op_prologue(c2op,reglist);
3834 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3835 break;
3836 case GTE_SQR:
3837 c2op_prologue(c2op,reglist);
3838 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3839 if(need_flags||need_ir) {
3840 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3841 c2op_call_MACtoIR(lm,need_flags);
3842 }
3843 break;
3844 case GTE_DCPL:
3845 c2op_prologue(c2op,reglist);
3846 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3847 break;
3848 case GTE_GPF:
3849 c2op_prologue(c2op,reglist);
3850 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3851 break;
3852 case GTE_GPL:
3853 c2op_prologue(c2op,reglist);
3854 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3855 break;
19776aef 3856#endif
054175e9 3857 default:
054175e9 3858 c2op_prologue(c2op,reglist);
19776aef 3859#ifdef DRC_DBG
3860 emit_movimm(source[i],1); // opcode
3861 emit_writeword(1,(int)&psxRegs.code);
3862#endif
054175e9 3863 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3864 break;
3865 }
3866 c2op_epilogue(c2op,reglist);
3867 }
b9b61529 3868}
3869
e2b5e7aa 3870static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3871{
3872 // XXX: should just just do the exception instead
3873 if(!cop1_usable) {
3874 int jaddr=(int)out;
3875 emit_jmp(0);
3876 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3877 cop1_usable=1;
3878 }
3879}
3880
/* Assemble a cop1 instruction: only the handling in cop1_unusable() is emitted. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,
                i_regs);
}
3885
/* Assemble an FP conversion: only the handling in cop1_unusable() is emitted. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,
                i_regs);
}
#define fconv_assemble fconv_assemble_arm
3891
/* Assemble an FP compare: only the handling in cop1_unusable() is emitted. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,
                i_regs);
}
3896
/* Assemble an FP arithmetic op: only the handling in cop1_unusable() is emitted. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,
                i_regs);
}
3901
e2b5e7aa 3902static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3903{
3904 // case 0x18: MULT
3905 // case 0x19: MULTU
3906 // case 0x1A: DIV
3907 // case 0x1B: DIVU
3908 // case 0x1C: DMULT
3909 // case 0x1D: DMULTU
3910 // case 0x1E: DDIV
3911 // case 0x1F: DDIVU
3912 if(rs1[i]&&rs2[i])
3913 {
3914 if((opcode2[i]&4)==0) // 32-bit
3915 {
3916 if(opcode2[i]==0x18) // MULT
3917 {
3918 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3919 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3920 signed char hi=get_reg(i_regs->regmap,HIREG);
3921 signed char lo=get_reg(i_regs->regmap,LOREG);
3922 assert(m1>=0);
3923 assert(m2>=0);
3924 assert(hi>=0);
3925 assert(lo>=0);
3926 emit_smull(m1,m2,hi,lo);
3927 }
3928 if(opcode2[i]==0x19) // MULTU
3929 {
3930 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3931 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3932 signed char hi=get_reg(i_regs->regmap,HIREG);
3933 signed char lo=get_reg(i_regs->regmap,LOREG);
3934 assert(m1>=0);
3935 assert(m2>=0);
3936 assert(hi>=0);
3937 assert(lo>=0);
3938 emit_umull(m1,m2,hi,lo);
3939 }
3940 if(opcode2[i]==0x1A) // DIV
3941 {
3942 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3943 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3944 assert(d1>=0);
3945 assert(d2>=0);
3946 signed char quotient=get_reg(i_regs->regmap,LOREG);
3947 signed char remainder=get_reg(i_regs->regmap,HIREG);
3948 assert(quotient>=0);
3949 assert(remainder>=0);
3950 emit_movs(d1,remainder);
44a80f6a 3951 emit_movimm(0xffffffff,quotient);
3952 emit_negmi(quotient,quotient); // .. quotient and ..
3953 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3954 emit_movs(d2,HOST_TEMPREG);
3955 emit_jeq((int)out+52); // Division by zero
82336ba3 3956 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3957#ifdef HAVE_ARMV5
57871462 3958 emit_clz(HOST_TEMPREG,quotient);
3959 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3960#else
3961 emit_movimm(0,quotient);
3962 emit_addpl_imm(quotient,1,quotient);
3963 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3964 emit_jns((int)out-2*4);
3965#endif
57871462 3966 emit_orimm(quotient,1<<31,quotient);
3967 emit_shr(quotient,quotient,quotient);
3968 emit_cmp(remainder,HOST_TEMPREG);
3969 emit_subcs(remainder,HOST_TEMPREG,remainder);
3970 emit_adcs(quotient,quotient,quotient);
3971 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3972 emit_jcc((int)out-16); // -4
3973 emit_teq(d1,d2);
3974 emit_negmi(quotient,quotient);
3975 emit_test(d1,d1);
3976 emit_negmi(remainder,remainder);
3977 }
3978 if(opcode2[i]==0x1B) // DIVU
3979 {
3980 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3981 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3982 assert(d1>=0);
3983 assert(d2>=0);
3984 signed char quotient=get_reg(i_regs->regmap,LOREG);
3985 signed char remainder=get_reg(i_regs->regmap,HIREG);
3986 assert(quotient>=0);
3987 assert(remainder>=0);
44a80f6a 3988 emit_mov(d1,remainder);
3989 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3990 emit_test(d2,d2);
44a80f6a 3991 emit_jeq((int)out+40); // Division by zero
665f33e1 3992#ifdef HAVE_ARMV5
57871462 3993 emit_clz(d2,HOST_TEMPREG);
3994 emit_movimm(1<<31,quotient);
3995 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3996#else
3997 emit_movimm(0,HOST_TEMPREG);
82336ba3 3998 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3999 emit_lslpls_imm(d2,1,d2);
665f33e1 4000 emit_jns((int)out-2*4);
4001 emit_movimm(1<<31,quotient);
4002#endif
57871462 4003 emit_shr(quotient,HOST_TEMPREG,quotient);
4004 emit_cmp(remainder,d2);
4005 emit_subcs(remainder,d2,remainder);
4006 emit_adcs(quotient,quotient,quotient);
4007 emit_shrcc_imm(d2,1,d2);
4008 emit_jcc((int)out-16); // -4
4009 }
4010 }
4011 else // 64-bit
71e490c5 4012 assert(0);
57871462 4013 }
4014 else
4015 {
4016 // Multiply by zero is zero.
4017 // MIPS does not have a divide by zero exception.
4018 // The result is undefined, we return zero.
4019 signed char hr=get_reg(i_regs->regmap,HIREG);
4020 signed char lr=get_reg(i_regs->regmap,LOREG);
4021 if(hr>=0) emit_zeroreg(hr);
4022 if(lr>=0) emit_zeroreg(lr);
4023 }
4024}
4025#define multdiv_assemble multdiv_assemble_arm
4026
// Intentionally a no-op on ARM.  The x86 backend uses this hook to put the
// hash mask (0xf8) into register r; here do_rhash() applies the mask as an
// immediate in a single instruction, so nothing needs to be preloaded.
static void do_preload_rhash(int r)
{
  (void)r; // no register setup required on this target
}
4031
e2b5e7aa 4032static void do_preload_rhtbl(int ht) {
57871462 4033 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4034}
4035
// Emit the mini hash table "hash": rh = rs & 0xf8.
// The mask keeps bits 3..7 of the address, the same bits do_miniht_insert()
// uses to select a mini_ht slot ((addr & 0xFF) >> 3).
static void do_rhash(int rs, int rh)
{
  const int hash_mask = 0xf8;

  emit_andimm(rs, hash_mask, rh);
}
4039
e2b5e7aa 4040static void do_miniht_load(int ht,int rh) {
57871462 4041 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4042 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4043}
4044
e2b5e7aa 4045static void do_miniht_jump(int rs,int rh,int ht) {
57871462 4046 emit_cmp(rh,rs);
4047 emit_ldreq_indexed(ht,4,15);
4048 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4049 emit_mov(rs,7);
4050 emit_jmp(jump_vaddr_reg[7]);
4051 #else
4052 emit_jmp(jump_vaddr_reg[rs]);
4053 #endif
4054}
4055
e2b5e7aa 4056static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4057 #ifndef HAVE_ARMV7
57871462 4058 emit_movimm(return_address,rt); // PC into link register
4059 add_to_linker((int)out,return_address,1);
4060 emit_pcreladdr(temp);
4061 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4062 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4063 #else
4064 emit_movw(return_address&0x0000FFFF,rt);
4065 add_to_linker((int)out,return_address,1);
4066 emit_pcreladdr(temp);
4067 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4068 emit_movt(return_address&0xFFFF0000,rt);
4069 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4070 #endif
4071}
4072
e2b5e7aa 4073static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4074{
4075 //if(dirty_pre==dirty) return;
581335b0 4076 int hr,reg;
57871462 4077 for(hr=0;hr<HOST_REGS;hr++) {
4078 if(hr!=EXCLUDE_REG) {
4079 reg=pre[hr];
4080 if(((~u)>>(reg&63))&1) {
f776eb14 4081 if(reg>0) {
57871462 4082 if(((dirty_pre&~dirty)>>hr)&1) {
4083 if(reg>0&&reg<34) {
4084 emit_storereg(reg,hr);
4085 if( ((is32_pre&~uu)>>reg)&1 ) {
4086 emit_sarimm(hr,31,HOST_TEMPREG);
4087 emit_storereg(reg|64,HOST_TEMPREG);
4088 }
4089 }
4090 else if(reg>=64) {
4091 emit_storereg(reg,hr);
4092 }
4093 }
4094 }
57871462 4095 }
4096 }
4097 }
4098}
4099
4100
4101/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4102static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4103{
4104 int hr;
4105 int wrote=-1;
4106 for(hr=HOST_REGS-1;hr>=0;hr--) {
4107 if(hr!=EXCLUDE_REG) {
4108 if(pre[hr]!=entry[hr]) {
4109 if(pre[hr]>=0) {
4110 if((dirty>>hr)&1) {
4111 if(get_reg(entry,pre[hr])<0) {
4112 if(pre[hr]<64) {
4113 if(!((u>>pre[hr])&1)) {
4114 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4115 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4116 emit_sarimm(hr,31,hr+1);
4117 emit_strdreg(pre[hr],hr);
4118 }
4119 else
4120 emit_storereg(pre[hr],hr);
4121 }else{
4122 emit_storereg(pre[hr],hr);
4123 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4124 emit_sarimm(hr,31,hr);
4125 emit_storereg(pre[hr]|64,hr);
4126 }
4127 }
4128 }
4129 }else{
4130 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4131 emit_storereg(pre[hr],hr);
4132 }
4133 }
4134 wrote=hr;
4135 }
4136 }
4137 }
4138 }
4139 }
4140 }
4141 for(hr=0;hr<HOST_REGS;hr++) {
4142 if(hr!=EXCLUDE_REG) {
4143 if(pre[hr]!=entry[hr]) {
4144 if(pre[hr]>=0) {
4145 int nr;
4146 if((nr=get_reg(entry,pre[hr]))>=0) {
4147 emit_mov(hr,nr);
4148 }
4149 }
4150 }
4151 }
4152 }
4153}
4154#define wb_invalidate wb_invalidate_arm
4155*/
4156
d148d265 4157static void mark_clear_cache(void *target)
4158{
4159 u_long offset = (char *)target - (char *)BASE_ADDR;
4160 u_int mask = 1u << ((offset >> 12) & 31);
4161 if (!(needs_clear_cache[offset >> 17] & mask)) {
4162 char *start = (char *)((u_long)target & ~4095ul);
4163 start_tcache_write(start, start + 4096);
4164 needs_clear_cache[offset >> 17] |= mask;
4165 }
4166}
4167
dd3a91a1 4168// Clearing the cache is rather slow on ARM Linux, so mark the areas
4169// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4170static void do_clear_cache()
dd3a91a1 4171{
4172 int i,j;
4173 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4174 {
4175 u_int bitmap=needs_clear_cache[i];
4176 if(bitmap) {
4177 u_int start,end;
9f51b4b9 4178 for(j=0;j<32;j++)
dd3a91a1 4179 {
4180 if(bitmap&(1<<j)) {
bdeade46 4181 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4182 end=start+4095;
4183 j++;
4184 while(j<32) {
4185 if(bitmap&(1<<j)) {
4186 end+=4096;
4187 j++;
4188 }else{
d148d265 4189 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4190 break;
4191 }
4192 }
4193 }
4194 }
4195 needs_clear_cache[i]=0;
4196 }
4197 }
4198}
4199
// Hook for CPU-architecture-specific initialization.
// Nothing needs to be set up for the ARM backend, so the body is empty.
static void arch_init()
{
}
b9b61529 4203
4204// vim:shiftwidth=2:expandtab