drc: rework for 64bit, part 1
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
bdeade46 35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Register bitmask: 0x100f has bits 0-3 and 12 set; Mach targets also set
// bit 9. NOTE(review): presumably one bit per ARM register number
// (r0-r3, r12, plus r9 on Mach) - confirm against the users of this macro.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif
43
// Shorthand attribute for definitions that may go unreferenced.
#define unused __attribute__((unused))

// With DRC_DBG defined, GCC's unused-function/variable diagnostics are
// switched off for the rest of the translation unit.
#if defined(DRC_DBG)
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
61void indirect_jump_indexed();
62void indirect_jump();
63void do_interrupt();
64void jump_vaddr_r0();
65void jump_vaddr_r1();
66void jump_vaddr_r2();
67void jump_vaddr_r3();
68void jump_vaddr_r4();
69void jump_vaddr_r5();
70void jump_vaddr_r6();
71void jump_vaddr_r7();
72void jump_vaddr_r8();
73void jump_vaddr_r9();
74void jump_vaddr_r10();
75void jump_vaddr_r12();
76
77const u_int jump_vaddr_reg[16] = {
78 (int)jump_vaddr_r0,
79 (int)jump_vaddr_r1,
80 (int)jump_vaddr_r2,
81 (int)jump_vaddr_r3,
82 (int)jump_vaddr_r4,
83 (int)jump_vaddr_r5,
84 (int)jump_vaddr_r6,
85 (int)jump_vaddr_r7,
86 (int)jump_vaddr_r8,
87 (int)jump_vaddr_r9,
88 (int)jump_vaddr_r10,
89 0,
90 (int)jump_vaddr_r12,
91 0,
92 0,
93 0};
94
0bbd1454 95void invalidate_addr_r0();
96void invalidate_addr_r1();
97void invalidate_addr_r2();
98void invalidate_addr_r3();
99void invalidate_addr_r4();
100void invalidate_addr_r5();
101void invalidate_addr_r6();
102void invalidate_addr_r7();
103void invalidate_addr_r8();
104void invalidate_addr_r9();
105void invalidate_addr_r10();
106void invalidate_addr_r12();
107
108const u_int invalidate_addr_reg[16] = {
109 (int)invalidate_addr_r0,
110 (int)invalidate_addr_r1,
111 (int)invalidate_addr_r2,
112 (int)invalidate_addr_r3,
113 (int)invalidate_addr_r4,
114 (int)invalidate_addr_r5,
115 (int)invalidate_addr_r6,
116 (int)invalidate_addr_r7,
117 (int)invalidate_addr_r8,
118 (int)invalidate_addr_r9,
119 (int)invalidate_addr_r10,
120 0,
121 (int)invalidate_addr_r12,
122 0,
123 0,
124 0};
125
d148d265 126static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 127
57871462 128/* Linker */
129
// Patch the instruction word at addr so that it refers to target_.
// The top byte of the existing word selects how it is rewritten:
//   0xe2       low 12 bits become (distance >> 2) with 0xF00 or-ed in;
//              distance must be below 1024 and both addresses word aligned
//   0x72       word generated by emit_jno_unlikely: the same immediate form
//              when the distance fits (below 1024, or below 4096 and a
//              multiple of 16), otherwise the whole word is replaced by
//              0x7A000000 plus a 24-bit branch offset
//   otherwise  must satisfy (top & 0x0e) == 0x0a; the low 24 bits receive
//              the branch offset
// "distance" is target - addr - 8, computed in 32-bit unsigned arithmetic.
static void set_jump_target(void *addr, void *target_)
{
  u_char *bytes = addr;
  u_int *insn = (u_int *)bytes;
  u_int target = (u_int)target_;
  u_int dist = target - (u_int)insn - 8;
  u_char top = bytes[3];

  if (top == 0xe2) {
    assert(dist < 1024);
    assert(((uintptr_t)addr & 3) == 0);
    assert((target & 3) == 0);
    *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    return;
  }

  if (top == 0x72) {
    // generated by emit_jno_unlikely
    if (dist < 1024) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 2) | 0xF00;
    }
    else if (dist < 4096 && !(dist & 15)) {
      assert(((uintptr_t)addr & 3) == 0);
      assert((target & 3) == 0);
      *insn = (*insn & 0xFFFFF000) | (dist >> 4) | 0xE00;
    }
    else {
      *insn = (0x7A000000) | ((dist << 6) >> 8);
    }
    return;
  }

  assert((top & 0x0e) == 0xa);
  *insn = (*insn & 0xFF000000) | ((dist << 6) >> 8);
}
161
// Variant of the jump patcher that can also copy the instruction found at
// the branch target into the slot just before the branch. It works, but the
// difference in speed is usually insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *bytes=(u_char *)addr;
  u_int *insn=(u_int *)bytes;
  // copying requires the slot before the branch to hold 0xe28dd000
  assert(!copy||insn[-1]==0xe28dd000);
  if(bytes[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((bytes[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  if((target_insn&0x0e100000)==0) copy=0;          // ALU, no immediate, no flags
  if((target_insn&0x0c100000)==0x04100000) copy=0; // Load
  if(target_insn&0x08000000) copy=0;
  if(copy) {
    // move the target instruction into the slot and branch past it
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
57871462 196
197/* Literal pool */
e2b5e7aa 198static void add_literal(int addr,int val)
57871462 199{
15776b68 200 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 201 literals[literalcount][0]=addr;
202 literals[literalcount][1]=val;
9f51b4b9 203 literalcount++;
204}
57871462 205
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the address of the jumping instruction.
// The word at stub+4 must be "ldr rx, [pc, #ofs]"; the pointer stored in the
// literal that this ldr addresses (ldr address + ofs + 8) is returned.
static void *find_extjump_insn(void *stub)
{
  int *ldr = (int *)((char *)stub + 4);
  assert((*ldr & 0x0fff0000) == 0x059f0000); // ldr rx, [pc, #ofs]
  u_int ofs = *ldr & 0xfff;
  void **literal = (void **)((char *)ldr + ofs + 8);
  return *literal;
}
216
// Find where an external branch is linked to, using the address of its stub:
// get the address that the instruction one after the stub start loads
// (dyna_linker arg1), treat it as a pointer to a branch instruction,
// and return the address that branch jumps to.
static int get_pointer(void *stub)
{
  //printf("get_pointer(%x)\n",(int)stub);
  int *i_ptr=find_extjump_insn(stub);
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // Sign-extend the 24-bit word offset by hand: shifting the (negative)
  // instruction word left as a signed int is undefined behaviour.
  int disp=*i_ptr&0x00ffffff;
  if(disp&0x00800000) disp-=0x01000000;
  return (int)i_ptr+disp*4+8;
}
228
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// The bl is expected 4 words (6 with HAVE_ARMV7) into the dirty stub, or one
// word later. If the word after the bl is an unconditional branch
// (0xea......) its destination is returned, otherwise the address of that
// word itself.
static void *get_clean_addr(void *addr)
{
  signed int *ptr = addr;
  #ifndef HAVE_ARMV7
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    // Sign-extend the 24-bit word offset by hand: shifting the (negative)
    // instruction word left as a signed int is undefined behaviour.
    int disp=*ptr&0x00ffffff;
    if(disp&0x00800000) disp-=0x01000000;
    return (char *)ptr+disp*4+8; // follow jump
  }
  return ptr;
}
247
// Decode the (source, copy, len) triple that a dirty stub loads - from the
// literal pool, or from movw/movt pairs when HAVE_ARMV7 is defined - check
// that a bl follows, and compare the two memory ranges.
// Returns 1 when the len bytes at source and copy are identical, else 0.
static int verify_dirty(u_int *ptr)
{
#ifdef HAVE_ARMV7
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
#else
  // three pc-relative loads from the literal pool
  u_int ofs;
  assert((ptr[0]&0xFFFF0000)==0xe59f0000);
  ofs=ptr[0]&0xfff;
  u_int source=*(u_int*)((char *)&ptr[0]+ofs+8);
  assert((ptr[1]&0xFFFF0000)==0xe59f0000);
  ofs=ptr[1]&0xfff;
  u_int copy=*(u_int*)((char *)&ptr[1]+ofs+8);
  assert((ptr[2]&0xFFFF0000)==0xe59f0000);
  ofs=ptr[2]&0xfff;
  u_int len=*(u_int*)((char *)&ptr[2]+ofs+8);
  ptr+=4; // the three loads plus one more word
#endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  //printf("verify_dirty: %x %x %x\n",source,copy,len);
  return memcmp((void *)source,(void *)copy,len)==0;
}
279
280// This doesn't necessarily find all clean entry points, just
281// guarantees that it's not dirty
df4dc2b1 282static int isclean(void *addr)
57871462 283{
665f33e1 284 #ifndef HAVE_ARMV7
581335b0 285 u_int *ptr=((u_int *)addr)+4;
57871462 286 #else
581335b0 287 u_int *ptr=((u_int *)addr)+6;
57871462 288 #endif
289 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
290 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
294 return 1;
295}
296
4a35de07 297// get source that block at addr was compiled from (host pointers)
e2b5e7aa 298static void get_bounds(int addr,u_int *start,u_int *end)
57871462 299{
300 u_int *ptr=(u_int *)addr;
665f33e1 301 #ifndef HAVE_ARMV7
16c8be17 302 u_int offset;
57871462 303 // get from literal pool
15776b68 304 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 305 offset=*ptr&0xfff;
306 u_int source=*(u_int*)((void *)ptr+offset+8);
307 ptr++;
308 //assert((*ptr&0xFFFF0000)==0xe59f0000);
309 //offset=*ptr&0xfff;
310 //u_int copy=*(u_int*)((void *)ptr+offset+8);
311 ptr++;
312 assert((*ptr&0xFFFF0000)==0xe59f0000);
313 offset=*ptr&0xfff;
314 u_int len=*(u_int*)((void *)ptr+offset+8);
315 ptr++;
316 ptr++;
57871462 317 #else
318 // ARMv7 movw/movt
319 assert((*ptr&0xFFF00000)==0xe3000000);
320 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
321 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
322 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
323 ptr+=6;
324 #endif
325 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
326 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 327 *start=source;
328 *end=source+len;
329}
330
331/* Register allocation */
332
333// Note: registers are allocated clean (unmodified state)
334// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 335static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 336{
337 int r,hr;
338 int preferred_reg = (reg&7);
339 if(reg==CCREG) preferred_reg=HOST_CCREG;
340 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 341
57871462 342 // Don't allocate unused registers
343 if((cur->u>>reg)&1) return;
9f51b4b9 344
57871462 345 // see if it's already allocated
346 for(hr=0;hr<HOST_REGS;hr++)
347 {
348 if(cur->regmap[hr]==reg) return;
349 }
9f51b4b9 350
57871462 351 // Keep the same mapping if the register was already allocated in a loop
352 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 353
57871462 354 // Try to allocate the preferred register
355 if(cur->regmap[preferred_reg]==-1) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 r=cur->regmap[preferred_reg];
362 if(r<64&&((cur->u>>r)&1)) {
363 cur->regmap[preferred_reg]=reg;
364 cur->dirty&=~(1<<preferred_reg);
365 cur->isconst&=~(1<<preferred_reg);
366 return;
367 }
368 if(r>=64&&((cur->uu>>(r&63))&1)) {
369 cur->regmap[preferred_reg]=reg;
370 cur->dirty&=~(1<<preferred_reg);
371 cur->isconst&=~(1<<preferred_reg);
372 return;
373 }
9f51b4b9 374
57871462 375 // Clear any unneeded registers
376 // We try to keep the mapping consistent, if possible, because it
377 // makes branches easier (especially loops). So we try to allocate
378 // first (see above) before removing old mappings. If this is not
379 // possible then go ahead and clear out the registers that are no
380 // longer needed.
381 for(hr=0;hr<HOST_REGS;hr++)
382 {
383 r=cur->regmap[hr];
384 if(r>=0) {
385 if(r<64) {
386 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
387 }
388 else
389 {
390 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
391 }
392 }
393 }
394 // Try to allocate any available register, but prefer
395 // registers that have not been used recently.
396 if(i>0) {
397 for(hr=0;hr<HOST_REGS;hr++) {
398 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
399 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
400 cur->regmap[hr]=reg;
401 cur->dirty&=~(1<<hr);
402 cur->isconst&=~(1<<hr);
403 return;
404 }
405 }
406 }
407 }
408 // Try to allocate any available register
409 for(hr=0;hr<HOST_REGS;hr++) {
410 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
411 cur->regmap[hr]=reg;
412 cur->dirty&=~(1<<hr);
413 cur->isconst&=~(1<<hr);
414 return;
415 }
416 }
9f51b4b9 417
57871462 418 // Ok, now we have to evict someone
419 // Pick a register we hopefully won't need soon
420 u_char hsn[MAXREG+1];
421 memset(hsn,10,sizeof(hsn));
422 int j;
423 lsn(hsn,i,&preferred_reg);
424 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
425 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
426 if(i>0) {
427 // Don't evict the cycle count at entry points, otherwise the entry
428 // stub will have to write it.
429 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
430 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
431 for(j=10;j>=3;j--)
432 {
433 // Alloc preferred register if available
434 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
435 for(hr=0;hr<HOST_REGS;hr++) {
436 // Evict both parts of a 64-bit register
437 if((cur->regmap[hr]&63)==r) {
438 cur->regmap[hr]=-1;
439 cur->dirty&=~(1<<hr);
440 cur->isconst&=~(1<<hr);
441 }
442 }
443 cur->regmap[preferred_reg]=reg;
444 return;
445 }
446 for(r=1;r<=MAXREG;r++)
447 {
448 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
449 for(hr=0;hr<HOST_REGS;hr++) {
450 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
451 if(cur->regmap[hr]==r+64) {
452 cur->regmap[hr]=reg;
453 cur->dirty&=~(1<<hr);
454 cur->isconst&=~(1<<hr);
455 return;
456 }
457 }
458 }
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
461 if(cur->regmap[hr]==r) {
462 cur->regmap[hr]=reg;
463 cur->dirty&=~(1<<hr);
464 cur->isconst&=~(1<<hr);
465 return;
466 }
467 }
468 }
469 }
470 }
471 }
472 }
473 for(j=10;j>=0;j--)
474 {
475 for(r=1;r<=MAXREG;r++)
476 {
477 if(hsn[r]==j) {
478 for(hr=0;hr<HOST_REGS;hr++) {
479 if(cur->regmap[hr]==r+64) {
480 cur->regmap[hr]=reg;
481 cur->dirty&=~(1<<hr);
482 cur->isconst&=~(1<<hr);
483 return;
484 }
485 }
486 for(hr=0;hr<HOST_REGS;hr++) {
487 if(cur->regmap[hr]==r) {
488 cur->regmap[hr]=reg;
489 cur->dirty&=~(1<<hr);
490 cur->isconst&=~(1<<hr);
491 return;
492 }
493 }
494 }
495 }
496 }
c43b5311 497 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 498}
499
e2b5e7aa 500static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 501{
502 int preferred_reg = 8+(reg&1);
503 int r,hr;
9f51b4b9 504
57871462 505 // allocate the lower 32 bits
506 alloc_reg(cur,i,reg);
9f51b4b9 507
57871462 508 // Don't allocate unused registers
509 if((cur->uu>>reg)&1) return;
9f51b4b9 510
57871462 511 // see if the upper half is already allocated
512 for(hr=0;hr<HOST_REGS;hr++)
513 {
514 if(cur->regmap[hr]==reg+64) return;
515 }
9f51b4b9 516
57871462 517 // Keep the same mapping if the register was already allocated in a loop
518 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 519
57871462 520 // Try to allocate the preferred register
521 if(cur->regmap[preferred_reg]==-1) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 r=cur->regmap[preferred_reg];
528 if(r<64&&((cur->u>>r)&1)) {
529 cur->regmap[preferred_reg]=reg|64;
530 cur->dirty&=~(1<<preferred_reg);
531 cur->isconst&=~(1<<preferred_reg);
532 return;
533 }
534 if(r>=64&&((cur->uu>>(r&63))&1)) {
535 cur->regmap[preferred_reg]=reg|64;
536 cur->dirty&=~(1<<preferred_reg);
537 cur->isconst&=~(1<<preferred_reg);
538 return;
539 }
9f51b4b9 540
57871462 541 // Clear any unneeded registers
542 // We try to keep the mapping consistent, if possible, because it
543 // makes branches easier (especially loops). So we try to allocate
544 // first (see above) before removing old mappings. If this is not
545 // possible then go ahead and clear out the registers that are no
546 // longer needed.
547 for(hr=HOST_REGS-1;hr>=0;hr--)
548 {
549 r=cur->regmap[hr];
550 if(r>=0) {
551 if(r<64) {
552 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
553 }
554 else
555 {
556 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
557 }
558 }
559 }
560 // Try to allocate any available register, but prefer
561 // registers that have not been used recently.
562 if(i>0) {
563 for(hr=0;hr<HOST_REGS;hr++) {
564 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
565 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
566 cur->regmap[hr]=reg|64;
567 cur->dirty&=~(1<<hr);
568 cur->isconst&=~(1<<hr);
569 return;
570 }
571 }
572 }
573 }
574 // Try to allocate any available register
575 for(hr=0;hr<HOST_REGS;hr++) {
576 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
577 cur->regmap[hr]=reg|64;
578 cur->dirty&=~(1<<hr);
579 cur->isconst&=~(1<<hr);
580 return;
581 }
582 }
9f51b4b9 583
57871462 584 // Ok, now we have to evict someone
585 // Pick a register we hopefully won't need soon
586 u_char hsn[MAXREG+1];
587 memset(hsn,10,sizeof(hsn));
588 int j;
589 lsn(hsn,i,&preferred_reg);
590 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
591 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
592 if(i>0) {
593 // Don't evict the cycle count at entry points, otherwise the entry
594 // stub will have to write it.
595 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
596 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
597 for(j=10;j>=3;j--)
598 {
599 // Alloc preferred register if available
600 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
601 for(hr=0;hr<HOST_REGS;hr++) {
602 // Evict both parts of a 64-bit register
603 if((cur->regmap[hr]&63)==r) {
604 cur->regmap[hr]=-1;
605 cur->dirty&=~(1<<hr);
606 cur->isconst&=~(1<<hr);
607 }
608 }
609 cur->regmap[preferred_reg]=reg|64;
610 return;
611 }
612 for(r=1;r<=MAXREG;r++)
613 {
614 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
615 for(hr=0;hr<HOST_REGS;hr++) {
616 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
617 if(cur->regmap[hr]==r+64) {
618 cur->regmap[hr]=reg|64;
619 cur->dirty&=~(1<<hr);
620 cur->isconst&=~(1<<hr);
621 return;
622 }
623 }
624 }
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
627 if(cur->regmap[hr]==r) {
628 cur->regmap[hr]=reg|64;
629 cur->dirty&=~(1<<hr);
630 cur->isconst&=~(1<<hr);
631 return;
632 }
633 }
634 }
635 }
636 }
637 }
638 }
639 for(j=10;j>=0;j--)
640 {
641 for(r=1;r<=MAXREG;r++)
642 {
643 if(hsn[r]==j) {
644 for(hr=0;hr<HOST_REGS;hr++) {
645 if(cur->regmap[hr]==r+64) {
646 cur->regmap[hr]=reg|64;
647 cur->dirty&=~(1<<hr);
648 cur->isconst&=~(1<<hr);
649 return;
650 }
651 }
652 for(hr=0;hr<HOST_REGS;hr++) {
653 if(cur->regmap[hr]==r) {
654 cur->regmap[hr]=reg|64;
655 cur->dirty&=~(1<<hr);
656 cur->isconst&=~(1<<hr);
657 return;
658 }
659 }
660 }
661 }
662 }
c43b5311 663 SysPrintf("This shouldn't happen");exit(1);
57871462 664}
665
666// Allocate a temporary register. This is done without regard to
667// dirty status or whether the register we request is on the unneeded list
668// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 669static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 670{
671 int r,hr;
672 int preferred_reg = -1;
9f51b4b9 673
57871462 674 // see if it's already allocated
675 for(hr=0;hr<HOST_REGS;hr++)
676 {
677 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
678 }
9f51b4b9 679
57871462 680 // Try to allocate any available register
681 for(hr=HOST_REGS-1;hr>=0;hr--) {
682 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
683 cur->regmap[hr]=reg;
684 cur->dirty&=~(1<<hr);
685 cur->isconst&=~(1<<hr);
686 return;
687 }
688 }
9f51b4b9 689
57871462 690 // Find an unneeded register
691 for(hr=HOST_REGS-1;hr>=0;hr--)
692 {
693 r=cur->regmap[hr];
694 if(r>=0) {
695 if(r<64) {
696 if((cur->u>>r)&1) {
697 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
698 cur->regmap[hr]=reg;
699 cur->dirty&=~(1<<hr);
700 cur->isconst&=~(1<<hr);
701 return;
702 }
703 }
704 }
705 else
706 {
707 if((cur->uu>>(r&63))&1) {
708 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
709 cur->regmap[hr]=reg;
710 cur->dirty&=~(1<<hr);
711 cur->isconst&=~(1<<hr);
712 return;
713 }
714 }
715 }
716 }
717 }
9f51b4b9 718
57871462 719 // Ok, now we have to evict someone
720 // Pick a register we hopefully won't need soon
721 // TODO: we might want to follow unconditional jumps here
722 // TODO: get rid of dupe code and make this into a function
723 u_char hsn[MAXREG+1];
724 memset(hsn,10,sizeof(hsn));
725 int j;
726 lsn(hsn,i,&preferred_reg);
727 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
728 if(i>0) {
729 // Don't evict the cycle count at entry points, otherwise the entry
730 // stub will have to write it.
731 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
732 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
733 for(j=10;j>=3;j--)
734 {
735 for(r=1;r<=MAXREG;r++)
736 {
737 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
738 for(hr=0;hr<HOST_REGS;hr++) {
739 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
740 if(cur->regmap[hr]==r+64) {
741 cur->regmap[hr]=reg;
742 cur->dirty&=~(1<<hr);
743 cur->isconst&=~(1<<hr);
744 return;
745 }
746 }
747 }
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
750 if(cur->regmap[hr]==r) {
751 cur->regmap[hr]=reg;
752 cur->dirty&=~(1<<hr);
753 cur->isconst&=~(1<<hr);
754 return;
755 }
756 }
757 }
758 }
759 }
760 }
761 }
762 for(j=10;j>=0;j--)
763 {
764 for(r=1;r<=MAXREG;r++)
765 {
766 if(hsn[r]==j) {
767 for(hr=0;hr<HOST_REGS;hr++) {
768 if(cur->regmap[hr]==r+64) {
769 cur->regmap[hr]=reg;
770 cur->dirty&=~(1<<hr);
771 cur->isconst&=~(1<<hr);
772 return;
773 }
774 }
775 for(hr=0;hr<HOST_REGS;hr++) {
776 if(cur->regmap[hr]==r) {
777 cur->regmap[hr]=reg;
778 cur->dirty&=~(1<<hr);
779 cur->isconst&=~(1<<hr);
780 return;
781 }
782 }
783 }
784 }
785 }
c43b5311 786 SysPrintf("This shouldn't happen");exit(1);
57871462 787}
e2b5e7aa 788
57871462 789// Allocate a specific ARM register.
e2b5e7aa 790static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 791{
792 int n;
f776eb14 793 int dirty=0;
9f51b4b9 794
57871462 795 // see if it's already allocated (and dealloc it)
796 for(n=0;n<HOST_REGS;n++)
797 {
f776eb14 798 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
799 dirty=(cur->dirty>>n)&1;
800 cur->regmap[n]=-1;
801 }
57871462 802 }
9f51b4b9 803
57871462 804 cur->regmap[hr]=reg;
805 cur->dirty&=~(1<<hr);
f776eb14 806 cur->dirty|=dirty<<hr;
57871462 807 cur->isconst&=~(1<<hr);
808}
809
810// Alloc cycle count into dedicated register
e2b5e7aa 811static void alloc_cc(struct regstat *cur,int i)
57871462 812{
813 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
814}
815
816/* Special alloc */
817
818
819/* Assembler */
820
e2b5e7aa 821static unused char regname[16][4] = {
57871462 822 "r0",
823 "r1",
824 "r2",
825 "r3",
826 "r4",
827 "r5",
828 "r6",
829 "r7",
830 "r8",
831 "r9",
832 "r10",
833 "fp",
834 "r12",
835 "sp",
836 "lr",
837 "pc"};
838
e2b5e7aa 839static void output_w32(u_int word)
57871462 840{
841 *((u_int *)out)=word;
842 out+=4;
843}
e2b5e7aa 844
// Pack three register numbers into instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each must be below 16 (asserted).
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(rm < 16);
  u_int fields = rm;
  fields |= rd << 12;
  fields |= rn << 16;
  return fields;
}
e2b5e7aa 852
// Pack rd (bits 12-15), rn (bits 16-19) and an 8-bit immediate placed
// `shift` bits to the left into instruction fields. shift must be even;
// it is stored as ((32 - shift) & 30) << 7, i.e. as a rotate-right amount.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd < 16);
  assert(rn < 16);
  assert(imm < 256);
  assert((shift & 1) == 0);
  u_int rotate = (32 - shift) & 30;
  return (rn << 16) | (rd << 12) | (rotate << 7) | imm;
}
e2b5e7aa 861
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success stores the 12-bit operand encoding (rotate field << 7 | value)
// in *encoded and returns 1; otherwise leaves *encoded at 0 and returns 0.
// imm == 0 succeeds with an encoding of 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  *encoded = 0;
  if (imm == 0)
    return 1;
  for (int rot = 32; rot > 0; rot -= 2) {
    if (imm < 256) {
      *encoded = ((rot & 30) << 7) | imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm = (imm >> 2) | (imm << 30);
  }
  return 0;
}
e2b5e7aa 877
878static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 879{
880 u_int ret=genimm(imm,encoded);
881 assert(ret);
581335b0 882 (void)ret;
cfbd3c6e 883}
e2b5e7aa 884
885static u_int genjmp(u_int addr)
57871462 886{
887 int offset=addr-(int)out-8;
e80343e2 888 if(offset<-33554432||offset>=33554432) {
889 if (addr>2) {
c43b5311 890 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 891 exit(1);
892 }
893 return 0;
894 }
57871462 895 return ((u_int)offset>>2)&0xffffff;
896}
897
e2b5e7aa 898static void emit_mov(int rs,int rt)
57871462 899{
900 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
901 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
902}
903
e2b5e7aa 904static void emit_movs(int rs,int rt)
57871462 905{
906 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
907 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
908}
909
e2b5e7aa 910static void emit_add(int rs1,int rs2,int rt)
57871462 911{
912 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
913 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
914}
915
e2b5e7aa 916static void emit_adds(int rs1,int rs2,int rt)
57871462 917{
918 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
919 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
920}
921
e2b5e7aa 922static void emit_adcs(int rs1,int rs2,int rt)
57871462 923{
924 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
926}
927
e2b5e7aa 928static void emit_sbc(int rs1,int rs2,int rt)
57871462 929{
930 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
932}
933
e2b5e7aa 934static void emit_sbcs(int rs1,int rs2,int rt)
57871462 935{
936 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
938}
939
e2b5e7aa 940static void emit_neg(int rs, int rt)
57871462 941{
942 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
943 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
944}
945
e2b5e7aa 946static void emit_negs(int rs, int rt)
57871462 947{
948 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
949 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
950}
951
e2b5e7aa 952static void emit_sub(int rs1,int rs2,int rt)
57871462 953{
954 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
955 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
956}
957
e2b5e7aa 958static void emit_subs(int rs1,int rs2,int rt)
57871462 959{
960 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
961 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
962}
963
e2b5e7aa 964static void emit_zeroreg(int rt)
57871462 965{
966 assem_debug("mov %s,#0\n",regname[rt]);
967 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
968}
969
e2b5e7aa 970static void emit_loadlp(u_int imm,u_int rt)
790ee18e 971{
972 add_literal((int)out,imm);
973 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
974 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
975}
e2b5e7aa 976
977static void emit_movw(u_int imm,u_int rt)
790ee18e 978{
979 assert(imm<65536);
980 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
981 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
982}
e2b5e7aa 983
984static void emit_movt(u_int imm,u_int rt)
790ee18e 985{
986 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
987 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
988}
e2b5e7aa 989
990static void emit_movimm(u_int imm,u_int rt)
790ee18e 991{
992 u_int armval;
993 if(genimm(imm,&armval)) {
994 assem_debug("mov %s,#%d\n",regname[rt],imm);
995 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
996 }else if(genimm(~imm,&armval)) {
997 assem_debug("mvn %s,#%d\n",regname[rt],imm);
998 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
999 }else if(imm<65536) {
665f33e1 1000 #ifndef HAVE_ARMV7
790ee18e 1001 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1002 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1003 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1004 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1005 #else
1006 emit_movw(imm,rt);
1007 #endif
1008 }else{
665f33e1 1009 #ifndef HAVE_ARMV7
790ee18e 1010 emit_loadlp(imm,rt);
1011 #else
1012 emit_movw(imm&0x0000FFFF,rt);
1013 emit_movt(imm&0xFFFF0000,rt);
1014 #endif
1015 }
1016}
e2b5e7aa 1017
1018static void emit_pcreladdr(u_int rt)
790ee18e 1019{
1020 assem_debug("add %s,pc,#?\n",regname[rt]);
1021 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1022}
1023
e2b5e7aa 1024static void emit_loadreg(int r, int hr)
57871462 1025{
3d624f89 1026 if(r&64) {
c43b5311 1027 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1028 assert(0);
1029 return;
3d624f89 1030 }
57871462 1031 if((r&63)==0)
1032 emit_zeroreg(hr);
1033 else {
3d624f89 1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==CSREG) addr=(int)&Status;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 if(r==INVCP) addr=(int)&invc_ptr;
1041 u_int offset = addr-(u_int)&dynarec_local;
1042 assert(offset<4096);
1043 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1044 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1045 }
1046}
e2b5e7aa 1047
1048static void emit_storereg(int r, int hr)
57871462 1049{
3d624f89 1050 if(r&64) {
c43b5311 1051 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1052 assert(0);
1053 return;
3d624f89 1054 }
3d624f89 1055 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1056 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1057 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1058 if(r==CCREG) addr=(int)&cycle_count;
1059 if(r==FSREG) addr=(int)&FCR31;
1060 u_int offset = addr-(u_int)&dynarec_local;
1061 assert(offset<4096);
1062 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1063 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1064}
1065
e2b5e7aa 1066static void emit_test(int rs, int rt)
57871462 1067{
1068 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1069 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1070}
1071
e2b5e7aa 1072static void emit_testimm(int rs,int imm)
57871462 1073{
1074 u_int armval;
5a05d80c 1075 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1076 genimm_checked(imm,&armval);
57871462 1077 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1078}
1079
e2b5e7aa 1080static void emit_testeqimm(int rs,int imm)
b9b61529 1081{
1082 u_int armval;
1083 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1084 genimm_checked(imm,&armval);
b9b61529 1085 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1086}
1087
e2b5e7aa 1088static void emit_not(int rs,int rt)
57871462 1089{
1090 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1091 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1092}
1093
e2b5e7aa 1094static void emit_mvnmi(int rs,int rt)
b9b61529 1095{
1096 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1097 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1098}
1099
e2b5e7aa 1100static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1101{
1102 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1103 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1104}
1105
e2b5e7aa 1106static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1107{
1108 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1109 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1110}
e2b5e7aa 1111
1112static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1113{
1114 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1115 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1116}
1117
e2b5e7aa 1118static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1119{
1120 assert(rs<16);
1121 assert(rt<16);
1122 assert(imm<32);
1123 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1124 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1125}
1126
e2b5e7aa 1127static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1128{
1129 assert(rs<16);
1130 assert(rt<16);
1131 assert(imm<32);
1132 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1133 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1134}
1135
e2b5e7aa 1136static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1137{
1138 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1139 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1140}
1141
e2b5e7aa 1142static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1143{
1144 assert(rs<16);
1145 assert(rt<16);
1146 if(imm!=0) {
57871462 1147 u_int armval;
1148 if(genimm(imm,&armval)) {
1149 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1150 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1151 }else if(genimm(-imm,&armval)) {
8a0a8423 1152 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1153 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1154 #ifdef HAVE_ARMV7
1155 }else if(rt!=rs&&(u_int)imm<65536) {
1156 emit_movw(imm&0x0000ffff,rt);
1157 emit_add(rs,rt,rt);
1158 }else if(rt!=rs&&(u_int)-imm<65536) {
1159 emit_movw(-imm&0x0000ffff,rt);
1160 emit_sub(rs,rt,rt);
1161 #endif
1162 }else if((u_int)-imm<65536) {
57871462 1163 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1165 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1167 }else {
1168 do {
1169 int shift = (ffs(imm) - 1) & ~1;
1170 int imm8 = imm & (0xff << shift);
1171 genimm_checked(imm8,&armval);
1172 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1173 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1174 rs = rt;
1175 imm &= ~imm8;
1176 }
1177 while (imm != 0);
57871462 1178 }
1179 }
1180 else if(rs!=rt) emit_mov(rs,rt);
1181}
1182
e2b5e7aa 1183static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1184{
1185 assert(imm>-65536&&imm<65536);
1186 u_int armval;
1187 if(genimm(imm,&armval)) {
1188 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1189 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1190 }else if(genimm(-imm,&armval)) {
1191 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1192 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1193 }else if(imm<0) {
1194 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1195 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1196 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1197 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1198 }else{
1199 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1200 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1201 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1202 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1203 }
1204}
e2b5e7aa 1205
1206static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1207{
1208 emit_addimm(rt,imm,rt);
1209}
1210
e2b5e7aa 1211static void emit_addnop(u_int r)
57871462 1212{
1213 assert(r<16);
1214 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1215 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1216}
1217
e2b5e7aa 1218static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1219{
1220 u_int armval;
cfbd3c6e 1221 genimm_checked(imm,&armval);
57871462 1222 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1223 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1224}
1edfcc68 1225
e2b5e7aa 1226static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1227{
1228 assert(0);
1229 u_int armval;
cfbd3c6e 1230 genimm_checked(imm,&armval);
57871462 1231 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1232 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1233}
1234
e2b5e7aa 1235static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1236{
1237 // TODO: if(genimm(imm,&armval)) ...
1238 // else
1239 emit_movimm(imm,HOST_TEMPREG);
1240 emit_adds(HOST_TEMPREG,rsl,rtl);
1241 emit_adcimm(rsh,0,rth);
1242}
1243
e2b5e7aa 1244static void emit_andimm(int rs,int imm,int rt)
57871462 1245{
1246 u_int armval;
790ee18e 1247 if(imm==0) {
1248 emit_zeroreg(rt);
1249 }else if(genimm(imm,&armval)) {
57871462 1250 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1251 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1252 }else if(genimm(~imm,&armval)) {
1253 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1254 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1255 }else if(imm==65535) {
332a4533 1256 #ifndef HAVE_ARMV6
57871462 1257 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1258 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1259 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1260 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1261 #else
1262 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1263 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1264 #endif
1265 }else{
1266 assert(imm>0&&imm<65535);
665f33e1 1267 #ifndef HAVE_ARMV7
57871462 1268 assem_debug("mov r14,#%d\n",imm&0xFF00);
1269 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1270 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1271 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1272 #else
1273 emit_movw(imm,HOST_TEMPREG);
1274 #endif
1275 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1276 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1277 }
1278}
1279
e2b5e7aa 1280static void emit_orimm(int rs,int imm,int rt)
57871462 1281{
1282 u_int armval;
790ee18e 1283 if(imm==0) {
1284 if(rs!=rt) emit_mov(rs,rt);
1285 }else if(genimm(imm,&armval)) {
57871462 1286 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1287 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1288 }else{
1289 assert(imm>0&&imm<65536);
1290 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1292 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1294 }
1295}
1296
e2b5e7aa 1297static void emit_xorimm(int rs,int imm,int rt)
57871462 1298{
57871462 1299 u_int armval;
790ee18e 1300 if(imm==0) {
1301 if(rs!=rt) emit_mov(rs,rt);
1302 }else if(genimm(imm,&armval)) {
57871462 1303 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1304 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1305 }else{
514ed0d9 1306 assert(imm>0&&imm<65536);
57871462 1307 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1309 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1311 }
1312}
1313
e2b5e7aa 1314static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1315{
1316 assert(imm>0);
1317 assert(imm<32);
1318 //if(imm==1) ...
1319 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1320 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1321}
1322
e2b5e7aa 1323static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1324{
1325 assert(imm>0);
1326 assert(imm<32);
1327 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1328 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1329}
1330
e2b5e7aa 1331static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1332{
1333 assert(imm>0);
1334 assert(imm<32);
1335 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1336 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1337}
1338
e2b5e7aa 1339static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1340{
1341 assert(imm>0);
1342 assert(imm<32);
1343 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1344 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1345}
1346
e2b5e7aa 1347static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1348{
1349 assert(imm>0);
1350 assert(imm<32);
1351 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1352 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1353}
1354
e2b5e7aa 1355static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1356{
1357 assert(imm>0);
1358 assert(imm<32);
1359 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1360 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1361}
1362
e2b5e7aa 1363static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1364{
1365 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1366 assert(imm>0);
1367 assert(imm<32);
1368 //if(imm==1) ...
1369 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1370 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1371 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1372 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1373}
1374
e2b5e7aa 1375static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1376{
1377 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1378 assert(imm>0);
1379 assert(imm<32);
1380 //if(imm==1) ...
1381 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1382 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1383 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1384 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1385}
1386
// Sign-extend the low 16 bits of rs into rt: "sxth" when HAVE_ARMV6 is
// defined, otherwise a shift-left/arithmetic-shift-right pair by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1397
// Sign-extend the low 8 bits of rs into rt: "sxtb" when HAVE_ARMV6 is
// defined, otherwise a shift-left/arithmetic-shift-right pair by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1408
e2b5e7aa 1409static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1410{
1411 assert(rs<16);
1412 assert(rt<16);
1413 assert(shift<16);
1414 //if(imm==1) ...
1415 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1416 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1417}
e2b5e7aa 1418
1419static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1420{
1421 assert(rs<16);
1422 assert(rt<16);
1423 assert(shift<16);
1424 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1425 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1426}
e2b5e7aa 1427
1428static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1429{
1430 assert(rs<16);
1431 assert(rt<16);
1432 assert(shift<16);
1433 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1434 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1435}
57871462 1436
e2b5e7aa 1437static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1438{
1439 assert(rs<16);
1440 assert(rt<16);
1441 assert(shift<16);
1442 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1443 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1444}
e2b5e7aa 1445
1446static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1447{
1448 assert(rs<16);
1449 assert(rt<16);
1450 assert(shift<16);
1451 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1452 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1453}
1454
e2b5e7aa 1455static void emit_cmpimm(int rs,int imm)
57871462 1456{
1457 u_int armval;
1458 if(genimm(imm,&armval)) {
5a05d80c 1459 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1460 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1461 }else if(genimm(-imm,&armval)) {
5a05d80c 1462 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1463 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1464 }else if(imm>0) {
1465 assert(imm<65536);
57871462 1466 emit_movimm(imm,HOST_TEMPREG);
57871462 1467 assem_debug("cmp %s,r14\n",regname[rs]);
1468 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1469 }else{
1470 assert(imm>-65536);
57871462 1471 emit_movimm(-imm,HOST_TEMPREG);
57871462 1472 assem_debug("cmn %s,r14\n",regname[rs]);
1473 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1474 }
1475}
1476
e2b5e7aa 1477static void emit_cmovne_imm(int imm,int rt)
57871462 1478{
1479 assem_debug("movne %s,#%d\n",regname[rt],imm);
1480 u_int armval;
cfbd3c6e 1481 genimm_checked(imm,&armval);
57871462 1482 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1483}
e2b5e7aa 1484
1485static void emit_cmovl_imm(int imm,int rt)
57871462 1486{
1487 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1488 u_int armval;
cfbd3c6e 1489 genimm_checked(imm,&armval);
57871462 1490 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1491}
e2b5e7aa 1492
1493static void emit_cmovb_imm(int imm,int rt)
57871462 1494{
1495 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1496 u_int armval;
cfbd3c6e 1497 genimm_checked(imm,&armval);
57871462 1498 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1499}
e2b5e7aa 1500
1501static void emit_cmovs_imm(int imm,int rt)
57871462 1502{
1503 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1504 u_int armval;
cfbd3c6e 1505 genimm_checked(imm,&armval);
57871462 1506 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1507}
e2b5e7aa 1508
1509static void emit_cmove_reg(int rs,int rt)
57871462 1510{
1511 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1512 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1513}
e2b5e7aa 1514
1515static void emit_cmovne_reg(int rs,int rt)
57871462 1516{
1517 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1518 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1519}
e2b5e7aa 1520
1521static void emit_cmovl_reg(int rs,int rt)
57871462 1522{
1523 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1524 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1525}
e2b5e7aa 1526
1527static void emit_cmovs_reg(int rs,int rt)
57871462 1528{
1529 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1530 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1531}
1532
// Set rt to 1 if rs < imm (via the LT-conditional move), else 0.
// When rt aliases rs the zeroing must happen after the compare, so it is
// done with emit_movimm() instead of emit_zeroreg().
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1540
// Set rt to 1 if the compare of rs against imm leaves carry clear (via
// the CC-conditional move), else 0. When rt aliases rs the zeroing must
// happen after the compare, so it is done with emit_movimm().
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1548
// 64-bit signed "set if less than immediate" on the pair rsh:rsl.
// The low-word result from emit_slti32() is corrected by looking at the
// high word: compared against -1 for a negative imm, tested against
// itself otherwise. rsh must not alias rt.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
e2b5e7aa 1566
// 64-bit unsigned "set if less than immediate" on the pair rsh:rsl.
// The low-word result from emit_sltiu32() is overridden when the high
// word differs from the sign extension of imm: forced to 0 for imm >= 0,
// forced to 1 for imm < 0. rsh must not alias rt.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1582
e2b5e7aa 1583static void emit_cmp(int rs,int rt)
57871462 1584{
1585 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1586 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1587}
e2b5e7aa 1588
// Set rt to 1 if rs > 0 (signed), else 0: compare rs with 1, load 1,
// then conditionally replace it with 0 when rs < 1.
static void emit_set_gz32(int rs, int rt)
{
  const int threshold=1;
  emit_cmpimm(rs,threshold);
  emit_movimm(1,rt);
  emit_cmovl_imm(0,rt);
}
e2b5e7aa 1596
// Set rt to 1 if rs is non-zero; a zero value is left as copied.
// Flags come from a flag-setting move when rt differs from rs, or from a
// self-test when they are the same register.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1604
// 64-bit "greater than zero" on the pair rsh:rsl, result in rt.
// Starts from the low-word result of emit_set_gz32(), then overrides it
// from the high word: 1 if rsh is non-zero, 0 if rsh is negative.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  // High word decides whenever it is non-zero.
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
e2b5e7aa 1613
// 64-bit "non-zero" on the pair rsh:rsl: rt = rsh|rsl with flags set,
// then rt is forced to 1 when that value is non-zero.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  const int nonzero_result=1;
  emit_cmovne_imm(nonzero_result,rt);
}
e2b5e7aa 1620
// Set rt to 1 if rs1 < rs2 (via the LT-conditional move), else 0.
// If rt aliases either source, it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1629
// Set rt to 1 if comparing rs1 with rs2 leaves carry clear (via the
// CC-conditional move), else 0. If rt aliases either source, it is
// cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1638
1639static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1640{
1641 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1642 assert(u1!=rt);
1643 assert(u2!=rt);
1644 emit_cmp(l1,l2);
1645 emit_movimm(0,rt);
1646 emit_sbcs(u1,u2,HOST_TEMPREG);
1647 emit_cmovl_imm(1,rt);
1648}
e2b5e7aa 1649
1650static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1651{
1652 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1653 assert(u1!=rt);
1654 assert(u2!=rt);
1655 emit_cmp(l1,l2);
1656 emit_movimm(0,rt);
1657 emit_sbcs(u1,u2,HOST_TEMPREG);
1658 emit_cmovb_imm(1,rt);
1659}
1660
#ifdef DRC_DBG
// Debug builds only: table mapping helper-function addresses to printable
// names, used to annotate branch targets in the assembler log.
extern void gen_interupt();
extern void do_insn_cmp();
// Each entry's name carries a leading space so it can be appended
// directly after the numeric target in a log line.
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Return the table name (with its leading space) for address a, or an
// empty string when a is not in the table.
static const char *func_name(intptr_t a)
{
  // Unsigned index: the bound is a sizeof expression, so a signed index
  // would be a signed/unsigned comparison.
  u_int i;
  for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
    if (function_names[i].addr == a)
      return function_names[i].name;
  return "";
}
#else
// Without DRC_DBG no names are available.
#define func_name(x) ""
#endif
1701
e2b5e7aa 1702static void emit_call(int a)
57871462 1703{
dd114d7d 1704 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1705 u_int offset=genjmp(a);
1706 output_w32(0xeb000000|offset);
1707}
e2b5e7aa 1708
1709static void emit_jmp(int a)
57871462 1710{
dd114d7d 1711 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1712 u_int offset=genjmp(a);
1713 output_w32(0xea000000|offset);
1714}
e2b5e7aa 1715
// Emit "bne a" (branch if not equal).
static void emit_jne(int a)
{
  assem_debug("bne %x\n",a);
  output_w32(0x1a000000|genjmp(a));
}
e2b5e7aa 1722
// Emit "beq a" (branch if equal).
static void emit_jeq(int a)
{
  assem_debug("beq %x\n",a);
  output_w32(0x0a000000|genjmp(a));
}
e2b5e7aa 1729
// Emit "bmi a" (branch if negative).
static void emit_js(int a)
{
  assem_debug("bmi %x\n",a);
  output_w32(0x4a000000|genjmp(a));
}
e2b5e7aa 1736
// Emit "bpl a" (branch if positive or zero).
static void emit_jns(int a)
{
  assem_debug("bpl %x\n",a);
  output_w32(0x5a000000|genjmp(a));
}
e2b5e7aa 1743
// Emit "blt a" (branch if signed less-than).
static void emit_jl(int a)
{
  assem_debug("blt %x\n",a);
  output_w32(0xba000000|genjmp(a));
}
e2b5e7aa 1750
// Emit "bge a" (branch if signed greater-or-equal).
static void emit_jge(int a)
{
  assem_debug("bge %x\n",a);
  output_w32(0xaa000000|genjmp(a));
}
e2b5e7aa 1757
// Emit "bvc a" (branch if overflow clear).
static void emit_jno(int a)
{
  assem_debug("bvc %x\n",a);
  output_w32(0x7a000000|genjmp(a));
}
e2b5e7aa 1764
// Emit "bcs a" (branch if carry set).
static void emit_jc(int a)
{
  assem_debug("bcs %x\n",a);
  output_w32(0x2a000000|genjmp(a));
}
e2b5e7aa 1771
// Emit "bcc a" (branch if carry clear).
static void emit_jcc(int a)
{
  assem_debug("bcc %x\n",a);
  output_w32(0x3a000000|genjmp(a));
}
1778
e2b5e7aa 1779static void emit_callreg(u_int r)
57871462 1780{
c6c3b1b3 1781 assert(r<15);
1782 assem_debug("blx %s\n",regname[r]);
1783 output_w32(0xe12fff30|r);
57871462 1784}
e2b5e7aa 1785
1786static void emit_jmpreg(u_int r)
57871462 1787{
1788 assem_debug("mov pc,%s\n",regname[r]);
1789 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1790}
1791
e2b5e7aa 1792static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1793{
1794 assert(offset>-4096&&offset<4096);
1795 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1796 if(offset>=0) {
1797 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1798 }else{
1799 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1800 }
1801}
e2b5e7aa 1802
1803static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1804{
1805 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1806 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1807}
e2b5e7aa 1808
1809static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1810{
1811 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1812 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1813}
e2b5e7aa 1814
1815static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1816{
1817 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1818 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1819}
e2b5e7aa 1820
1821static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1822{
1823 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1824 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1825}
e2b5e7aa 1826
1827static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1828{
1829 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1830 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1831}
e2b5e7aa 1832
1833static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1834{
1835 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1836 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1837}
e2b5e7aa 1838
// Word load with optional map register. A negative map means "no map":
// a plain offset load is emitted. Otherwise addr must be 0 and the load
// is indexed by map scaled by 4.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1847
// Doubleword load into rh (word at addr) and rl (following word), with
// optional map register. rh may be negative to skip the first word.
// In the mapped case rh must not alias rs, and map is incremented by 1
// between the two loads (it is left modified).
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  // map is scaled by 4 in the access, so +1 selects the next word.
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1860
1861static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1862{
1863 assert(offset>-256&&offset<256);
1864 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1865 if(offset>=0) {
1866 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1867 }else{
1868 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1869 }
1870}
e2b5e7aa 1871
1872static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1873{
1874 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1875 else {
1876 if(addr==0) {
1877 emit_shlimm(map,2,map);
1878 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1879 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1880 }else{
1881 assert(addr>-256&&addr<256);
1882 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1883 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1884 emit_movsbl_indexed(addr, rt, rt);
1885 }
1886 }
1887}
e2b5e7aa 1888
1889static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1890{
1891 assert(offset>-256&&offset<256);
1892 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1893 if(offset>=0) {
1894 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1895 }else{
1896 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1897 }
1898}
e2b5e7aa 1899
1900static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1901{
1902 assert(offset>-4096&&offset<4096);
1903 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1906 }else{
1907 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1908 }
1909}
e2b5e7aa 1910
1911static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1912{
1913 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1914 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1915}
e2b5e7aa 1916
// Unsigned byte load with optional map register. Without a map
// (map < 0) a plain offset load is emitted. With a map, a non-zero addr
// is first added to rs into rt, and the load is indexed by map scaled
// by 4.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1929
1930static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1931{
1932 assert(offset>-256&&offset<256);
1933 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1934 if(offset>=0) {
1935 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1936 }else{
1937 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1938 }
1939}
e2b5e7aa 1940
054175e9 1941static void emit_ldrd(int offset, int rs, int rt)
1942{
1943 assert(offset>-256&&offset<256);
1944 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1945 if(offset>=0) {
1946 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1947 }else{
1948 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1949 }
1950}
e2b5e7aa 1951
1952static void emit_readword(int addr, int rt)
57871462 1953{
1954 u_int offset = addr-(u_int)&dynarec_local;
1955 assert(offset<4096);
1956 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1957 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1958}
e2b5e7aa 1959
1960static unused void emit_movsbl(int addr, int rt)
57871462 1961{
1962 u_int offset = addr-(u_int)&dynarec_local;
1963 assert(offset<256);
1964 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1965 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1966}
e2b5e7aa 1967
1968static unused void emit_movswl(int addr, int rt)
57871462 1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<256);
1972 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1973 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1974}
e2b5e7aa 1975
1976static unused void emit_movzbl(int addr, int rt)
57871462 1977{
1978 u_int offset = addr-(u_int)&dynarec_local;
1979 assert(offset<4096);
1980 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1981 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1982}
e2b5e7aa 1983
1984static unused void emit_movzwl(int addr, int rt)
57871462 1985{
1986 u_int offset = addr-(u_int)&dynarec_local;
1987 assert(offset<256);
1988 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1989 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1990}
57871462 1991
e2b5e7aa 1992static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1993{
1994 assert(offset>-4096&&offset<4096);
1995 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1996 if(offset>=0) {
1997 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1998 }else{
1999 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2000 }
2001}
e2b5e7aa 2002
2003static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 2004{
2005 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2006 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2007}
e2b5e7aa 2008
// Word store with optional map register. A negative map means "no map":
// a plain offset store is emitted. Otherwise addr must be 0 and the store
// is indexed by map scaled by 4. The temp argument is not used here.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 2017
// Doubleword store of rh (word at addr) and rl (following word), with
// optional map register.
//   - map < 0: two plain offset stores; rh may be negative to skip the
//     first word.
//   - map >= 0: rh must be valid. If temp differs from rs, temp = map+1
//     indexes the second word; otherwise rs itself is advanced by 4
//     (rs is left modified) and map is reused.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 2034
2035static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 2036{
2037 assert(offset>-256&&offset<256);
2038 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2039 if(offset>=0) {
2040 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2041 }else{
2042 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2043 }
2044}
e2b5e7aa 2045
2046static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 2047{
2048 assert(offset>-4096&&offset<4096);
2049 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2050 if(offset>=0) {
2051 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2052 }else{
2053 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2054 }
2055}
e2b5e7aa 2056
2057static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 2058{
2059 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2060 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2061}
e2b5e7aa 2062
// Byte store that goes through a map register when one is supplied.
//  map<0 : plain immediate-offset store to rs+addr.
//  map>=0: store to base + map*4, where base is rs, or temp=rs+addr when
//          addr is non-zero.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  int base=rs;
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    base=temp;
  }
  emit_writebyte_dualindexedx4(rt, base, map);
}
e2b5e7aa 2075
2076static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2077{
2078 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2079 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2080}
e2b5e7aa 2081
2082static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2083{
2084 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2085 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2086}
e2b5e7aa 2087
2088static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2089{
2090 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2091 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2092}
e2b5e7aa 2093
2094static void emit_writeword(int rt, int addr)
57871462 2095{
2096 u_int offset = addr-(u_int)&dynarec_local;
2097 assert(offset<4096);
2098 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2099 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2100}
e2b5e7aa 2101
2102static unused void emit_writehword(int rt, int addr)
57871462 2103{
2104 u_int offset = addr-(u_int)&dynarec_local;
2105 assert(offset<256);
2106 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2107 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2108}
e2b5e7aa 2109
2110static unused void emit_writebyte(int rt, int addr)
57871462 2111{
2112 u_int offset = addr-(u_int)&dynarec_local;
2113 assert(offset<4096);
74426039 2114 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2115 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2116}
57871462 2117
e2b5e7aa 2118static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2119{
2120 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2121 assert(rs1<16);
2122 assert(rs2<16);
2123 assert(hi<16);
2124 assert(lo<16);
2125 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2126}
e2b5e7aa 2127
2128static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2129{
2130 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2131 assert(rs1<16);
2132 assert(rs2<16);
2133 assert(hi<16);
2134 assert(lo<16);
2135 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2136}
2137
e2b5e7aa 2138static void emit_clz(int rs,int rt)
57871462 2139{
2140 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2141 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2142}
2143
e2b5e7aa 2144static void emit_subcs(int rs1,int rs2,int rt)
57871462 2145{
2146 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2147 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2148}
2149
e2b5e7aa 2150static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2151{
2152 assert(imm>0);
2153 assert(imm<32);
2154 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2155 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2156}
2157
e2b5e7aa 2158static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2159{
2160 assert(imm>0);
2161 assert(imm<32);
2162 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2163 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2164}
2165
e2b5e7aa 2166static void emit_negmi(int rs, int rt)
57871462 2167{
2168 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2169 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2170}
2171
e2b5e7aa 2172static void emit_negsmi(int rs, int rt)
57871462 2173{
2174 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2175 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2176}
2177
e2b5e7aa 2178static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2179{
2180 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2181 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2182}
2183
e2b5e7aa 2184static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2185{
2186 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2187 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2188}
2189
e2b5e7aa 2190static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2191{
2192 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2193 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2194}
2195
e2b5e7aa 2196static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2197{
2198 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2199 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2200}
2201
e2b5e7aa 2202static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2203{
2204 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2205 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2206}
2207
e2b5e7aa 2208static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2209{
2210 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2211 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2212}
2213
e2b5e7aa 2214static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2215{
2216 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2217 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2218}
2219
e2b5e7aa 2220static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2221{
2222 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2223 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2224}
2225
e2b5e7aa 2226static void emit_teq(int rs, int rt)
57871462 2227{
2228 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2229 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2230}
2231
e2b5e7aa 2232static void emit_rsbimm(int rs, int imm, int rt)
57871462 2233{
2234 u_int armval;
cfbd3c6e 2235 genimm_checked(imm,&armval);
57871462 2236 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2237 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2238}
2239
2240// Load 2 immediates optimizing for small code size
e2b5e7aa 2241static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2242{
2243 emit_movimm(imm1,rt1);
2244 u_int armval;
2245 if(genimm(imm2-imm1,&armval)) {
2246 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2247 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2248 }else if(genimm(imm1-imm2,&armval)) {
2249 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2250 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2251 }
2252 else emit_movimm(imm2,rt2);
2253}
2254
2255// Conditionally select one of two immediates, optimizing for small code size
2256// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2257static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2258{
2259 u_int armval;
2260 if(genimm(imm2-imm1,&armval)) {
2261 emit_movimm(imm1,rt);
2262 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2263 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2264 }else if(genimm(imm1-imm2,&armval)) {
2265 emit_movimm(imm1,rt);
2266 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2267 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2268 }
2269 else {
665f33e1 2270 #ifndef HAVE_ARMV7
57871462 2271 emit_movimm(imm1,rt);
2272 add_literal((int)out,imm2);
2273 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2274 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2275 #else
2276 emit_movw(imm1&0x0000FFFF,rt);
2277 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2278 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2279 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2280 }
2281 emit_movt(imm1&0xFFFF0000,rt);
2282 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2283 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2284 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2285 }
2286 #endif
2287 }
2288}
2289
57871462 2290// special case for checking invalid_code
e2b5e7aa 2291static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2292{
2293 assert(imm<128&&imm>=0);
2294 assert(r>=0&&r<16);
2295 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2296 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2297 emit_cmpimm(HOST_TEMPREG,imm);
2298}
2299
e2b5e7aa 2300static void emit_callne(int a)
0bbd1454 2301{
2302 assem_debug("blne %x\n",a);
2303 u_int offset=genjmp(a);
2304 output_w32(0x1b000000|offset);
2305}
2306
57871462 2307// Used to preload hash table entries
e2b5e7aa 2308static unused void emit_prefetchreg(int r)
57871462 2309{
2310 assem_debug("pld %s\n",regname[r]);
2311 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2312}
2313
2314// Special case for mini_ht
e2b5e7aa 2315static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2316{
2317 assert(offset<4096);
2318 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2319 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2320}
2321
e2b5e7aa 2322static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2323{
2324 u_int armval;
cfbd3c6e 2325 genimm_checked(imm,&armval);
57871462 2326 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2327 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2328}
2329
e2b5e7aa 2330static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2331{
2332 u_int armval;
cfbd3c6e 2333 genimm_checked(imm,&armval);
57871462 2334 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2335 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2336}
2337
e2b5e7aa 2338static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2339{
2340 u_int armval;
cfbd3c6e 2341 genimm_checked(imm,&armval);
57871462 2342 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2343 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2344}
2345
e2b5e7aa 2346static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2347{
2348 u_int armval;
cfbd3c6e 2349 genimm_checked(imm,&armval);
57871462 2350 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2351 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2352}
2353
e2b5e7aa 2354static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2355{
2356 u_int armval;
cfbd3c6e 2357 genimm_checked(imm,&armval);
57871462 2358 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2359 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2360}
2361
e2b5e7aa 2362static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2363{
2364 u_int armval;
cfbd3c6e 2365 genimm_checked(imm,&armval);
b9b61529 2366 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2367 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2368}
2369
e2b5e7aa 2370static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2371{
2372 u_int armval;
cfbd3c6e 2373 genimm_checked(imm,&armval);
b9b61529 2374 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2375 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2376}
2377
e2b5e7aa 2378static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2379{
2380 u_int armval;
2381 genimm_checked(imm,&armval);
2382 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2383 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2384}
2385
e2b5e7aa 2386static void emit_jno_unlikely(int a)
57871462 2387{
2388 //emit_jno(a);
2389 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2390 output_w32(0x72800000|rd_rn_rm(15,15,0));
2391}
2392
054175e9 2393static void save_regs_all(u_int reglist)
57871462 2394{
054175e9 2395 int i;
57871462 2396 if(!reglist) return;
2397 assem_debug("stmia fp,{");
054175e9 2398 for(i=0;i<16;i++)
2399 if(reglist&(1<<i))
2400 assem_debug("r%d,",i);
57871462 2401 assem_debug("}\n");
2402 output_w32(0xe88b0000|reglist);
2403}
e2b5e7aa 2404
054175e9 2405static void restore_regs_all(u_int reglist)
57871462 2406{
054175e9 2407 int i;
57871462 2408 if(!reglist) return;
2409 assem_debug("ldmia fp,{");
054175e9 2410 for(i=0;i<16;i++)
2411 if(reglist&(1<<i))
2412 assem_debug("r%d,",i);
57871462 2413 assem_debug("}\n");
2414 output_w32(0xe89b0000|reglist);
2415}
e2b5e7aa 2416
054175e9 2417// Save registers before function call
2418static void save_regs(u_int reglist)
2419{
4d646738 2420 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2421 save_regs_all(reglist);
2422}
e2b5e7aa 2423
054175e9 2424// Restore registers after function call
2425static void restore_regs(u_int reglist)
2426{
4d646738 2427 reglist&=CALLER_SAVE_REGS;
054175e9 2428 restore_regs_all(reglist);
2429}
57871462 2430
57871462 2431/* Stubs/epilogue */
2432
e2b5e7aa 2433static void literal_pool(int n)
57871462 2434{
2435 if(!literalcount) return;
2436 if(n) {
2437 if((int)out-literals[0][0]<4096-n) return;
2438 }
2439 u_int *ptr;
2440 int i;
2441 for(i=0;i<literalcount;i++)
2442 {
77750690 2443 u_int l_addr=(u_int)out;
2444 int j;
2445 for(j=0;j<i;j++) {
2446 if(literals[j][1]==literals[i][1]) {
2447 //printf("dup %08x\n",literals[i][1]);
2448 l_addr=literals[j][0];
2449 break;
2450 }
2451 }
57871462 2452 ptr=(u_int *)literals[i][0];
77750690 2453 u_int offset=l_addr-(u_int)ptr-8;
57871462 2454 assert(offset<4096);
2455 assert(!(offset&3));
2456 *ptr|=offset;
77750690 2457 if(l_addr==(u_int)out) {
2458 literals[i][0]=l_addr; // remember for dupes
2459 output_w32(literals[i][1]);
2460 }
57871462 2461 }
2462 literalcount=0;
2463}
2464
e2b5e7aa 2465static void literal_pool_jumpover(int n)
57871462 2466{
2467 if(!literalcount) return;
2468 if(n) {
2469 if((int)out-literals[0][0]<4096-n) return;
2470 }
df4dc2b1 2471 void *jaddr = out;
57871462 2472 emit_jmp(0);
2473 literal_pool(0);
df4dc2b1 2474 set_jump_target(jaddr, out);
57871462 2475}
2476
e2b5e7aa 2477static void emit_extjump2(u_int addr, int target, int linker)
57871462 2478{
2479 u_char *ptr=(u_char *)addr;
2480 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2481 (void)ptr;
2482
57871462 2483 emit_loadlp(target,0);
2484 emit_loadlp(addr,1);
24385cae 2485 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2486 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2487//DEBUG >
2488#ifdef DEBUG_CYCLE_COUNT
2489 emit_readword((int)&last_count,ECX);
2490 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2491 emit_readword((int)&next_interupt,ECX);
2492 emit_writeword(HOST_CCREG,(int)&Count);
2493 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2494 emit_writeword(ECX,(int)&last_count);
2495#endif
2496//DEBUG <
2497 emit_jmp(linker);
2498}
2499
e2b5e7aa 2500static void emit_extjump(int addr, int target)
57871462 2501{
2502 emit_extjump2(addr, target, (int)dyna_linker);
2503}
e2b5e7aa 2504
2505static void emit_extjump_ds(int addr, int target)
57871462 2506{
2507 emit_extjump2(addr, target, (int)dyna_linker_ds);
2508}
2509
13e35c04 2510// put rt_val into rt, potentially making use of rs with value rs_val
2511static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2512{
8575a877 2513 u_int armval;
2514 int diff;
2515 if(genimm(rt_val,&armval)) {
2516 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2517 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2518 return;
2519 }
2520 if(genimm(~rt_val,&armval)) {
2521 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2522 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2523 return;
2524 }
2525 diff=rt_val-rs_val;
2526 if(genimm(diff,&armval)) {
2527 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2528 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2529 return;
2530 }else if(genimm(-diff,&armval)) {
2531 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2532 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2533 return;
2534 }
2535 emit_movimm(rt_val,rt);
2536}
2537
// Return 1 if emit_movimm_from can do its job cheaply, i.e. the two values
// are equal or their difference (in either direction) reduces to fewer
// than 9 significant bits after dropping trailing pairs of zero bits.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int delta,xs;
  int pass;
  if(v1==v2) return 1;
  delta=v2-v1;
  // First pass checks v2-v1, second pass checks v1-v2.
  for(pass=0;pass<2;pass++) {
    xs=delta;
    while(xs!=0&&(xs&3)==0)
      xs>>=2;
    if(xs<0x100) return 1;
    delta=0u-delta;
  }
  return 0;
}
cbbab9cd 2553
// Move a0 into r0 and a1 into r1 for a call; trashes r2.
// A negative register number means "no argument" and emits no move for it.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // The arguments sit in each other's target: swap through r2.
      emit_mov(1,2); emit_mov(0,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 currently lives in r0, so it has to move out first.
      emit_mov(0,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2570
b1be1eee 2571static void mov_loadtype_adj(int type,int rs,int rt)
2572{
2573 switch(type) {
2574 case LOADB_STUB: emit_signextend8(rs,rt); break;
2575 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2576 case LOADH_STUB: emit_signextend16(rs,rt); break;
2577 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2578 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2579 default: assert(0);
2580 }
2581}
2582
b1be1eee 2583#include "pcsxmem.h"
2584#include "pcsxmem_inline.c"
b1be1eee 2585
// Emit the out-of-line slow path for a memory read (stub number n).
// Stub fields read here: [0]=load type, [1]=branch to patch to this stub,
// [2]=return address, [3]=instruction index, [4]=host register holding the
// address, [5]=struct regstat pointer, [6]=cycle adjustment input,
// [7]=live host register list.
// The generated code first looks the address up in mem_rtab and performs a
// conditional direct load; otherwise it calls the jump_handler_read*
// routine matching the access width.
e2b5e7aa 2586static void do_readstub(int n)
57871462 2587{
2588 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2589 literal_pool(256);
df4dc2b1 2590 set_jump_target(stubs[n][1], out);
57871462 2591 int type=stubs[n][0];
2592 int i=stubs[n][3];
2593 int rs=stubs[n][4];
2594 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2595 u_int reglist=stubs[n][7];
2596 signed char *i_regmap=i_regs->regmap;
581335b0 2597 int rt;
// Destination host register: FTEMP for C1LS/C2LS/LOADLR, else the one mapped to rt1[i].
b9b61529 2598 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2599 rt=get_reg(i_regmap,FTEMP);
2600 }else{
57871462 2601 rt=get_reg(i_regmap,rt1[i]);
2602 }
2603 assert(rs>=0);
df4dc2b1 2604 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2605 void *restore_jump = NULL;
c6c3b1b3 2606 reglist|=(1<<rs);
// Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is not live.
2607 for(r=0;r<=12;r++) {
2608 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2609 temp=r; break;
2610 }
2611 }
// The destination is about to be overwritten, so it does not need saving.
db829eeb 2612 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2613 reglist&=~(1<<rt);
// No free scratch register: save the live set up front and take r0 (or r2 if rs is r0).
2614 if(temp==-1) {
2615 save_regs(reglist);
2616 regs_saved=1;
2617 temp=(rs==0)?2:0;
2618 }
// Prefer r1 as the second scratch register when it is safe to clobber.
2619 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2620 temp2=1;
// Fetch the mem_rtab entry for the page (rs>>12) and shift it left by one with flags set.
2621 emit_readword((int)&mem_rtab,temp);
2622 emit_shrimm(rs,12,temp2);
2623 emit_readword_dualindexedx4(temp,temp2,temp2);
2624 emit_lsls_imm(temp2,1,temp2);
// Conditional direct load; NOTE(review): presumably taken when the shifted-out top bit is clear — confirm.
2625 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2626 switch(type) {
2627 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2628 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2629 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2630 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2631 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2632 }
2633 }
2634 if(regs_saved) {
df4dc2b1 2635 restore_jump=out;
c6c3b1b3 2636 emit_jcc(0); // jump to reg restore
2637 }
2638 else
2639 emit_jcc(stubs[n][2]); // return address
2640
// Handler path: make sure registers are saved, then pick the handler by access width.
2641 if(!regs_saved)
2642 save_regs(reglist);
2643 int handler=0;
2644 if(type==LOADB_STUB||type==LOADBU_STUB)
2645 handler=(int)jump_handler_read8;
2646 if(type==LOADH_STUB||type==LOADHU_STUB)
2647 handler=(int)jump_handler_read16;
2648 if(type==LOADW_STUB)
2649 handler=(int)jump_handler_read32;
2650 assert(handler!=0);
// r0=address, r1=shifted table entry, r2=adjusted cycle count.
b96d3df7 2651 pass_args(rs,temp2);
c6c3b1b3 2652 int cc=get_reg(i_regmap,CCREG);
2653 if(cc<0)
2654 emit_loadreg(CCREG,2);
2573466a 2655 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2656 emit_call(handler);
// Move the handler result from r0 into rt with the extension the load type requires.
2657 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2658 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2659 }
2660 if(restore_jump)
df4dc2b1 2661 set_jump_target(restore_jump, out);
c6c3b1b3 2662 restore_regs(reglist);
2663 emit_jmp(stubs[n][2]); // return address
57871462 2664}
2665
c6c3b1b3 2666// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2667static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2668{
2669 u_int l1,l2=0;
2670 l1=((u_int *)table)[addr>>12];
2671 if((l1&(1<<31))==0) {
2672 u_int v=l1<<1;
2673 *addr_host=v+addr;
2674 return 0;
2675 }
2676 else {
2677 l1<<=1;
2678 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2679 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2680 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2681 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2682 else
2683 l2=((u_int *)l1)[(addr&0xfff)/4];
2684 if((l2&(1<<31))==0) {
2685 u_int v=l2<<1;
2686 *addr_host=v+(addr&0xfff);
2687 return 0;
2688 }
2689 return l2<<1;
2690 }
2691}
c6c3b1b3 2692
// Emit an inline read for an address (addr) known at compile time.
// Tries, in order: pcsx_direct_read, a direct host-memory load when
// get_direct_memhandler reports directly accessible memory, and finally a
// call to a memory handler with the result taken from r0.
e2b5e7aa 2693static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2694{
2695 int rs=get_reg(regmap,target);
57871462 2696 int rt=get_reg(regmap,target);
// Fall back to the register mapped to -1 when target has no host register.
535d208a 2697 if(rs<0) rs=get_reg(regmap,-1);
57871462 2698 assert(rs>=0);
b1be1eee 2699 u_int handler,host_addr=0,is_dynamic,far_call=0;
2700 int cc=get_reg(regmap,CCREG);
2701 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2702 return;
c6c3b1b3 2703 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
// handler==0: the address is directly accessible at host_addr.
2704 if (handler==0) {
// Nothing to load when there is no destination register or rt1[i] is 0.
db829eeb 2705 if(rt<0||rt1[i]==0)
c6c3b1b3 2706 return;
13e35c04 2707 if(addr!=host_addr)
2708 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2709 switch(type) {
2710 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2711 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2712 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2713 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2714 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2715 default: assert(0);
2716 }
2717 return;
2718 }
// Dynamic handlers are reached through the generic jump_handler_read* routines.
b1be1eee 2719 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2720 if(is_dynamic) {
2721 if(type==LOADB_STUB||type==LOADBU_STUB)
2722 handler=(int)jump_handler_read8;
2723 if(type==LOADH_STUB||type==LOADHU_STUB)
2724 handler=(int)jump_handler_read16;
2725 if(type==LOADW_STUB)
2726 handler=(int)jump_handler_read32;
2727 }
c6c3b1b3 2728
2729 // call a memhandler
// The destination register is overwritten by the result, so it is not saved.
db829eeb 2730 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2731 reglist&=~(1<<rt);
2732 save_regs(reglist);
// r0 = address: the constant itself when target is 0, otherwise the value in rs.
2733 if(target==0)
2734 emit_movimm(addr,0);
2735 else if(rs!=0)
2736 emit_mov(rs,0);
// Check whether the handler is within the +/-32MB range tested below for a direct call.
c6c3b1b3 2737 int offset=(int)handler-(int)out-8;
2738 if(offset<-33554432||offset>=33554432) {
2739 // unreachable memhandler, a plugin func perhaps
b1be1eee 2740 emit_movimm(handler,12);
2741 far_call=1;
2742 }
2743 if(cc<0)
2744 emit_loadreg(CCREG,2);
// Dynamic: r1 = shifted table entry, r2 = adjusted cycle count.
2745 if(is_dynamic) {
2746 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2747 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2748 }
// Static handler: Count is updated to last_count plus the adjusted cycle count before the call.
b1be1eee 2749 else {
2750 emit_readword((int)&last_count,3);
2751 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2752 emit_add(2,3,2);
2753 emit_writeword(2,(int)&Count);
2754 }
2755
2756 if(far_call)
2757 emit_callreg(12);
c6c3b1b3 2758 else
2759 emit_call(handler);
b1be1eee 2760
// Move the result from r0 into rt with the extension the load type requires.
db829eeb 2761 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2762 switch(type) {
2763 case LOADB_STUB: emit_signextend8(0,rt); break;
2764 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2765 case LOADH_STUB: emit_signextend16(0,rt); break;
2766 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2767 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2768 default: assert(0);
2769 }
2770 }
2771 restore_regs(reglist);
57871462 2772}
2773
// Emit the out-of-line slow path for a memory write (stub number n).
// Stub fields read here: [0]=store type, [1]=branch to patch to this stub,
// [2]=return address, [3]=instruction index, [4]=host register holding the
// address, [5]=struct regstat pointer, [6]=cycle adjustment input,
// [7]=live host register list.
// The generated code first looks the address up in mem_wtab and performs a
// conditional direct store; otherwise it calls the jump_handler_write*
// routine matching the access width and stores back the cycle count.
e2b5e7aa 2774static void do_writestub(int n)
57871462 2775{
2776 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2777 literal_pool(256);
df4dc2b1 2778 set_jump_target(stubs[n][1], out);
57871462 2779 int type=stubs[n][0];
2780 int i=stubs[n][3];
2781 int rs=stubs[n][4];
2782 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2783 u_int reglist=stubs[n][7];
2784 signed char *i_regmap=i_regs->regmap;
581335b0 2785 int rt,r;
// Source host register: FTEMP for C1LS/C2LS, else the one mapped to rs2[i].
b9b61529 2786 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2787 rt=get_reg(i_regmap,r=FTEMP);
2788 }else{
57871462 2789 rt=get_reg(i_regmap,r=rs2[i]);
2790 }
2791 assert(rs>=0);
2792 assert(rt>=0);
df4dc2b1 2793 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,ra;
2794 void *restore_jump = NULL;
// Scratch registers must avoid the live set plus the address and data registers.
b96d3df7 2795 int reglist2=reglist|(1<<rs)|(1<<rt);
2796 for(rtmp=0;rtmp<=12;rtmp++) {
2797 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2798 temp=rtmp; break;
2799 }
2800 }
// No free scratch register: save the live set and take the first of r0-r3 that is neither rs nor rt.
2801 if(temp==-1) {
2802 save_regs(reglist);
2803 regs_saved=1;
2804 for(rtmp=0;rtmp<=3;rtmp++)
2805 if(rtmp!=rs&&rtmp!=rt)
2806 {temp=rtmp;break;}
2807 }
// Prefer r3 as the second scratch register when it is safe to clobber.
2808 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2809 temp2=3;
// Fetch the mem_wtab entry for the page (rs>>12) and shift it left by one with flags set.
2810 emit_readword((int)&mem_wtab,temp);
2811 emit_shrimm(rs,12,temp2);
2812 emit_readword_dualindexedx4(temp,temp2,temp2);
2813 emit_lsls_imm(temp2,1,temp2);
// Conditional direct store; NOTE(review): presumably taken when the shifted-out top bit is clear — confirm.
2814 switch(type) {
2815 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2816 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2817 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2818 default: assert(0);
2819 }
2820 if(regs_saved) {
df4dc2b1 2821 restore_jump=out;
b96d3df7 2822 emit_jcc(0); // jump to reg restore
2823 }
2824 else
2825 emit_jcc(stubs[n][2]); // return address (invcode check)
2826
// Handler path: make sure registers are saved, then pick the handler by access width.
2827 if(!regs_saved)
2828 save_regs(reglist);
2829 int handler=0;
2830 switch(type) {
2831 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2832 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2833 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2834 }
2835 assert(handler!=0);
// r0=address, r1=data, r3=shifted table entry, r2=adjusted cycle count.
2836 pass_args(rs,rt);
2837 if(temp2!=3)
2838 emit_mov(temp2,3);
2839 int cc=get_reg(i_regmap,CCREG);
2840 if(cc<0)
2841 emit_loadreg(CCREG,2);
2573466a 2842 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2843 // returns new cycle_count
2844 emit_call(handler);
// Undo the cycle adjustment on the value returned in r0 and put it back into the cycle register.
2573466a 2845 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2846 if(cc<0)
2847 emit_storereg(CCREG,2);
2848 if(restore_jump)
df4dc2b1 2849 set_jump_target(restore_jump, out);
b96d3df7 2850 restore_regs(reglist);
2851 ra=stubs[n][2];
b96d3df7 2852 emit_jmp(ra);
57871462 2853}
2854
e2b5e7aa 2855static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2856{
2857 int rs=get_reg(regmap,-1);
57871462 2858 int rt=get_reg(regmap,target);
2859 assert(rs>=0);
2860 assert(rt>=0);
b96d3df7 2861 u_int handler,host_addr=0;
b96d3df7 2862 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2863 if (handler==0) {
13e35c04 2864 if(addr!=host_addr)
2865 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2866 switch(type) {
2867 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2868 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2869 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2870 default: assert(0);
2871 }
2872 return;
2873 }
2874
2875 // call a memhandler
2876 save_regs(reglist);
13e35c04 2877 pass_args(rs,rt);
b96d3df7 2878 int cc=get_reg(regmap,CCREG);
2879 if(cc<0)
2880 emit_loadreg(CCREG,2);
2573466a 2881 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2882 emit_movimm(handler,3);
2883 // returns new cycle_count
2884 emit_call((int)jump_handler_write_h);
2573466a 2885 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2886 if(cc<0)
2887 emit_storereg(CCREG,2);
2888 restore_regs(reglist);
57871462 2889}
2890
// Emit the out-of-line path for an unaligned store (SWL/SWR only, see the
// opcode assert). Stub fields read here: [1]=branch to patch to this stub,
// [2]=return address, [3]=instruction index, [4]=struct regstat pointer,
// [5]=host register holding the address, [6]=cycle adjustment input,
// [7]=live host register list.
// Only the "#if 1" branch is compiled: it calls jump_handle_swl or
// jump_handle_swr. The "#else" branch is a disabled alternative.
e2b5e7aa 2891static void do_unalignedwritestub(int n)
57871462 2892{
b7918751 2893 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2894 literal_pool(256);
df4dc2b1 2895 set_jump_target(stubs[n][1], out);
b7918751 2896
2897 int i=stubs[n][3];
2898 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2899 int addr=stubs[n][5];
2900 u_int reglist=stubs[n][7];
2901 signed char *i_regmap=i_regs->regmap;
2902 int temp2=get_reg(i_regmap,FTEMP);
2903 int rt;
b7918751 2904 rt=get_reg(i_regmap,rs2[i]);
2905 assert(rt>=0);
2906 assert(addr>=0);
2907 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register must survive the call; the FTEMP register is dropped from the save list.
// NOTE(review): temp2 is used as a shift count without a >=0 check — confirm FTEMP is always allocated here.
2908 reglist|=(1<<addr);
2909 reglist&=~(1<<temp2);
2910
b96d3df7 2911#if 1
2912 // don't bother with it and call write handler
2913 save_regs(reglist);
// r0=address, r1=data, r2=adjusted cycle count.
2914 pass_args(addr,rt);
2915 int cc=get_reg(i_regmap,CCREG);
2916 if(cc<0)
2917 emit_loadreg(CCREG,2);
2573466a 2918 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2919 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// Undo the cycle adjustment on the value returned in r0 and put it back into the cycle register.
2573466a 2920 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2921 if(cc<0)
2922 emit_storereg(CCREG,2);
2923 restore_regs(reglist);
2924 emit_jmp(stubs[n][2]); // return address
2925#else
// Disabled alternative: read the aligned word, merge in the shifted data, and write it back.
b7918751 2926 emit_andimm(addr,0xfffffffc,temp2);
2927 emit_writeword(temp2,(int)&address);
2928
2929 save_regs(reglist);
b7918751 2930 emit_shrimm(addr,16,1);
2931 int cc=get_reg(i_regmap,CCREG);
2932 if(cc<0) {
2933 emit_loadreg(CCREG,2);
2934 }
2935 emit_movimm((u_int)readmem,0);
2936 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2937 emit_call((int)&indirect_jump_indexed);
2938 restore_regs(reglist);
2939
2940 emit_readword((int)&readmem_dword,temp2);
2941 int temp=addr; //hmh
2942 emit_shlimm(addr,3,temp);
2943 emit_andimm(temp,24,temp);
2944#ifdef BIG_ENDIAN_MIPS
2945 if (opcode[i]==0x2e) // SWR
2946#else
2947 if (opcode[i]==0x2a) // SWL
2948#endif
2949 emit_xorimm(temp,24,temp);
2950 emit_movimm(-1,HOST_TEMPREG);
55439448 2951 if (opcode[i]==0x2a) { // SWL
b7918751 2952 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2953 emit_orrshr(rt,temp,temp2);
2954 }else{
2955 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2956 emit_orrshl(rt,temp,temp2);
2957 }
2958 emit_readword((int)&address,addr);
2959 emit_writeword(temp2,(int)&word);
2960 //save_regs(reglist); // don't need to, no state changes
2961 emit_shrimm(addr,16,1);
2962 emit_movimm((u_int)writemem,0);
2963 //emit_call((int)&indirect_jump_indexed);
2964 emit_mov(15,14);
2965 emit_readword_dualindexedx4(0,1,15);
2966 emit_readword((int)&Count,HOST_TEMPREG);
2967 emit_readword((int)&next_interupt,2);
2968 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2969 emit_writeword(2,(int)&last_count);
2970 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2971 if(cc<0) {
2972 emit_storereg(CCREG,HOST_TEMPREG);
2973 }
2974 restore_regs(reglist);
57871462 2975 emit_jmp(stubs[n][2]); // return address
b96d3df7 2976#endif
57871462 2977}
2978
e2b5e7aa 2979static void do_invstub(int n)
57871462 2980{
2981 literal_pool(20);
2982 u_int reglist=stubs[n][3];
df4dc2b1 2983 set_jump_target(stubs[n][1], out);
57871462 2984 save_regs(reglist);
2985 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2986 emit_call((int)&invalidate_addr);
2987 restore_regs(reglist);
2988 emit_jmp(stubs[n][2]); // return address
2989}
2990
/*
 * Emit the verification stub for instruction i of the block being compiled.
 *
 * The emitted code loads r1 with the source address, r2 with `copy`,
 * r3 with slen*4 and r0 with the guest address start+i*4, then calls
 * verify_code (or verify_code_vm, chosen from `start`).  After the call it
 * emits the register loads from load_regs_entry(i) and a jump to
 * instr_addr[i].
 *
 * Returns the address just after the verify call, or instr_addr[i] itself
 * when load_regs_entry() emitted nothing.
 */
df4dc2b1 2991void *do_dirty_stub(int i)
57871462 2992{
2993 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2994 u_int addr=(u_int)source;
57871462 2995 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2996 #ifndef HAVE_ARMV7
  /* without ARMv7: constants come from the literal pool */
ac545b3a 2997 emit_loadlp(addr,1);
57871462 2998 emit_loadlp((int)copy,2);
2999 emit_loadlp(slen*4,3);
3000 #else
  /* ARMv7: build each 32-bit constant from a movw/movt pair */
ac545b3a 3001 emit_movw(addr&0x0000FFFF,1);
57871462 3002 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3003 emit_movt(addr&0xFFFF0000,1);
57871462 3004 emit_movt(((u_int)copy)&0xFFFF0000,2);
3005 emit_movw(slen*4,3);
3006 #endif
3007 emit_movimm(start+i*4,0);
3008 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
  /* remember where the post-verification code starts */
df4dc2b1 3009 void *entry = out;
57871462 3010 load_regs_entry(i);
  /* nothing was emitted by load_regs_entry: the entry is the instruction itself */
df4dc2b1 3011 if (entry == out)
3012 entry = instr_addr[i];
57871462 3013 emit_jmp(instr_addr[i]);
3014 return entry;
3015}
3016
e2b5e7aa 3017static void do_dirty_stub_ds()
57871462 3018{
3019 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3020 #ifndef HAVE_ARMV7
57871462 3021 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3022 emit_loadlp((int)copy,2);
3023 emit_loadlp(slen*4,3);
3024 #else
3025 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3026 emit_movw(((u_int)copy)&0x0000FFFF,2);
3027 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3028 emit_movt(((u_int)copy)&0xFFFF0000,2);
3029 emit_movw(slen*4,3);
3030 #endif
3031 emit_movimm(start+1,0);
3032 emit_call((int)&verify_code_ds);
3033}
3034
e2b5e7aa 3035static void do_cop1stub(int n)
57871462 3036{
3037 literal_pool(256);
3038 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
df4dc2b1 3039 set_jump_target(stubs[n][1], out);
57871462 3040 int i=stubs[n][3];
3d624f89 3041// int rs=stubs[n][4];
57871462 3042 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3043 int ds=stubs[n][6];
3044 if(!ds) {
3045 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3046 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3047 }
3048 //else {printf("fp exception in delay slot\n");}
3049 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3050 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3051 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3052 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3053 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3054}
3055
57871462 3056/* Special assem */
3057
/*
 * Assemble a register-controlled shift for instruction i.
 *
 * opcode2 <= 7 selects the 32-bit forms (SLLV/SRLV/SRAV), anything else the
 * 64-bit forms (DSLLV/DSRLV/DSRAV).  Nothing is emitted when rt1[i] is 0 or
 * when the (low) target has no host register.  A zero source (rs1 == 0)
 * yields zero, a zero shift register (rs2 == 0) degenerates to a move.
 * HOST_TEMPREG is used as scratch for the masked shift amount.
 */
e2b5e7aa 3058static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 3059{
3060 if(rt1[i]) {
3061 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3062 {
3063 signed char s,t,shift;
3064 t=get_reg(i_regs->regmap,rt1[i]);
3065 s=get_reg(i_regs->regmap,rs1[i]);
3066 shift=get_reg(i_regs->regmap,rs2[i]);
3067 if(t>=0){
3068 if(rs1[i]==0)
3069 {
3070 emit_zeroreg(t);
3071 }
3072 else if(rs2[i]==0)
3073 {
3074 assert(s>=0);
3075 if(s!=t) emit_mov(s,t);
3076 }
3077 else
3078 {
   /* only the low 5 bits of the shift register are used */
3079 emit_andimm(shift,31,HOST_TEMPREG);
3080 if(opcode2[i]==4) // SLLV
3081 {
3082 emit_shl(s,HOST_TEMPREG,t);
3083 }
3084 if(opcode2[i]==6) // SRLV
3085 {
3086 emit_shr(s,HOST_TEMPREG,t);
3087 }
3088 if(opcode2[i]==7) // SRAV
3089 {
3090 emit_sar(s,HOST_TEMPREG,t);
3091 }
3092 }
3093 }
3094 } else { // DSLLV/DSRLV/DSRAV
   /* sh/sl and th/tl are the high/low host registers of source and target */
3095 signed char sh,sl,th,tl,shift;
3096 th=get_reg(i_regs->regmap,rt1[i]|64);
3097 tl=get_reg(i_regs->regmap,rt1[i]);
3098 sh=get_reg(i_regs->regmap,rs1[i]|64);
3099 sl=get_reg(i_regs->regmap,rs1[i]);
3100 shift=get_reg(i_regs->regmap,rs2[i]);
3101 if(tl>=0){
3102 if(rs1[i]==0)
3103 {
3104 emit_zeroreg(tl);
3105 if(th>=0) emit_zeroreg(th);
3106 }
3107 else if(rs2[i]==0)
3108 {
3109 assert(sl>=0);
3110 if(sl!=tl) emit_mov(sl,tl);
3111 if(th>=0&&sh!=th) emit_mov(sh,th);
3112 }
3113 else
3114 {
3115 // FIXME: What if shift==tl ?
3116 assert(shift!=tl);
3117 int temp=get_reg(i_regs->regmap,-1);
   /* real_th remembers whether the target really has a high register */
3118 int real_th=th;
3119 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3120 assert(sl>=0);
3121 assert(sh>=0);
3122 emit_andimm(shift,31,HOST_TEMPREG);
   /*
    * Each variant first shifts by (amount & 31) across both halves, then
    * tests bit 5 of the amount (emit_testimm(shift,32)) and uses the
    * conditional moves that follow to fix up the result when it is set.
    */
3123 if(opcode2[i]==0x14) // DSLLV
3124 {
3125 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3126 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3127 emit_orrshr(sl,HOST_TEMPREG,th);
3128 emit_andimm(shift,31,HOST_TEMPREG);
3129 emit_testimm(shift,32);
3130 emit_shl(sl,HOST_TEMPREG,tl);
3131 if(th>=0) emit_cmovne_reg(tl,th);
3132 emit_cmovne_imm(0,tl);
3133 }
3134 if(opcode2[i]==0x16) // DSRLV
3135 {
3136 assert(th>=0);
3137 emit_shr(sl,HOST_TEMPREG,tl);
3138 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3139 emit_orrshl(sh,HOST_TEMPREG,tl);
3140 emit_andimm(shift,31,HOST_TEMPREG);
3141 emit_testimm(shift,32);
3142 emit_shr(sh,HOST_TEMPREG,th);
3143 emit_cmovne_reg(th,tl);
3144 if(real_th>=0) emit_cmovne_imm(0,th);
3145 }
3146 if(opcode2[i]==0x17) // DSRAV
3147 {
3148 assert(th>=0);
3149 emit_shr(sl,HOST_TEMPREG,tl);
3150 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3151 if(real_th>=0) {
3152 assert(temp>=0);
3153 emit_sarimm(th,31,temp);
3154 }
3155 emit_orrshl(sh,HOST_TEMPREG,tl);
3156 emit_andimm(shift,31,HOST_TEMPREG);
3157 emit_testimm(shift,32);
3158 emit_sar(sh,HOST_TEMPREG,th);
3159 emit_cmovne_reg(th,tl);
3160 if(real_th>=0) emit_cmovne_reg(temp,th);
3161 }
3162 }
3163 }
3164 }
3165 }
3166}
ffb0b9e0 3167
ffb0b9e0 3168static void speculate_mov(int rs,int rt)
3169{
3170 if(rt!=0) {
3171 smrv_strong_next|=1<<rt;
3172 smrv[rt]=smrv[rs];
3173 }
3174}
3175
3176static void speculate_mov_weak(int rs,int rt)
3177{
3178 if(rt!=0) {
3179 smrv_weak_next|=1<<rt;
3180 smrv[rt]=smrv[rs];
3181 }
3182}
3183
3184static void speculate_register_values(int i)
3185{
3186 if(i==0) {
3187 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3188 // gp,sp are likely to stay the same throughout the block
3189 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3190 smrv_weak_next=~smrv_strong_next;
3191 //printf(" llr %08x\n", smrv[4]);
3192 }
3193 smrv_strong=smrv_strong_next;
3194 smrv_weak=smrv_weak_next;
3195 switch(itype[i]) {
3196 case ALU:
3197 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3198 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3199 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3200 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3201 else {
3202 smrv_strong_next&=~(1<<rt1[i]);
3203 smrv_weak_next&=~(1<<rt1[i]);
3204 }
3205 break;
3206 case SHIFTIMM:
3207 smrv_strong_next&=~(1<<rt1[i]);
3208 smrv_weak_next&=~(1<<rt1[i]);
3209 // fallthrough
3210 case IMM16:
3211 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3212 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3213 if(hr>=0) {
3214 if(get_final_value(hr,i,&value))
3215 smrv[rt1[i]]=value;
3216 else smrv[rt1[i]]=constmap[i][hr];
3217 smrv_strong_next|=1<<rt1[i];
3218 }
3219 }
3220 else {
3221 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3222 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3223 }
3224 break;
3225 case LOAD:
3226 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3227 // special case for BIOS
3228 smrv[rt1[i]]=0xa0000000;
3229 smrv_strong_next|=1<<rt1[i];
3230 break;
3231 }
3232 // fallthrough
3233 case SHIFT:
3234 case LOADLR:
3235 case MOV:
3236 smrv_strong_next&=~(1<<rt1[i]);
3237 smrv_weak_next&=~(1<<rt1[i]);
3238 break;
3239 case COP0:
3240 case COP2:
3241 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3242 smrv_strong_next&=~(1<<rt1[i]);
3243 smrv_weak_next&=~(1<<rt1[i]);
3244 }
3245 break;
3246 case C2LS:
3247 if (opcode[i]==0x32) { // LWC2
3248 smrv_strong_next&=~(1<<rt1[i]);
3249 smrv_weak_next&=~(1<<rt1[i]);
3250 }
3251 break;
3252 }
3253#if 0
3254 int r=4;
3255 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3256 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3257#endif
3258}
3259
/*
 * Memory region classes returned by get_ptr_mem_type().
 * MTYPE_8000 must stay 0: callers treat 0 as "do the usual RAM check".
 */
3260enum {
3261 MTYPE_8000 = 0, // default class, also returned for everything unclassified
3262 MTYPE_8020, // 0x80200000..0x807fffff (RAM mirror)
3263 MTYPE_0000, // below 0x00200000 (RAM mirror at address 0)
3264 MTYPE_A000, // 0xa0000000..0xa01fffff (RAM mirror)
3265 MTYPE_1F80, // 0x1f800000..0x1f800fff (scratchpad)
3266};
3267
3268static int get_ptr_mem_type(u_int a)
3269{
3270 if(a < 0x00200000) {
3271 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3272 // return wrong, must use memhandler for BIOS self-test to pass
3273 // 007 does similar stuff from a00 mirror, weird stuff
3274 return MTYPE_8000;
3275 return MTYPE_0000;
3276 }
3277 if(0x1f800000 <= a && a < 0x1f801000)
3278 return MTYPE_1F80;
3279 if(0x80200000 <= a && a < 0x80800000)
3280 return MTYPE_8020;
3281 if(0xa0000000 <= a && a < 0xa0200000)
3282 return MTYPE_A000;
3283 return MTYPE_8000;
3284}
ffb0b9e0 3285
3286static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3287{
581335b0 3288 int jaddr=0,type=0;
ffb0b9e0 3289 int mr=rs1[i];
3290 if(((smrv_strong|smrv_weak)>>mr)&1) {
3291 type=get_ptr_mem_type(smrv[mr]);
3292 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3293 }
3294 else {
3295 // use the mirror we are running on
3296 type=get_ptr_mem_type(start);
3297 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3298 }
3299
3300 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3301 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3302 addr=*addr_reg_override=HOST_TEMPREG;
3303 type=0;
3304 }
3305 else if(type==MTYPE_0000) { // RAM 0 mirror
3306 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3307 addr=*addr_reg_override=HOST_TEMPREG;
3308 type=0;
3309 }
3310 else if(type==MTYPE_A000) { // RAM A mirror
3311 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3312 addr=*addr_reg_override=HOST_TEMPREG;
3313 type=0;
3314 }
3315 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3316 if (psxH == (void *)0x1f800000) {
3317 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3318 emit_cmpimm(HOST_TEMPREG,0x1000);
3319 jaddr=(int)out;
3320 emit_jc(0);
3321 }
3322 else {
3323 // do usual RAM check, jump will go to the right handler
3324 type=0;
3325 }
ffb0b9e0 3326 }
ffb0b9e0 3327
3328 if(type==0)
3329 {
3330 emit_cmpimm(addr,RAM_SIZE);
3331 jaddr=(int)out;
3332 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3333 // Hint to branch predictor that the branch is unlikely to be taken
3334 if(rs1[i]>=28)
3335 emit_jno_unlikely(0);
3336 else
3337 #endif
3338 emit_jno(0);
a327ad27 3339 if(ram_offset!=0) {
3340 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3341 addr=*addr_reg_override=HOST_TEMPREG;
3342 }
ffb0b9e0 3343 }
3344
3345 return jaddr;
3346}
3347
57871462 3348#define shift_assemble shift_assemble_arm
3349
/*
 * Assemble an unaligned load (LWL/LWR, opcodes 0x22/0x26, or the 64-bit
 * LDL/LDR, opcodes 0x1A/0x1B) for instruction i.
 *
 * The aligned word (or doubleword) containing the address is loaded into
 * the FTEMP register, either directly or through a LOADW/LOADD stub, and is
 * then merged into the target register using a shift derived from the low
 * address bits.  `temp` holds that bit offset (address*8), `temp2` the
 * aligned address and afterwards the loaded data.
 */
e2b5e7aa 3350static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3351{
3352 int s,th,tl,temp,temp2,addr,map=-1;
3353 int offset;
3354 int jaddr=0;
af4ee1fe 3355 int memtarget=0,c=0;
ffb0b9e0 3356 int fastload_reg_override=0;
57871462 3357 u_int hr,reglist=0;
3358 th=get_reg(i_regs->regmap,rt1[i]|64);
3359 tl=get_reg(i_regs->regmap,rt1[i]);
3360 s=get_reg(i_regs->regmap,rs1[i]);
3361 temp=get_reg(i_regs->regmap,-1);
3362 temp2=get_reg(i_regs->regmap,FTEMP);
3363 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3364 assert(addr<0);
3365 offset=imm[i];
  /* reglist: every host register currently mapped, plus the temporary */
3366 for(hr=0;hr<HOST_REGS;hr++) {
3367 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3368 }
3369 reglist|=1<<temp;
  /* NOTE(review): c is still 0 here; it is only computed below */
3370 if(offset||s<0||c) addr=temp2;
3371 else addr=s;
3372 if(s>=0) {
  /* c: the base register held a known constant on entry */
3373 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3374 if(c) {
3375 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3376 }
57871462 3377 }
1edfcc68 3378 if(!c) {
3379 #ifdef RAM_OFFSET
3380 map=get_reg(i_regs->regmap,ROREG);
3381 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3382 #endif
  /* temp = address*8 (bit offset), temp2 = address aligned down */
3383 emit_shlimm(addr,3,temp);
3384 if (opcode[i]==0x22||opcode[i]==0x26) {
3385 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3386 }else{
3387 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3388 }
1edfcc68 3389 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3390 }
3391 else {
  /* constant address: the bit offset is computed at compile time */
3392 if(ram_offset&&memtarget) {
3393 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3394 fastload_reg_override=HOST_TEMPREG;
57871462 3395 }
1edfcc68 3396 if (opcode[i]==0x22||opcode[i]==0x26) {
3397 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3398 }else{
1edfcc68 3399 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3400 }
535d208a 3401 }
3402 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3403 if(!c||memtarget) {
ffb0b9e0 3404 int a=temp2;
3405 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3406 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3407 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3408 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3409 }
3410 else
3411 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3412 if(rt1[i]) {
3413 assert(tl>=0);
  /* merge: shift the loaded word and clear the replaced bits of tl via a -1 mask */
57871462 3414 emit_andimm(temp,24,temp);
2002a1db 3415#ifdef BIG_ENDIAN_MIPS
3416 if (opcode[i]==0x26) // LWR
3417#else
3418 if (opcode[i]==0x22) // LWL
3419#endif
3420 emit_xorimm(temp,24,temp);
57871462 3421 emit_movimm(-1,HOST_TEMPREG);
3422 if (opcode[i]==0x26) {
3423 emit_shr(temp2,temp,temp2);
3424 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3425 }else{
3426 emit_shl(temp2,temp,temp2);
3427 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3428 }
3429 emit_or(temp2,tl,tl);
57871462 3430 }
535d208a 3431 //emit_storereg(rt1[i],tl); // DEBUG
3432 }
3433 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3434 // FIXME: little endian, fastload_reg_override
535d208a 3435 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3436 if(!c||memtarget) {
3437 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3438 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3439 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3440 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3441 }
3442 else
3443 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3444 if(rt1[i]) {
3445 assert(th>=0);
3446 assert(tl>=0);
  /* bit 5 of the bit offset is tested here; the eq/ne forms below depend on it */
57871462 3447 emit_testimm(temp,32);
3448 emit_andimm(temp,24,temp);
3449 if (opcode[i]==0x1A) { // LDL
3450 emit_rsbimm(temp,32,HOST_TEMPREG);
3451 emit_shl(temp2h,temp,temp2h);
3452 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3453 emit_movimm(-1,HOST_TEMPREG);
3454 emit_shl(temp2,temp,temp2);
3455 emit_cmove_reg(temp2h,th);
3456 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3457 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3458 emit_orreq(temp2,tl,tl);
3459 emit_orrne(temp2,th,th);
3460 }
3461 if (opcode[i]==0x1B) { // LDR
3462 emit_xorimm(temp,24,temp);
3463 emit_rsbimm(temp,32,HOST_TEMPREG);
3464 emit_shr(temp2,temp,temp2);
3465 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3466 emit_movimm(-1,HOST_TEMPREG);
3467 emit_shr(temp2h,temp,temp2h);
3468 emit_cmovne_reg(temp2,tl);
3469 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3470 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3471 emit_orrne(temp2h,th,th);
3472 emit_orreq(temp2h,tl,tl);
3473 }
3474 }
3475 }
3476}
3477#define loadlr_assemble loadlr_assemble_arm
3478
/*
 * Assemble a COP0 instruction for instruction i: MFC0 (opcode2 0),
 * MTC0 (opcode2 4) or, for opcode2 0x10 with function field 0x10, RFE.
 *
 * MTC0 is done by calling pcsx_mtc0 (pcsx_mtc0_ds in a delay slot) with
 * r0 = coprocessor register number and r1 = value.  For registers
 * 9, 11, 12 and 13 the Count variable is brought up to date before the
 * call and the cycle counter is re-derived from Count/next_interupt after.
 */
e2b5e7aa 3479static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3480{
3481 if(opcode2[i]==0) // MFC0
3482 {
3483 signed char t=get_reg(i_regs->regmap,rt1[i]);
3484 char copr=(source[i]>>11)&0x1f;
3485 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3486 if(t>=0&&rt1[i]!=0) {
7139f3c8 3487 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3488 }
3489 }
3490 else if(opcode2[i]==4) // MTC0
3491 {
3492 signed char s=get_reg(i_regs->regmap,rs1[i]);
3493 char copr=(source[i]>>11)&0x1f;
3494 assert(s>=0);
63cb0298 3495 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3496 if(copr==9||copr==11||copr==12||copr==13) {
  /* Count = last_count + cycle counter + CLOCK_ADJUST(ccadj[i]) */
63cb0298 3497 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3498 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3499 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3500 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3501 emit_writeword(HOST_CCREG,(int)&Count);
3502 }
3503 // What a mess. The status register (12) can enable interrupts,
3504 // so needs a special case to handle a pending interrupt.
3505 // The interrupt must be taken immediately, because a subsequent
3506 // instruction might disable interrupts again.
7139f3c8 3507 if(copr==12||copr==13) {
fca1aef2 3508 if (is_delayslot) {
3509 // burn cycles to cause cc_interrupt, which will
3510 // reschedule next_interupt. Relies on CCREG from above.
3511 assem_debug("MTC0 DS %d\n", copr);
3512 emit_writeword(HOST_CCREG,(int)&last_count);
3513 emit_movimm(0,HOST_CCREG);
3514 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3515 emit_loadreg(rs1[i],1);
fca1aef2 3516 emit_movimm(copr,0);
3517 emit_call((int)pcsx_mtc0_ds);
042c7287 3518 emit_loadreg(rs1[i],s);
fca1aef2 3519 return;
3520 }
  /* not in a delay slot: pcaddr = next instruction, pending_exception = 0 */
63cb0298 3521 emit_movimm(start+i*4+4,HOST_TEMPREG);
3522 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3523 emit_movimm(0,HOST_TEMPREG);
3524 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3525 }
3526 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3527 //else
  /* value goes to r1; it is reloaded when s is HOST_CCREG, which was overwritten above */
caeefe31 3528 if(s==HOST_CCREG)
3529 emit_loadreg(rs1[i],1);
3530 else if(s!=1)
63cb0298 3531 emit_mov(s,1);
fca1aef2 3532 emit_movimm(copr,0);
3533 emit_call((int)pcsx_mtc0);
7139f3c8 3534 if(copr==9||copr==11||copr==12||copr==13) {
  /* cycle counter = Count - CLOCK_ADJUST(ccadj[i]) - next_interupt; last_count = next_interupt */
57871462 3535 emit_readword((int)&Count,HOST_CCREG);
042c7287 3536 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3537 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3538 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3539 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3540 emit_storereg(CCREG,HOST_CCREG);
3541 }
7139f3c8 3542 if(copr==12||copr==13) {
3543 assert(!is_delayslot);
  /* branch to do_interrupt when the call left pending_exception non-zero */
3544 emit_readword((int)&pending_exception,14);
042c7287 3545 emit_test(14,14);
3546 emit_jne((int)&do_interrupt);
57871462 3547 }
  /* restore the source register(s) from the register file after the call */
3548 emit_loadreg(rs1[i],s);
3549 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3550 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3551 cop1_usable=0;
3552 }
3553 else
3554 {
3555 assert(opcode2[i]==0x10);
576bbd8f 3556 if((source[i]&0x3f)==0x10) // RFE
3557 {
  /* NOTE(review): appears to compute Status = (Status & ~0xf) | ((Status & 0x3c) >> 2) */
3558 emit_readword((int)&Status,0);
3559 emit_andimm(0,0x3c,1);
3560 emit_andimm(0,~0xf,0);
3561 emit_orrshr_imm(1,2,0);
3562 emit_writeword(0,(int)&Status);
3563 }
57871462 3564 }
3565}
3566
/*
 * Emit code that reads COP2 data register copr into host register tl.
 * temp is a scratch host register, used only for registers 28 and 29.
 *
 * Some registers are normalised on read and the normalised value is also
 * written back to reg_cop2d[]: sign-extended 16-bit (1,3,5,8-11),
 * masked to 16 bits (7,16-19), mirrored from register 14 (15), or packed
 * from registers 9-11 (28,29).
 */
b9b61529 3567static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3568{
3569 switch (copr) {
3570 case 1:
3571 case 3:
3572 case 5:
3573 case 8:
3574 case 9:
3575 case 10:
3576 case 11:
3577 emit_readword((int)&reg_cop2d[copr],tl);
3578 emit_signextend16(tl,tl);
3579 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3580 break;
3581 case 7:
3582 case 16:
3583 case 17:
3584 case 18:
3585 case 19:
3586 emit_readword((int)&reg_cop2d[copr],tl);
3587 emit_andimm(tl,0xffff,tl);
3588 emit_writeword(tl,(int)&reg_cop2d[copr]);
3589 break;
3590 case 15:
3591 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3592 emit_writeword(tl,(int)&reg_cop2d[copr]);
3593 break;
3594 case 28:
b9b61529 3595 case 29:
  /*
   * For each of registers 9, 10, 11: keep bits 7-11, force the field to 0
   * when bit 15 is set, and place the 5-bit field at bit 0, 5 and 10 of tl.
   */
3596 emit_readword((int)&reg_cop2d[9],temp);
3597 emit_testimm(temp,0x8000); // do we need this?
3598 emit_andimm(temp,0xf80,temp);
3599 emit_andne_imm(temp,0,temp);
f70d384d 3600 emit_shrimm(temp,7,tl);
b9b61529 3601 emit_readword((int)&reg_cop2d[10],temp);
3602 emit_testimm(temp,0x8000);
3603 emit_andimm(temp,0xf80,temp);
3604 emit_andne_imm(temp,0,temp);
f70d384d 3605 emit_orrshr_imm(temp,2,tl);
b9b61529 3606 emit_readword((int)&reg_cop2d[11],temp);
3607 emit_testimm(temp,0x8000);
3608 emit_andimm(temp,0xf80,temp);
3609 emit_andne_imm(temp,0,temp);
f70d384d 3610 emit_orrshl_imm(temp,3,tl);
b9b61529 3611 emit_writeword(tl,(int)&reg_cop2d[copr]);
3612 break;
3613 default:
3614 emit_readword((int)&reg_cop2d[copr],tl);
3615 break;
3616 }
3617}
3618
3619static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3620{
3621 switch (copr) {
3622 case 15:
3623 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3624 emit_writeword(sl,(int)&reg_cop2d[copr]);
3625 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3626 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3627 emit_writeword(sl,(int)&reg_cop2d[14]);
3628 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3629 break;
3630 case 28:
3631 emit_andimm(sl,0x001f,temp);
f70d384d 3632 emit_shlimm(temp,7,temp);
b9b61529 3633 emit_writeword(temp,(int)&reg_cop2d[9]);
3634 emit_andimm(sl,0x03e0,temp);
f70d384d 3635 emit_shlimm(temp,2,temp);
b9b61529 3636 emit_writeword(temp,(int)&reg_cop2d[10]);
3637 emit_andimm(sl,0x7c00,temp);
f70d384d 3638 emit_shrimm(temp,3,temp);
b9b61529 3639 emit_writeword(temp,(int)&reg_cop2d[11]);
3640 emit_writeword(sl,(int)&reg_cop2d[28]);
3641 break;
3642 case 30:
3643 emit_movs(sl,temp);
3644 emit_mvnmi(temp,temp);
665f33e1 3645#ifdef HAVE_ARMV5
b9b61529 3646 emit_clz(temp,temp);
665f33e1 3647#else
3648 emit_movs(temp,HOST_TEMPREG);
3649 emit_movimm(0,temp);
3650 emit_jeq((int)out+4*4);
3651 emit_addpl_imm(temp,1,temp);
3652 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3653 emit_jns((int)out-2*4);
3654#endif
b9b61529 3655 emit_writeword(sl,(int)&reg_cop2d[30]);
3656 emit_writeword(temp,(int)&reg_cop2d[31]);
3657 break;
b9b61529 3658 case 31:
3659 break;
3660 default:
3661 emit_writeword(sl,(int)&reg_cop2d[copr]);
3662 break;
3663 }
3664}
3665
e2b5e7aa 3666static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3667{
3668 u_int copr=(source[i]>>11)&0x1f;
3669 signed char temp=get_reg(i_regs->regmap,-1);
3670 if (opcode2[i]==0) { // MFC2
3671 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3672 if(tl>=0&&rt1[i]!=0)
b9b61529 3673 cop2_get_dreg(copr,tl,temp);
3674 }
3675 else if (opcode2[i]==4) { // MTC2
3676 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3677 cop2_put_dreg(copr,sl,temp);
3678 }
3679 else if (opcode2[i]==2) // CFC2
3680 {
3681 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3682 if(tl>=0&&rt1[i]!=0)
b9b61529 3683 emit_readword((int)&reg_cop2c[copr],tl);
3684 }
3685 else if (opcode2[i]==6) // CTC2
3686 {
3687 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3688 switch(copr) {
3689 case 4:
3690 case 12:
3691 case 20:
3692 case 26:
3693 case 27:
3694 case 29:
3695 case 30:
3696 emit_signextend16(sl,temp);
3697 break;
3698 case 31:
3699 //value = value & 0x7ffff000;
3700 //if (value & 0x7f87e000) value |= 0x80000000;
3701 emit_shrimm(sl,12,temp);
3702 emit_shlimm(temp,12,temp);
3703 emit_testimm(temp,0x7f000000);
3704 emit_testeqimm(temp,0x00870000);
3705 emit_testeqimm(temp,0x0000e000);
3706 emit_orrne_imm(temp,0x80000000,temp);
3707 break;
3708 default:
3709 temp=sl;
3710 break;
3711 }
3712 emit_writeword(temp,(int)&reg_cop2c[copr]);
3713 assert(sl>=0);
3714 }
3715}
3716
054175e9 3717static void c2op_prologue(u_int op,u_int reglist)
3718{
3719 save_regs_all(reglist);
82ed88eb 3720#ifdef PCNT
3721 emit_movimm(op,0);
3722 emit_call((int)pcnt_gte_start);
3723#endif
054175e9 3724 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3725}
3726
3727static void c2op_epilogue(u_int op,u_int reglist)
3728{
82ed88eb 3729#ifdef PCNT
3730 emit_movimm(op,0);
3731 emit_call((int)pcnt_gte_end);
3732#endif
054175e9 3733 restore_regs_all(reglist);
3734}
3735
6c0eefaf 3736static void c2op_call_MACtoIR(int lm,int need_flags)
3737{
3738 if(need_flags)
3739 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3740 else
3741 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3742}
3743
3744static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3745{
3746 emit_call((int)func);
3747 // func is C code and trashes r0
3748 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3749 if(need_flags||need_ir)
3750 c2op_call_MACtoIR(lm,need_flags);
3751 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3752}
3753
/*
 * Assemble a GTE (COP2) operation for instruction i.
 *
 * Nothing is emitted when gte_handlers[] has no entry for the operation.
 * Otherwise the op is wrapped in c2op_prologue()/c2op_epilogue(); several
 * ops are dispatched to specialised helpers (skipped in DRC_DBG builds),
 * everything else calls the generic handler.  The flag-less handler
 * variants are used when gte_unneeded[] says the flags are not read later
 * or when NDHACK_GTE_NO_FLAGS is set.
 */
054175e9 3754static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3755{
b9b61529 3756 u_int c2op=source[i]&0x3f;
6c0eefaf 3757 u_int hr,reglist_full=0,reglist;
054175e9 3758 int need_flags,need_ir;
  /* reglist_full: all mapped host registers; reglist: only the caller-saved ones */
b9b61529 3759 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3760 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3761 }
4d646738 3762 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3763
3764 if (gte_handlers[c2op]!=NULL) {
bedfea38 3765 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3766 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3767 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3768 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3769 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3770 need_flags=0;
  /* shift and lm are bits 19 and 10 of the instruction word */
6c0eefaf 3771 int shift = (source[i] >> 19) & 1;
3772 int lm = (source[i] >> 10) & 1;
054175e9 3773 switch(c2op) {
19776aef 3774#ifndef DRC_DBG
054175e9 3775 case GTE_MVMVA: {
82336ba3 3776#ifdef HAVE_ARMV5
054175e9 3777 int v = (source[i] >> 15) & 3;
3778 int cv = (source[i] >> 13) & 3;
3779 int mx = (source[i] >> 17) & 3;
4d646738 3780 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3781 c2op_prologue(c2op,reglist);
3782 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3783 if(v<3)
3784 emit_ldrd(v*8,0,4);
3785 else {
3786 emit_movzwl_indexed(9*4,0,4); // gteIR
3787 emit_movzwl_indexed(10*4,0,6);
3788 emit_movzwl_indexed(11*4,0,5);
3789 emit_orrshl_imm(6,16,4);
3790 }
3791 if(mx<3)
3792 emit_addimm(0,32*4+mx*8*4,6);
3793 else
3794 emit_readword((int)&zeromem_ptr,6);
3795 if(cv<3)
3796 emit_addimm(0,32*4+(cv*8+5)*4,7);
3797 else
3798 emit_readword((int)&zeromem_ptr,7);
3799#ifdef __ARM_NEON__
3800 emit_movimm(source[i],1); // opcode
3801 emit_call((int)gteMVMVA_part_neon);
3802 if(need_flags) {
3803 emit_movimm(lm,1);
3804 emit_call((int)gteMACtoIR_flags_neon);
3805 }
3806#else
3807 if(cv==3&&shift)
3808 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3809 else {
3810 emit_movimm(shift,1);
3811 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3812 }
6c0eefaf 3813 if(need_flags||need_ir)
3814 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3815#endif
3816#else /* if not HAVE_ARMV5 */
   /* generic handler: it reads the opcode from psxRegs.code */
3817 c2op_prologue(c2op,reglist);
3818 emit_movimm(source[i],1); // opcode
3819 emit_writeword(1,(int)&psxRegs.code);
3820 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3821#endif
3822 break;
3823 }
6c0eefaf 3824 case GTE_OP:
3825 c2op_prologue(c2op,reglist);
3826 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3827 if(need_flags||need_ir) {
   /* r0 is reloaded with the COP2 register base before the follow-up call */
3828 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3829 c2op_call_MACtoIR(lm,need_flags);
3830 }
3831 break;
3832 case GTE_DPCS:
3833 c2op_prologue(c2op,reglist);
3834 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3835 break;
3836 case GTE_INTPL:
3837 c2op_prologue(c2op,reglist);
3838 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3839 break;
3840 case GTE_SQR:
3841 c2op_prologue(c2op,reglist);
3842 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3843 if(need_flags||need_ir) {
3844 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3845 c2op_call_MACtoIR(lm,need_flags);
3846 }
3847 break;
3848 case GTE_DCPL:
3849 c2op_prologue(c2op,reglist);
3850 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3851 break;
3852 case GTE_GPF:
3853 c2op_prologue(c2op,reglist);
3854 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3855 break;
3856 case GTE_GPL:
3857 c2op_prologue(c2op,reglist);
3858 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3859 break;
19776aef 3860#endif
054175e9 3861 default:
054175e9 3862 c2op_prologue(c2op,reglist);
19776aef 3863#ifdef DRC_DBG
3864 emit_movimm(source[i],1); // opcode
3865 emit_writeword(1,(int)&psxRegs.code);
3866#endif
054175e9 3867 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3868 break;
3869 }
3870 c2op_epilogue(c2op,reglist);
3871 }
b9b61529 3872}
3873
e2b5e7aa 3874static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3875{
3876 // XXX: should just just do the exception instead
3877 if(!cop1_usable) {
3878 int jaddr=(int)out;
3879 emit_jmp(0);
3880 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3881 cop1_usable=1;
3882 }
3883}
3884
/* COP1 register moves are not implemented: route to the unusable stub. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3889
/* FP conversions are not implemented: route to the unusable stub. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3894#define fconv_assemble fconv_assemble_arm
3895
/* FP compares are not implemented: route to the unusable stub. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3900
/* FP arithmetic is not implemented: route to the unusable stub. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3905
e2b5e7aa 3906static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3907{
3908 // case 0x18: MULT
3909 // case 0x19: MULTU
3910 // case 0x1A: DIV
3911 // case 0x1B: DIVU
3912 // case 0x1C: DMULT
3913 // case 0x1D: DMULTU
3914 // case 0x1E: DDIV
3915 // case 0x1F: DDIVU
3916 if(rs1[i]&&rs2[i])
3917 {
3918 if((opcode2[i]&4)==0) // 32-bit
3919 {
3920 if(opcode2[i]==0x18) // MULT
3921 {
3922 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3923 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3924 signed char hi=get_reg(i_regs->regmap,HIREG);
3925 signed char lo=get_reg(i_regs->regmap,LOREG);
3926 assert(m1>=0);
3927 assert(m2>=0);
3928 assert(hi>=0);
3929 assert(lo>=0);
3930 emit_smull(m1,m2,hi,lo);
3931 }
3932 if(opcode2[i]==0x19) // MULTU
3933 {
3934 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3935 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3936 signed char hi=get_reg(i_regs->regmap,HIREG);
3937 signed char lo=get_reg(i_regs->regmap,LOREG);
3938 assert(m1>=0);
3939 assert(m2>=0);
3940 assert(hi>=0);
3941 assert(lo>=0);
3942 emit_umull(m1,m2,hi,lo);
3943 }
3944 if(opcode2[i]==0x1A) // DIV
3945 {
3946 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3947 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3948 assert(d1>=0);
3949 assert(d2>=0);
3950 signed char quotient=get_reg(i_regs->regmap,LOREG);
3951 signed char remainder=get_reg(i_regs->regmap,HIREG);
3952 assert(quotient>=0);
3953 assert(remainder>=0);
3954 emit_movs(d1,remainder);
44a80f6a 3955 emit_movimm(0xffffffff,quotient);
3956 emit_negmi(quotient,quotient); // .. quotient and ..
3957 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3958 emit_movs(d2,HOST_TEMPREG);
3959 emit_jeq((int)out+52); // Division by zero
82336ba3 3960 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3961#ifdef HAVE_ARMV5
57871462 3962 emit_clz(HOST_TEMPREG,quotient);
3963 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3964#else
3965 emit_movimm(0,quotient);
3966 emit_addpl_imm(quotient,1,quotient);
3967 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3968 emit_jns((int)out-2*4);
3969#endif
57871462 3970 emit_orimm(quotient,1<<31,quotient);
3971 emit_shr(quotient,quotient,quotient);
3972 emit_cmp(remainder,HOST_TEMPREG);
3973 emit_subcs(remainder,HOST_TEMPREG,remainder);
3974 emit_adcs(quotient,quotient,quotient);
3975 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3976 emit_jcc((int)out-16); // -4
3977 emit_teq(d1,d2);
3978 emit_negmi(quotient,quotient);
3979 emit_test(d1,d1);
3980 emit_negmi(remainder,remainder);
3981 }
3982 if(opcode2[i]==0x1B) // DIVU
3983 {
3984 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3985 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3986 assert(d1>=0);
3987 assert(d2>=0);
3988 signed char quotient=get_reg(i_regs->regmap,LOREG);
3989 signed char remainder=get_reg(i_regs->regmap,HIREG);
3990 assert(quotient>=0);
3991 assert(remainder>=0);
44a80f6a 3992 emit_mov(d1,remainder);
3993 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3994 emit_test(d2,d2);
44a80f6a 3995 emit_jeq((int)out+40); // Division by zero
665f33e1 3996#ifdef HAVE_ARMV5
57871462 3997 emit_clz(d2,HOST_TEMPREG);
3998 emit_movimm(1<<31,quotient);
3999 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 4000#else
4001 emit_movimm(0,HOST_TEMPREG);
82336ba3 4002 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4003 emit_lslpls_imm(d2,1,d2);
665f33e1 4004 emit_jns((int)out-2*4);
4005 emit_movimm(1<<31,quotient);
4006#endif
57871462 4007 emit_shr(quotient,HOST_TEMPREG,quotient);
4008 emit_cmp(remainder,d2);
4009 emit_subcs(remainder,d2,remainder);
4010 emit_adcs(quotient,quotient,quotient);
4011 emit_shrcc_imm(d2,1,d2);
4012 emit_jcc((int)out-16); // -4
4013 }
4014 }
4015 else // 64-bit
71e490c5 4016 assert(0);
57871462 4017 }
4018 else
4019 {
4020 // Multiply by zero is zero.
4021 // MIPS does not have a divide by zero exception.
4022 // The result is undefined, we return zero.
4023 signed char hr=get_reg(i_regs->regmap,HIREG);
4024 signed char lr=get_reg(i_regs->regmap,LOREG);
4025 if(hr>=0) emit_zeroreg(hr);
4026 if(lr>=0) emit_zeroreg(lr);
4027 }
4028}
4029#define multdiv_assemble multdiv_assemble_arm
4030
// Hash-constant preload hook: deliberately emits nothing on ARM.
// The x86 backend uses this hook to put 0xf8 into a register; here the
// hash is produced by a single instruction (see do_rhash), so there is
// no constant to preload.
static void do_preload_rhash(int r) {
  (void)r; // register argument is intentionally ignored
}
4035
e2b5e7aa 4036static void do_preload_rhtbl(int ht) {
57871462 4037 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
4038}
4039
// Emit the mini hash table hash: an AND-immediate of `rs` with 0xf8,
// result in `rh`.  The same bits select the slot in do_miniht_insert
// ((addr & 0xFF) >> 3), there expressed as an array index.
static void do_rhash(int rs, int rh)
{
  enum { MINIHT_HASH_MASK = 0xf8 };

  emit_andimm(rs, MINIHT_HASH_MASK, rh);
}
4043
e2b5e7aa 4044static void do_miniht_load(int ht,int rh) {
57871462 4045 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
4046 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
4047}
4048
e2b5e7aa 4049static void do_miniht_jump(int rs,int rh,int ht) {
57871462 4050 emit_cmp(rh,rs);
4051 emit_ldreq_indexed(ht,4,15);
4052 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4053 emit_mov(rs,7);
4054 emit_jmp(jump_vaddr_reg[7]);
4055 #else
4056 emit_jmp(jump_vaddr_reg[rs]);
4057 #endif
4058}
4059
e2b5e7aa 4060static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4061 #ifndef HAVE_ARMV7
57871462 4062 emit_movimm(return_address,rt); // PC into link register
4063 add_to_linker((int)out,return_address,1);
4064 emit_pcreladdr(temp);
4065 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4066 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4067 #else
4068 emit_movw(return_address&0x0000FFFF,rt);
4069 add_to_linker((int)out,return_address,1);
4070 emit_pcreladdr(temp);
4071 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4072 emit_movt(return_address&0xFFFF0000,rt);
4073 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4074 #endif
4075}
4076
e2b5e7aa 4077static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4078{
4079 //if(dirty_pre==dirty) return;
581335b0 4080 int hr,reg;
57871462 4081 for(hr=0;hr<HOST_REGS;hr++) {
4082 if(hr!=EXCLUDE_REG) {
4083 reg=pre[hr];
4084 if(((~u)>>(reg&63))&1) {
f776eb14 4085 if(reg>0) {
57871462 4086 if(((dirty_pre&~dirty)>>hr)&1) {
4087 if(reg>0&&reg<34) {
4088 emit_storereg(reg,hr);
4089 if( ((is32_pre&~uu)>>reg)&1 ) {
4090 emit_sarimm(hr,31,HOST_TEMPREG);
4091 emit_storereg(reg|64,HOST_TEMPREG);
4092 }
4093 }
4094 else if(reg>=64) {
4095 emit_storereg(reg,hr);
4096 }
4097 }
4098 }
57871462 4099 }
4100 }
4101 }
4102}
4103
4104
4105/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4106static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4107{
4108 int hr;
4109 int wrote=-1;
4110 for(hr=HOST_REGS-1;hr>=0;hr--) {
4111 if(hr!=EXCLUDE_REG) {
4112 if(pre[hr]!=entry[hr]) {
4113 if(pre[hr]>=0) {
4114 if((dirty>>hr)&1) {
4115 if(get_reg(entry,pre[hr])<0) {
4116 if(pre[hr]<64) {
4117 if(!((u>>pre[hr])&1)) {
4118 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4119 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4120 emit_sarimm(hr,31,hr+1);
4121 emit_strdreg(pre[hr],hr);
4122 }
4123 else
4124 emit_storereg(pre[hr],hr);
4125 }else{
4126 emit_storereg(pre[hr],hr);
4127 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4128 emit_sarimm(hr,31,hr);
4129 emit_storereg(pre[hr]|64,hr);
4130 }
4131 }
4132 }
4133 }else{
4134 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4135 emit_storereg(pre[hr],hr);
4136 }
4137 }
4138 wrote=hr;
4139 }
4140 }
4141 }
4142 }
4143 }
4144 }
4145 for(hr=0;hr<HOST_REGS;hr++) {
4146 if(hr!=EXCLUDE_REG) {
4147 if(pre[hr]!=entry[hr]) {
4148 if(pre[hr]>=0) {
4149 int nr;
4150 if((nr=get_reg(entry,pre[hr]))>=0) {
4151 emit_mov(hr,nr);
4152 }
4153 }
4154 }
4155 }
4156 }
4157}
4158#define wb_invalidate wb_invalidate_arm
4159*/
4160
d148d265 4161static void mark_clear_cache(void *target)
4162{
4163 u_long offset = (char *)target - (char *)BASE_ADDR;
4164 u_int mask = 1u << ((offset >> 12) & 31);
4165 if (!(needs_clear_cache[offset >> 17] & mask)) {
4166 char *start = (char *)((u_long)target & ~4095ul);
4167 start_tcache_write(start, start + 4096);
4168 needs_clear_cache[offset >> 17] |= mask;
4169 }
4170}
4171
dd3a91a1 4172// Clearing the cache is rather slow on ARM Linux, so mark the areas
4173// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4174static void do_clear_cache()
dd3a91a1 4175{
4176 int i,j;
4177 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4178 {
4179 u_int bitmap=needs_clear_cache[i];
4180 if(bitmap) {
4181 u_int start,end;
9f51b4b9 4182 for(j=0;j<32;j++)
dd3a91a1 4183 {
4184 if(bitmap&(1<<j)) {
bdeade46 4185 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4186 end=start+4095;
4187 j++;
4188 while(j<32) {
4189 if(bitmap&(1<<j)) {
4190 end+=4096;
4191 j++;
4192 }else{
d148d265 4193 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4194 break;
4195 }
4196 }
4197 }
4198 }
4199 needs_clear_cache[i]=0;
4200 }
4201 }
4202}
4203
// CPU-architecture-specific initialization hook.
// Nothing is set up here for this backend, so the body is deliberately empty.
static void arch_init()
{
  /* nothing to do */
}
b9b61529 4207
4208// vim:shiftwidth=2:expandtab