drc: mark things static
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
a327ad27 31#if !BASE_ADDR_FIXED
bdeade46 32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
4d646738 35#ifndef __MACH__
36#define CALLER_SAVE_REGS 0x100f
37#else
38#define CALLER_SAVE_REGS 0x120f
39#endif
40
e2b5e7aa 41#define unused __attribute__((unused))
42
57871462 43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
57871462 49extern void *dynarec_local;
57871462 50extern u_int mini_ht[32][2];
57871462 51
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68const u_int jump_vaddr_reg[16] = {
69 (int)jump_vaddr_r0,
70 (int)jump_vaddr_r1,
71 (int)jump_vaddr_r2,
72 (int)jump_vaddr_r3,
73 (int)jump_vaddr_r4,
74 (int)jump_vaddr_r5,
75 (int)jump_vaddr_r6,
76 (int)jump_vaddr_r7,
77 (int)jump_vaddr_r8,
78 (int)jump_vaddr_r9,
79 (int)jump_vaddr_r10,
80 0,
81 (int)jump_vaddr_r12,
82 0,
83 0,
84 0};
85
0bbd1454 86void invalidate_addr_r0();
87void invalidate_addr_r1();
88void invalidate_addr_r2();
89void invalidate_addr_r3();
90void invalidate_addr_r4();
91void invalidate_addr_r5();
92void invalidate_addr_r6();
93void invalidate_addr_r7();
94void invalidate_addr_r8();
95void invalidate_addr_r9();
96void invalidate_addr_r10();
97void invalidate_addr_r12();
98
99const u_int invalidate_addr_reg[16] = {
100 (int)invalidate_addr_r0,
101 (int)invalidate_addr_r1,
102 (int)invalidate_addr_r2,
103 (int)invalidate_addr_r3,
104 (int)invalidate_addr_r4,
105 (int)invalidate_addr_r5,
106 (int)invalidate_addr_r6,
107 (int)invalidate_addr_r7,
108 (int)invalidate_addr_r8,
109 (int)invalidate_addr_r9,
110 (int)invalidate_addr_r10,
111 0,
112 (int)invalidate_addr_r12,
113 0,
114 0,
115 0};
116
e2b5e7aa 117static unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 118
57871462 119/* Linker */
120
e2b5e7aa 121static void set_jump_target(int addr,u_int target)
57871462 122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
// Disabled variant of set_jump_target. When 'copy' is set it may copy the
// instruction found at the branch target into the word just before the
// branch (which must hold 0xe28dd000) and then branch to target+4 instead.
// Works, but the difference in speed is usually insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  u_char top=((u_char *)addr)[3];
  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Never copy these kinds of target instruction
  if((target_insn&0x0e100000)==0              // ALU, no immediate, no flags
     ||(target_insn&0x0c100000)==0x04100000   // Load
     ||(target_insn&0x08000000)) {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
57871462 186
187/* Literal pool */
e2b5e7aa 188static void add_literal(int addr,int val)
57871462 189{
15776b68 190 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 191 literals[literalcount][0]=addr;
192 literals[literalcount][1]=val;
9f51b4b9 193 literalcount++;
194}
57871462 195
e2b5e7aa 196static void *kill_pointer(void *stub)
57871462 197{
198 int *ptr=(int *)(stub+4);
199 assert((*ptr&0x0ff00000)==0x05900000);
200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 204 return i_ptr;
57871462 205}
206
f968d35d 207// find where external branch is liked to using addr of it's stub:
208// get address that insn one after stub loads (dyna_linker arg1),
209// treat it as a pointer to branch insn,
210// return addr where that branch jumps to
e2b5e7aa 211static int get_pointer(void *stub)
57871462 212{
213 //printf("get_pointer(%x)\n",(int)stub);
214 int *ptr=(int *)(stub+4);
f968d35d 215 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 216 u_int offset=*ptr&0xfff;
217 int **l_ptr=(void *)ptr+offset+8;
218 int *i_ptr=*l_ptr;
219 assert((*i_ptr&0x0f000000)==0x0a000000);
220 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
221}
222
223// Find the "clean" entry point from a "dirty" entry point
224// by skipping past the call to verify_code
e2b5e7aa 225static u_int get_clean_addr(int addr)
57871462 226{
227 int *ptr=(int *)addr;
665f33e1 228 #ifndef HAVE_ARMV7
57871462 229 ptr+=4;
230 #else
231 ptr+=6;
232 #endif
233 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
234 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
235 ptr++;
236 if((*ptr&0xFF000000)==0xea000000) {
237 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
238 }
239 return (u_int)ptr;
240}
241
// Check whether the source a dirty block was compiled from is unchanged.
//
// 'addr' is the address of the block's dirty entry stub. The stub starts by
// loading three values - source address, address of the saved copy and
// length - either from a literal pool (pre-ARMv7: pc-relative ldr, values
// are three consecutive pool words) or with movw/movt pairs (HAVE_ARMV7).
// Those values are decoded from the instruction stream and the two memory
// ranges are compared.
//
// Returns non-zero when source and copy are identical, 0 otherwise.
//
// Takes the stub address as an int, like get_clean_addr(), isclean() and
// get_bounds(), which decode the same stub layout.
static int verify_dirty(int addr)
{
  u_int *ptr=(u_int *)addr;
  #ifndef HAVE_ARMV7
  // get from literal pool
  assert((*ptr&0xFFFF0000)==0xe59f0000);
  u_int offset=*ptr&0xfff;
  u_int *l_ptr=(void *)ptr+offset+8;
  u_int source=l_ptr[0];
  u_int copy=l_ptr[1];
  u_int len=l_ptr[2];
  ptr+=4;
  #else
  // ARMv7 movw/movt
  assert((*ptr&0xFFF00000)==0xe3000000);
  u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
  u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
  u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
  ptr+=6;
  #endif
  // the loads must be followed (possibly one word later) by the bl
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  return !memcmp((void *)source,(void *)copy,len);
}
267
268// This doesn't necessarily find all clean entry points, just
269// guarantees that it's not dirty
e2b5e7aa 270static int isclean(int addr)
57871462 271{
665f33e1 272 #ifndef HAVE_ARMV7
57871462 273 int *ptr=((u_int *)addr)+4;
274 #else
275 int *ptr=((u_int *)addr)+6;
276 #endif
277 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
278 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
279 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
280 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
281 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
282 return 1;
283}
284
4a35de07 285// get source that block at addr was compiled from (host pointers)
e2b5e7aa 286static void get_bounds(int addr,u_int *start,u_int *end)
57871462 287{
288 u_int *ptr=(u_int *)addr;
665f33e1 289 #ifndef HAVE_ARMV7
57871462 290 // get from literal pool
15776b68 291 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 292 u_int offset=*ptr&0xfff;
293 u_int *l_ptr=(void *)ptr+offset+8;
294 u_int source=l_ptr[0];
295 //u_int copy=l_ptr[1];
296 u_int len=l_ptr[2];
297 ptr+=4;
298 #else
299 // ARMv7 movw/movt
300 assert((*ptr&0xFFF00000)==0xe3000000);
301 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
302 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
303 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
304 ptr+=6;
305 #endif
306 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
307 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 308 *start=source;
309 *end=source+len;
310}
311
312/* Register allocation */
313
314// Note: registers are allocated clean (unmodified state)
315// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 316static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 317{
318 int r,hr;
319 int preferred_reg = (reg&7);
320 if(reg==CCREG) preferred_reg=HOST_CCREG;
321 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 322
57871462 323 // Don't allocate unused registers
324 if((cur->u>>reg)&1) return;
9f51b4b9 325
57871462 326 // see if it's already allocated
327 for(hr=0;hr<HOST_REGS;hr++)
328 {
329 if(cur->regmap[hr]==reg) return;
330 }
9f51b4b9 331
57871462 332 // Keep the same mapping if the register was already allocated in a loop
333 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 334
57871462 335 // Try to allocate the preferred register
336 if(cur->regmap[preferred_reg]==-1) {
337 cur->regmap[preferred_reg]=reg;
338 cur->dirty&=~(1<<preferred_reg);
339 cur->isconst&=~(1<<preferred_reg);
340 return;
341 }
342 r=cur->regmap[preferred_reg];
343 if(r<64&&((cur->u>>r)&1)) {
344 cur->regmap[preferred_reg]=reg;
345 cur->dirty&=~(1<<preferred_reg);
346 cur->isconst&=~(1<<preferred_reg);
347 return;
348 }
349 if(r>=64&&((cur->uu>>(r&63))&1)) {
350 cur->regmap[preferred_reg]=reg;
351 cur->dirty&=~(1<<preferred_reg);
352 cur->isconst&=~(1<<preferred_reg);
353 return;
354 }
9f51b4b9 355
57871462 356 // Clear any unneeded registers
357 // We try to keep the mapping consistent, if possible, because it
358 // makes branches easier (especially loops). So we try to allocate
359 // first (see above) before removing old mappings. If this is not
360 // possible then go ahead and clear out the registers that are no
361 // longer needed.
362 for(hr=0;hr<HOST_REGS;hr++)
363 {
364 r=cur->regmap[hr];
365 if(r>=0) {
366 if(r<64) {
367 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
368 }
369 else
370 {
371 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
372 }
373 }
374 }
375 // Try to allocate any available register, but prefer
376 // registers that have not been used recently.
377 if(i>0) {
378 for(hr=0;hr<HOST_REGS;hr++) {
379 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
380 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
381 cur->regmap[hr]=reg;
382 cur->dirty&=~(1<<hr);
383 cur->isconst&=~(1<<hr);
384 return;
385 }
386 }
387 }
388 }
389 // Try to allocate any available register
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 cur->regmap[hr]=reg;
393 cur->dirty&=~(1<<hr);
394 cur->isconst&=~(1<<hr);
395 return;
396 }
397 }
9f51b4b9 398
57871462 399 // Ok, now we have to evict someone
400 // Pick a register we hopefully won't need soon
401 u_char hsn[MAXREG+1];
402 memset(hsn,10,sizeof(hsn));
403 int j;
404 lsn(hsn,i,&preferred_reg);
405 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
406 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
407 if(i>0) {
408 // Don't evict the cycle count at entry points, otherwise the entry
409 // stub will have to write it.
410 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
411 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
412 for(j=10;j>=3;j--)
413 {
414 // Alloc preferred register if available
415 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
416 for(hr=0;hr<HOST_REGS;hr++) {
417 // Evict both parts of a 64-bit register
418 if((cur->regmap[hr]&63)==r) {
419 cur->regmap[hr]=-1;
420 cur->dirty&=~(1<<hr);
421 cur->isconst&=~(1<<hr);
422 }
423 }
424 cur->regmap[preferred_reg]=reg;
425 return;
426 }
427 for(r=1;r<=MAXREG;r++)
428 {
429 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
430 for(hr=0;hr<HOST_REGS;hr++) {
431 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
432 if(cur->regmap[hr]==r+64) {
433 cur->regmap[hr]=reg;
434 cur->dirty&=~(1<<hr);
435 cur->isconst&=~(1<<hr);
436 return;
437 }
438 }
439 }
440 for(hr=0;hr<HOST_REGS;hr++) {
441 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
442 if(cur->regmap[hr]==r) {
443 cur->regmap[hr]=reg;
444 cur->dirty&=~(1<<hr);
445 cur->isconst&=~(1<<hr);
446 return;
447 }
448 }
449 }
450 }
451 }
452 }
453 }
454 for(j=10;j>=0;j--)
455 {
456 for(r=1;r<=MAXREG;r++)
457 {
458 if(hsn[r]==j) {
459 for(hr=0;hr<HOST_REGS;hr++) {
460 if(cur->regmap[hr]==r+64) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 for(hr=0;hr<HOST_REGS;hr++) {
468 if(cur->regmap[hr]==r) {
469 cur->regmap[hr]=reg;
470 cur->dirty&=~(1<<hr);
471 cur->isconst&=~(1<<hr);
472 return;
473 }
474 }
475 }
476 }
477 }
c43b5311 478 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 479}
480
e2b5e7aa 481static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 482{
483 int preferred_reg = 8+(reg&1);
484 int r,hr;
9f51b4b9 485
57871462 486 // allocate the lower 32 bits
487 alloc_reg(cur,i,reg);
9f51b4b9 488
57871462 489 // Don't allocate unused registers
490 if((cur->uu>>reg)&1) return;
9f51b4b9 491
57871462 492 // see if the upper half is already allocated
493 for(hr=0;hr<HOST_REGS;hr++)
494 {
495 if(cur->regmap[hr]==reg+64) return;
496 }
9f51b4b9 497
57871462 498 // Keep the same mapping if the register was already allocated in a loop
499 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 500
57871462 501 // Try to allocate the preferred register
502 if(cur->regmap[preferred_reg]==-1) {
503 cur->regmap[preferred_reg]=reg|64;
504 cur->dirty&=~(1<<preferred_reg);
505 cur->isconst&=~(1<<preferred_reg);
506 return;
507 }
508 r=cur->regmap[preferred_reg];
509 if(r<64&&((cur->u>>r)&1)) {
510 cur->regmap[preferred_reg]=reg|64;
511 cur->dirty&=~(1<<preferred_reg);
512 cur->isconst&=~(1<<preferred_reg);
513 return;
514 }
515 if(r>=64&&((cur->uu>>(r&63))&1)) {
516 cur->regmap[preferred_reg]=reg|64;
517 cur->dirty&=~(1<<preferred_reg);
518 cur->isconst&=~(1<<preferred_reg);
519 return;
520 }
9f51b4b9 521
57871462 522 // Clear any unneeded registers
523 // We try to keep the mapping consistent, if possible, because it
524 // makes branches easier (especially loops). So we try to allocate
525 // first (see above) before removing old mappings. If this is not
526 // possible then go ahead and clear out the registers that are no
527 // longer needed.
528 for(hr=HOST_REGS-1;hr>=0;hr--)
529 {
530 r=cur->regmap[hr];
531 if(r>=0) {
532 if(r<64) {
533 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
534 }
535 else
536 {
537 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
538 }
539 }
540 }
541 // Try to allocate any available register, but prefer
542 // registers that have not been used recently.
543 if(i>0) {
544 for(hr=0;hr<HOST_REGS;hr++) {
545 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
546 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
547 cur->regmap[hr]=reg|64;
548 cur->dirty&=~(1<<hr);
549 cur->isconst&=~(1<<hr);
550 return;
551 }
552 }
553 }
554 }
555 // Try to allocate any available register
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 cur->regmap[hr]=reg|64;
559 cur->dirty&=~(1<<hr);
560 cur->isconst&=~(1<<hr);
561 return;
562 }
563 }
9f51b4b9 564
57871462 565 // Ok, now we have to evict someone
566 // Pick a register we hopefully won't need soon
567 u_char hsn[MAXREG+1];
568 memset(hsn,10,sizeof(hsn));
569 int j;
570 lsn(hsn,i,&preferred_reg);
571 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
572 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
573 if(i>0) {
574 // Don't evict the cycle count at entry points, otherwise the entry
575 // stub will have to write it.
576 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
577 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
578 for(j=10;j>=3;j--)
579 {
580 // Alloc preferred register if available
581 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
582 for(hr=0;hr<HOST_REGS;hr++) {
583 // Evict both parts of a 64-bit register
584 if((cur->regmap[hr]&63)==r) {
585 cur->regmap[hr]=-1;
586 cur->dirty&=~(1<<hr);
587 cur->isconst&=~(1<<hr);
588 }
589 }
590 cur->regmap[preferred_reg]=reg|64;
591 return;
592 }
593 for(r=1;r<=MAXREG;r++)
594 {
595 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
596 for(hr=0;hr<HOST_REGS;hr++) {
597 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
598 if(cur->regmap[hr]==r+64) {
599 cur->regmap[hr]=reg|64;
600 cur->dirty&=~(1<<hr);
601 cur->isconst&=~(1<<hr);
602 return;
603 }
604 }
605 }
606 for(hr=0;hr<HOST_REGS;hr++) {
607 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
608 if(cur->regmap[hr]==r) {
609 cur->regmap[hr]=reg|64;
610 cur->dirty&=~(1<<hr);
611 cur->isconst&=~(1<<hr);
612 return;
613 }
614 }
615 }
616 }
617 }
618 }
619 }
620 for(j=10;j>=0;j--)
621 {
622 for(r=1;r<=MAXREG;r++)
623 {
624 if(hsn[r]==j) {
625 for(hr=0;hr<HOST_REGS;hr++) {
626 if(cur->regmap[hr]==r+64) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 for(hr=0;hr<HOST_REGS;hr++) {
634 if(cur->regmap[hr]==r) {
635 cur->regmap[hr]=reg|64;
636 cur->dirty&=~(1<<hr);
637 cur->isconst&=~(1<<hr);
638 return;
639 }
640 }
641 }
642 }
643 }
c43b5311 644 SysPrintf("This shouldn't happen");exit(1);
57871462 645}
646
647// Allocate a temporary register. This is done without regard to
648// dirty status or whether the register we request is on the unneeded list
649// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 650static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 651{
652 int r,hr;
653 int preferred_reg = -1;
9f51b4b9 654
57871462 655 // see if it's already allocated
656 for(hr=0;hr<HOST_REGS;hr++)
657 {
658 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
659 }
9f51b4b9 660
57871462 661 // Try to allocate any available register
662 for(hr=HOST_REGS-1;hr>=0;hr--) {
663 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
664 cur->regmap[hr]=reg;
665 cur->dirty&=~(1<<hr);
666 cur->isconst&=~(1<<hr);
667 return;
668 }
669 }
9f51b4b9 670
57871462 671 // Find an unneeded register
672 for(hr=HOST_REGS-1;hr>=0;hr--)
673 {
674 r=cur->regmap[hr];
675 if(r>=0) {
676 if(r<64) {
677 if((cur->u>>r)&1) {
678 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
679 cur->regmap[hr]=reg;
680 cur->dirty&=~(1<<hr);
681 cur->isconst&=~(1<<hr);
682 return;
683 }
684 }
685 }
686 else
687 {
688 if((cur->uu>>(r&63))&1) {
689 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
690 cur->regmap[hr]=reg;
691 cur->dirty&=~(1<<hr);
692 cur->isconst&=~(1<<hr);
693 return;
694 }
695 }
696 }
697 }
698 }
9f51b4b9 699
57871462 700 // Ok, now we have to evict someone
701 // Pick a register we hopefully won't need soon
702 // TODO: we might want to follow unconditional jumps here
703 // TODO: get rid of dupe code and make this into a function
704 u_char hsn[MAXREG+1];
705 memset(hsn,10,sizeof(hsn));
706 int j;
707 lsn(hsn,i,&preferred_reg);
708 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
709 if(i>0) {
710 // Don't evict the cycle count at entry points, otherwise the entry
711 // stub will have to write it.
712 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
713 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
714 for(j=10;j>=3;j--)
715 {
716 for(r=1;r<=MAXREG;r++)
717 {
718 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
719 for(hr=0;hr<HOST_REGS;hr++) {
720 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
721 if(cur->regmap[hr]==r+64) {
722 cur->regmap[hr]=reg;
723 cur->dirty&=~(1<<hr);
724 cur->isconst&=~(1<<hr);
725 return;
726 }
727 }
728 }
729 for(hr=0;hr<HOST_REGS;hr++) {
730 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
731 if(cur->regmap[hr]==r) {
732 cur->regmap[hr]=reg;
733 cur->dirty&=~(1<<hr);
734 cur->isconst&=~(1<<hr);
735 return;
736 }
737 }
738 }
739 }
740 }
741 }
742 }
743 for(j=10;j>=0;j--)
744 {
745 for(r=1;r<=MAXREG;r++)
746 {
747 if(hsn[r]==j) {
748 for(hr=0;hr<HOST_REGS;hr++) {
749 if(cur->regmap[hr]==r+64) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 for(hr=0;hr<HOST_REGS;hr++) {
757 if(cur->regmap[hr]==r) {
758 cur->regmap[hr]=reg;
759 cur->dirty&=~(1<<hr);
760 cur->isconst&=~(1<<hr);
761 return;
762 }
763 }
764 }
765 }
766 }
c43b5311 767 SysPrintf("This shouldn't happen");exit(1);
57871462 768}
e2b5e7aa 769
57871462 770// Allocate a specific ARM register.
e2b5e7aa 771static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 772{
773 int n;
f776eb14 774 int dirty=0;
9f51b4b9 775
57871462 776 // see if it's already allocated (and dealloc it)
777 for(n=0;n<HOST_REGS;n++)
778 {
f776eb14 779 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
780 dirty=(cur->dirty>>n)&1;
781 cur->regmap[n]=-1;
782 }
57871462 783 }
9f51b4b9 784
57871462 785 cur->regmap[hr]=reg;
786 cur->dirty&=~(1<<hr);
f776eb14 787 cur->dirty|=dirty<<hr;
57871462 788 cur->isconst&=~(1<<hr);
789}
790
791// Alloc cycle count into dedicated register
e2b5e7aa 792static void alloc_cc(struct regstat *cur,int i)
57871462 793{
794 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
795}
796
797/* Special alloc */
798
799
800/* Assembler */
801
e2b5e7aa 802static unused char regname[16][4] = {
57871462 803 "r0",
804 "r1",
805 "r2",
806 "r3",
807 "r4",
808 "r5",
809 "r6",
810 "r7",
811 "r8",
812 "r9",
813 "r10",
814 "fp",
815 "r12",
816 "sp",
817 "lr",
818 "pc"};
819
e2b5e7aa 820static void output_w32(u_int word)
57871462 821{
822 *((u_int *)out)=word;
823 out+=4;
824}
e2b5e7aa 825
// Build the register fields of an ARM instruction word: rn in bits 19..16,
// rd in bits 15..12 and rm in bits 3..0. Each register number must be below
// 16 (asserted).
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
e2b5e7aa 833
// Build the operand fields of an ARM data-processing instruction with an
// immediate: rn in bits 19..16, rd in bits 15..12, an 8-bit immediate in
// bits 7..0 and, above it, the rotation that places the immediate 'shift'
// bits to the left. 'shift' must be even, 'imm' below 256 and the register
// numbers below 16 (all asserted).
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  // a left shift by n is encoded as a right rotation by 32-n
  rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
e2b5e7aa 842
// Try to express 'imm' as an ARM immediate operand: an 8-bit value rotated
// right by an even amount. On success the 12-bit operand field is written to
// *encoded and 1 is returned. When no such encoding exists, 0 is returned
// and *encoded is left at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate the value right two bits at a time until it fits into 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
e2b5e7aa 858
859static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 860{
861 u_int ret=genimm(imm,encoded);
862 assert(ret);
863}
e2b5e7aa 864
865static u_int genjmp(u_int addr)
57871462 866{
867 int offset=addr-(int)out-8;
e80343e2 868 if(offset<-33554432||offset>=33554432) {
869 if (addr>2) {
c43b5311 870 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 871 exit(1);
872 }
873 return 0;
874 }
57871462 875 return ((u_int)offset>>2)&0xffffff;
876}
877
e2b5e7aa 878static void emit_mov(int rs,int rt)
57871462 879{
880 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
882}
883
e2b5e7aa 884static void emit_movs(int rs,int rt)
57871462 885{
886 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
888}
889
e2b5e7aa 890static void emit_add(int rs1,int rs2,int rt)
57871462 891{
892 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
894}
895
e2b5e7aa 896static void emit_adds(int rs1,int rs2,int rt)
57871462 897{
898 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
900}
901
e2b5e7aa 902static void emit_adcs(int rs1,int rs2,int rt)
57871462 903{
904 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
906}
907
e2b5e7aa 908static void emit_sbc(int rs1,int rs2,int rt)
57871462 909{
910 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
912}
913
e2b5e7aa 914static void emit_sbcs(int rs1,int rs2,int rt)
57871462 915{
916 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
918}
919
e2b5e7aa 920static void emit_neg(int rs, int rt)
57871462 921{
922 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
923 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
924}
925
e2b5e7aa 926static void emit_negs(int rs, int rt)
57871462 927{
928 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
930}
931
e2b5e7aa 932static void emit_sub(int rs1,int rs2,int rt)
57871462 933{
934 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
936}
937
e2b5e7aa 938static void emit_subs(int rs1,int rs2,int rt)
57871462 939{
940 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
942}
943
e2b5e7aa 944static void emit_zeroreg(int rt)
57871462 945{
946 assem_debug("mov %s,#0\n",regname[rt]);
947 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
948}
949
e2b5e7aa 950static void emit_loadlp(u_int imm,u_int rt)
790ee18e 951{
952 add_literal((int)out,imm);
953 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
954 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
955}
e2b5e7aa 956
957static void emit_movw(u_int imm,u_int rt)
790ee18e 958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
e2b5e7aa 963
964static void emit_movt(u_int imm,u_int rt)
790ee18e 965{
966 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
967 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
968}
e2b5e7aa 969
970static void emit_movimm(u_int imm,u_int rt)
790ee18e 971{
972 u_int armval;
973 if(genimm(imm,&armval)) {
974 assem_debug("mov %s,#%d\n",regname[rt],imm);
975 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
976 }else if(genimm(~imm,&armval)) {
977 assem_debug("mvn %s,#%d\n",regname[rt],imm);
978 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
979 }else if(imm<65536) {
665f33e1 980 #ifndef HAVE_ARMV7
790ee18e 981 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
982 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
983 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
984 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
985 #else
986 emit_movw(imm,rt);
987 #endif
988 }else{
665f33e1 989 #ifndef HAVE_ARMV7
790ee18e 990 emit_loadlp(imm,rt);
991 #else
992 emit_movw(imm&0x0000FFFF,rt);
993 emit_movt(imm&0xFFFF0000,rt);
994 #endif
995 }
996}
e2b5e7aa 997
998static void emit_pcreladdr(u_int rt)
790ee18e 999{
1000 assem_debug("add %s,pc,#?\n",regname[rt]);
1001 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1002}
1003
e2b5e7aa 1004static void emit_loadreg(int r, int hr)
57871462 1005{
3d624f89 1006 if(r&64) {
c43b5311 1007 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1008 assert(0);
1009 return;
3d624f89 1010 }
57871462 1011 if((r&63)==0)
1012 emit_zeroreg(hr);
1013 else {
3d624f89 1014 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1015 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1016 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1017 if(r==CCREG) addr=(int)&cycle_count;
1018 if(r==CSREG) addr=(int)&Status;
1019 if(r==FSREG) addr=(int)&FCR31;
1020 if(r==INVCP) addr=(int)&invc_ptr;
1021 u_int offset = addr-(u_int)&dynarec_local;
1022 assert(offset<4096);
1023 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1024 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1025 }
1026}
e2b5e7aa 1027
1028static void emit_storereg(int r, int hr)
57871462 1029{
3d624f89 1030 if(r&64) {
c43b5311 1031 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1032 assert(0);
1033 return;
3d624f89 1034 }
3d624f89 1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
e2b5e7aa 1046static void emit_test(int rs, int rt)
57871462 1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
e2b5e7aa 1052static void emit_testimm(int rs,int imm)
57871462 1053{
1054 u_int armval;
5a05d80c 1055 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1056 genimm_checked(imm,&armval);
57871462 1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
e2b5e7aa 1060static void emit_testeqimm(int rs,int imm)
b9b61529 1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1064 genimm_checked(imm,&armval);
b9b61529 1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
e2b5e7aa 1068static void emit_not(int rs,int rt)
57871462 1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
e2b5e7aa 1074static void emit_mvnmi(int rs,int rt)
b9b61529 1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
e2b5e7aa 1080static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
e2b5e7aa 1086static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
e2b5e7aa 1091
1092static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1093{
1094 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1095 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1096}
1097
e2b5e7aa 1098static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1099{
1100 assert(rs<16);
1101 assert(rt<16);
1102 assert(imm<32);
1103 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1104 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1105}
1106
e2b5e7aa 1107static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1108{
1109 assert(rs<16);
1110 assert(rt<16);
1111 assert(imm<32);
1112 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1113 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1114}
1115
e2b5e7aa 1116static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1117{
1118 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1120}
1121
e2b5e7aa 1122static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1123{
1124 assert(rs<16);
1125 assert(rt<16);
1126 if(imm!=0) {
57871462 1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
8a0a8423 1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1134 #ifdef HAVE_ARMV7
1135 }else if(rt!=rs&&(u_int)imm<65536) {
1136 emit_movw(imm&0x0000ffff,rt);
1137 emit_add(rs,rt,rt);
1138 }else if(rt!=rs&&(u_int)-imm<65536) {
1139 emit_movw(-imm&0x0000ffff,rt);
1140 emit_sub(rs,rt,rt);
1141 #endif
1142 }else if((u_int)-imm<65536) {
57871462 1143 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1144 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1145 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1146 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1147 }else {
1148 do {
1149 int shift = (ffs(imm) - 1) & ~1;
1150 int imm8 = imm & (0xff << shift);
1151 genimm_checked(imm8,&armval);
1152 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1153 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1154 rs = rt;
1155 imm &= ~imm8;
1156 }
1157 while (imm != 0);
57871462 1158 }
1159 }
1160 else if(rs!=rt) emit_mov(rs,rt);
1161}
1162
e2b5e7aa 1163static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1164{
1165 assert(imm>-65536&&imm<65536);
1166 u_int armval;
1167 if(genimm(imm,&armval)) {
1168 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1169 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1170 }else if(genimm(-imm,&armval)) {
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1172 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1173 }else if(imm<0) {
1174 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1175 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1176 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1177 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1178 }else{
1179 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1180 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1181 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1182 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1183 }
1184}
e2b5e7aa 1185
1186static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1187{
1188 emit_addimm(rt,imm,rt);
1189}
1190
e2b5e7aa 1191static void emit_addnop(u_int r)
57871462 1192{
1193 assert(r<16);
1194 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1195 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1196}
1197
e2b5e7aa 1198static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1199{
1200 u_int armval;
cfbd3c6e 1201 genimm_checked(imm,&armval);
57871462 1202 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1203 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1204}
1edfcc68 1205
e2b5e7aa 1206static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1207{
1208 assert(0);
1209 u_int armval;
cfbd3c6e 1210 genimm_checked(imm,&armval);
57871462 1211 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1212 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1213}
1214
e2b5e7aa 1215static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1216{
1217 // TODO: if(genimm(imm,&armval)) ...
1218 // else
1219 emit_movimm(imm,HOST_TEMPREG);
1220 emit_adds(HOST_TEMPREG,rsl,rtl);
1221 emit_adcimm(rsh,0,rth);
1222}
1223
e2b5e7aa 1224static void emit_andimm(int rs,int imm,int rt)
57871462 1225{
1226 u_int armval;
790ee18e 1227 if(imm==0) {
1228 emit_zeroreg(rt);
1229 }else if(genimm(imm,&armval)) {
57871462 1230 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1232 }else if(genimm(~imm,&armval)) {
1233 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1234 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1235 }else if(imm==65535) {
332a4533 1236 #ifndef HAVE_ARMV6
57871462 1237 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1238 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1239 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1240 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1241 #else
1242 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1243 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1244 #endif
1245 }else{
1246 assert(imm>0&&imm<65535);
665f33e1 1247 #ifndef HAVE_ARMV7
57871462 1248 assem_debug("mov r14,#%d\n",imm&0xFF00);
1249 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1250 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1251 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1252 #else
1253 emit_movw(imm,HOST_TEMPREG);
1254 #endif
1255 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1256 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1257 }
1258}
1259
e2b5e7aa 1260static void emit_orimm(int rs,int imm,int rt)
57871462 1261{
1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 if(rs!=rt) emit_mov(rs,rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1268 }else{
1269 assert(imm>0&&imm<65536);
1270 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1271 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1272 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1273 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1274 }
1275}
1276
e2b5e7aa 1277static void emit_xorimm(int rs,int imm,int rt)
57871462 1278{
57871462 1279 u_int armval;
790ee18e 1280 if(imm==0) {
1281 if(rs!=rt) emit_mov(rs,rt);
1282 }else if(genimm(imm,&armval)) {
57871462 1283 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1284 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1285 }else{
514ed0d9 1286 assert(imm>0&&imm<65536);
57871462 1287 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1288 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1289 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1290 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1291 }
1292}
1293
e2b5e7aa 1294static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1295{
1296 assert(imm>0);
1297 assert(imm<32);
1298 //if(imm==1) ...
1299 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1300 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1301}
1302
e2b5e7aa 1303static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1304{
1305 assert(imm>0);
1306 assert(imm<32);
1307 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1308 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1309}
1310
e2b5e7aa 1311static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1312{
1313 assert(imm>0);
1314 assert(imm<32);
1315 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1316 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1317}
1318
e2b5e7aa 1319static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1320{
1321 assert(imm>0);
1322 assert(imm<32);
1323 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1325}
1326
e2b5e7aa 1327static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1328{
1329 assert(imm>0);
1330 assert(imm<32);
1331 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1333}
1334
e2b5e7aa 1335static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1336{
1337 assert(imm>0);
1338 assert(imm<32);
1339 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1341}
1342
e2b5e7aa 1343static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1344{
1345 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1346 assert(imm>0);
1347 assert(imm<32);
1348 //if(imm==1) ...
1349 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1351 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1352 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1353}
1354
e2b5e7aa 1355static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1356{
1357 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1358 assert(imm>0);
1359 assert(imm<32);
1360 //if(imm==1) ...
1361 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1363 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1364 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1365}
1366
/*
 * Sign-extend the low 16 bits of rs into rt: "sxth" when HAVE_ARMV6 is
 * defined, otherwise a shift left by 16 followed by an arithmetic shift
 * right by 16.
 */
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}

/*
 * Sign-extend the low 8 bits of rs into rt: "sxtb" when HAVE_ARMV6 is
 * defined, otherwise a shift left by 24 followed by an arithmetic shift
 * right by 24.
 */
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
1388
e2b5e7aa 1389static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1390{
1391 assert(rs<16);
1392 assert(rt<16);
1393 assert(shift<16);
1394 //if(imm==1) ...
1395 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1397}
e2b5e7aa 1398
1399static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1400{
1401 assert(rs<16);
1402 assert(rt<16);
1403 assert(shift<16);
1404 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1405 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1406}
e2b5e7aa 1407
1408static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1409{
1410 assert(rs<16);
1411 assert(rt<16);
1412 assert(shift<16);
1413 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1414 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1415}
57871462 1416
e2b5e7aa 1417static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1418{
1419 assert(rs<16);
1420 assert(rt<16);
1421 assert(shift<16);
1422 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1423 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1424}
e2b5e7aa 1425
1426static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1433}
1434
e2b5e7aa 1435static void emit_cmpimm(int rs,int imm)
57871462 1436{
1437 u_int armval;
1438 if(genimm(imm,&armval)) {
5a05d80c 1439 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1440 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1441 }else if(genimm(-imm,&armval)) {
5a05d80c 1442 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1443 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1444 }else if(imm>0) {
1445 assert(imm<65536);
57871462 1446 emit_movimm(imm,HOST_TEMPREG);
57871462 1447 assem_debug("cmp %s,r14\n",regname[rs]);
1448 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1449 }else{
1450 assert(imm>-65536);
57871462 1451 emit_movimm(-imm,HOST_TEMPREG);
57871462 1452 assem_debug("cmn %s,r14\n",regname[rs]);
1453 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1454 }
1455}
1456
e2b5e7aa 1457static void emit_cmovne_imm(int imm,int rt)
57871462 1458{
1459 assem_debug("movne %s,#%d\n",regname[rt],imm);
1460 u_int armval;
cfbd3c6e 1461 genimm_checked(imm,&armval);
57871462 1462 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1463}
e2b5e7aa 1464
1465static void emit_cmovl_imm(int imm,int rt)
57871462 1466{
1467 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1468 u_int armval;
cfbd3c6e 1469 genimm_checked(imm,&armval);
57871462 1470 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1471}
e2b5e7aa 1472
1473static void emit_cmovb_imm(int imm,int rt)
57871462 1474{
1475 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1476 u_int armval;
cfbd3c6e 1477 genimm_checked(imm,&armval);
57871462 1478 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1479}
e2b5e7aa 1480
1481static void emit_cmovs_imm(int imm,int rt)
57871462 1482{
1483 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1484 u_int armval;
cfbd3c6e 1485 genimm_checked(imm,&armval);
57871462 1486 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1487}
e2b5e7aa 1488
1489static void emit_cmove_reg(int rs,int rt)
57871462 1490{
1491 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1492 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1493}
e2b5e7aa 1494
1495static void emit_cmovne_reg(int rs,int rt)
57871462 1496{
1497 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1498 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1499}
e2b5e7aa 1500
1501static void emit_cmovl_reg(int rs,int rt)
57871462 1502{
1503 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1504 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1505}
e2b5e7aa 1506
1507static void emit_cmovs_reg(int rs,int rt)
57871462 1508{
1509 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1510 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1511}
1512
/*
 * Set rt to 1 if the compare of rs against imm yields "lt", else 0.
 * When rt aliases rs the zeroing is deferred until after the compare so
 * the source value is still intact when it is read.
 */
static void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}

/*
 * Set rt to 1 if the compare of rs against imm yields "cc", else 0.
 * When rt aliases rs the zeroing is deferred until after the compare so
 * the source value is still intact when it is read.
 */
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs == rt);

  if (!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (in_place) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1528
/*
 * Set-if-less-than-immediate on a value held in the register pair
 * rsh:rsl, result in rt. The low word is compared first via
 * emit_slti32(); the high word then overrides the result depending on
 * the sign of imm. rsh must differ from rt.
 */
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}

/*
 * Carry-condition counterpart of emit_slti64_32(): the low word is
 * compared via emit_sltiu32(), then the high word overrides the result
 * depending on the sign of imm. rsh must differ from rt.
 */
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1562
e2b5e7aa 1563static void emit_cmp(int rs,int rt)
57871462 1564{
1565 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1566 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1567}
e2b5e7aa 1568
1569static void emit_set_gz32(int rs, int rt)
57871462 1570{
1571 //assem_debug("set_gz32\n");
1572 emit_cmpimm(rs,1);
1573 emit_movimm(1,rt);
1574 emit_cmovl_imm(0,rt);
1575}
e2b5e7aa 1576
1577static void emit_set_nz32(int rs, int rt)
57871462 1578{
1579 //assem_debug("set_nz32\n");
1580 if(rs!=rt) emit_movs(rs,rt);
1581 else emit_test(rs,rs);
1582 emit_cmovne_imm(1,rt);
1583}
e2b5e7aa 1584
1585static void emit_set_gz64_32(int rsh, int rsl, int rt)
57871462 1586{
1587 //assem_debug("set_gz64\n");
1588 emit_set_gz32(rsl,rt);
1589 emit_test(rsh,rsh);
1590 emit_cmovne_imm(1,rt);
1591 emit_cmovs_imm(0,rt);
1592}
e2b5e7aa 1593
1594static void emit_set_nz64_32(int rsh, int rsl, int rt)
57871462 1595{
1596 //assem_debug("set_nz64\n");
1597 emit_or_and_set_flags(rsh,rsl,rt);
1598 emit_cmovne_imm(1,rt);
1599}
e2b5e7aa 1600
/*
 * rt = 1 if the compare of rs1 against rs2 yields "lt", else 0.
 * If rt aliases either source it is cleared only after the compare.
 */
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt) || (rs2 == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}

/*
 * rt = 1 if the compare of rs1 against rs2 yields "cc", else 0.
 * If rt aliases either source it is cleared only after the compare.
 */
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt) || (rs2 == rt);

  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1618
1619static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1620{
1621 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1622 assert(u1!=rt);
1623 assert(u2!=rt);
1624 emit_cmp(l1,l2);
1625 emit_movimm(0,rt);
1626 emit_sbcs(u1,u2,HOST_TEMPREG);
1627 emit_cmovl_imm(1,rt);
1628}
e2b5e7aa 1629
1630static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1631{
1632 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1633 assert(u1!=rt);
1634 assert(u2!=rt);
1635 emit_cmp(l1,l2);
1636 emit_movimm(0,rt);
1637 emit_sbcs(u1,u2,HOST_TEMPREG);
1638 emit_cmovb_imm(1,rt);
1639}
1640
e2b5e7aa 1641static void emit_call(int a)
57871462 1642{
1643 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1644 u_int offset=genjmp(a);
1645 output_w32(0xeb000000|offset);
1646}
e2b5e7aa 1647
1648static void emit_jmp(int a)
57871462 1649{
1650 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1651 u_int offset=genjmp(a);
1652 output_w32(0xea000000|offset);
1653}
e2b5e7aa 1654
1655static void emit_jne(int a)
57871462 1656{
1657 assem_debug("bne %x\n",a);
1658 u_int offset=genjmp(a);
1659 output_w32(0x1a000000|offset);
1660}
e2b5e7aa 1661
1662static void emit_jeq(int a)
57871462 1663{
1664 assem_debug("beq %x\n",a);
1665 u_int offset=genjmp(a);
1666 output_w32(0x0a000000|offset);
1667}
e2b5e7aa 1668
1669static void emit_js(int a)
57871462 1670{
1671 assem_debug("bmi %x\n",a);
1672 u_int offset=genjmp(a);
1673 output_w32(0x4a000000|offset);
1674}
e2b5e7aa 1675
1676static void emit_jns(int a)
57871462 1677{
1678 assem_debug("bpl %x\n",a);
1679 u_int offset=genjmp(a);
1680 output_w32(0x5a000000|offset);
1681}
e2b5e7aa 1682
1683static void emit_jl(int a)
57871462 1684{
1685 assem_debug("blt %x\n",a);
1686 u_int offset=genjmp(a);
1687 output_w32(0xba000000|offset);
1688}
e2b5e7aa 1689
1690static void emit_jge(int a)
57871462 1691{
1692 assem_debug("bge %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0xaa000000|offset);
1695}
e2b5e7aa 1696
1697static void emit_jno(int a)
57871462 1698{
1699 assem_debug("bvc %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0x7a000000|offset);
1702}
e2b5e7aa 1703
1704static void emit_jc(int a)
57871462 1705{
1706 assem_debug("bcs %x\n",a);
1707 u_int offset=genjmp(a);
1708 output_w32(0x2a000000|offset);
1709}
e2b5e7aa 1710
1711static void emit_jcc(int a)
57871462 1712{
1713 assem_debug("bcc %x\n",a);
1714 u_int offset=genjmp(a);
1715 output_w32(0x3a000000|offset);
1716}
1717
e2b5e7aa 1718static void emit_callreg(u_int r)
57871462 1719{
c6c3b1b3 1720 assert(r<15);
1721 assem_debug("blx %s\n",regname[r]);
1722 output_w32(0xe12fff30|r);
57871462 1723}
e2b5e7aa 1724
1725static void emit_jmpreg(u_int r)
57871462 1726{
1727 assem_debug("mov pc,%s\n",regname[r]);
1728 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1729}
1730
e2b5e7aa 1731static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1732{
1733 assert(offset>-4096&&offset<4096);
1734 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1735 if(offset>=0) {
1736 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1737 }else{
1738 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1739 }
1740}
e2b5e7aa 1741
1742static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1743{
1744 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1745 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1746}
e2b5e7aa 1747
1748static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1749{
1750 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1751 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1752}
e2b5e7aa 1753
1754static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1755{
1756 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1757 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1758}
e2b5e7aa 1759
1760static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1761{
1762 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1763 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1764}
e2b5e7aa 1765
1766static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1767{
1768 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1769 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1770}
e2b5e7aa 1771
1772static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1773{
1774 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1775 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1776}
e2b5e7aa 1777
/*
 * Word load that optionally goes through a map register: with map < 0
 * it is a plain offset load, otherwise addr must be 0 and the load is
 * indexed by map scaled by 4.
 */
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}

/*
 * Two-word load into rh (skipped when rh < 0) and rl. With map < 0 the
 * words come from addr and addr+4 off rs. Otherwise both loads are
 * map-indexed and map is incremented by 1 between them, so map is
 * modified; rh must not be rs in that case.
 */
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if (map < 0) {
    if (rh >= 0) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if (rh >= 0) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1799
1800static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1801{
1802 assert(offset>-256&&offset<256);
1803 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1806 }else{
1807 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1808 }
1809}
e2b5e7aa 1810
1811static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1812{
1813 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1814 else {
1815 if(addr==0) {
1816 emit_shlimm(map,2,map);
1817 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1818 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1819 }else{
1820 assert(addr>-256&&addr<256);
1821 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1822 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1823 emit_movsbl_indexed(addr, rt, rt);
1824 }
1825 }
1826}
e2b5e7aa 1827
1828static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1829{
1830 assert(offset>-256&&offset<256);
1831 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1832 if(offset>=0) {
1833 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1834 }else{
1835 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1836 }
1837}
e2b5e7aa 1838
1839static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1840{
1841 assert(offset>-4096&&offset<4096);
1842 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1843 if(offset>=0) {
1844 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1845 }else{
1846 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1847 }
1848}
e2b5e7aa 1849
1850static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1851{
1852 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1853 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1854}
e2b5e7aa 1855
1856static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1857{
1858 if(map<0) emit_movzbl_indexed(addr, rs, rt);
1859 else {
1860 if(addr==0) {
1861 emit_movzbl_dualindexedx4(rs, map, rt);
1862 }else{
1863 emit_addimm(rs,addr,rt);
1864 emit_movzbl_dualindexedx4(rt, map, rt);
1865 }
1866 }
1867}
e2b5e7aa 1868
1869static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1870{
1871 assert(offset>-256&&offset<256);
1872 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1873 if(offset>=0) {
1874 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1875 }else{
1876 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1877 }
1878}
e2b5e7aa 1879
054175e9 1880static void emit_ldrd(int offset, int rs, int rt)
1881{
1882 assert(offset>-256&&offset<256);
1883 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1884 if(offset>=0) {
1885 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1886 }else{
1887 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1888 }
1889}
e2b5e7aa 1890
1891static void emit_readword(int addr, int rt)
57871462 1892{
1893 u_int offset = addr-(u_int)&dynarec_local;
1894 assert(offset<4096);
1895 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1896 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1897}
e2b5e7aa 1898
1899static unused void emit_movsbl(int addr, int rt)
57871462 1900{
1901 u_int offset = addr-(u_int)&dynarec_local;
1902 assert(offset<256);
1903 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1904 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1905}
e2b5e7aa 1906
1907static unused void emit_movswl(int addr, int rt)
57871462 1908{
1909 u_int offset = addr-(u_int)&dynarec_local;
1910 assert(offset<256);
1911 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1912 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1913}
e2b5e7aa 1914
1915static unused void emit_movzbl(int addr, int rt)
57871462 1916{
1917 u_int offset = addr-(u_int)&dynarec_local;
1918 assert(offset<4096);
1919 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1920 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1921}
e2b5e7aa 1922
1923static unused void emit_movzwl(int addr, int rt)
57871462 1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<256);
1927 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1929}
57871462 1930
e2b5e7aa 1931static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1932{
1933 assert(offset>-4096&&offset<4096);
1934 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1935 if(offset>=0) {
1936 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1937 }else{
1938 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1939 }
1940}
e2b5e7aa 1941
1942static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 1943{
1944 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1945 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1946}
e2b5e7aa 1947
/*
 * Word store that optionally goes through a map register: with map < 0
 * it is a plain offset store, otherwise addr must be 0 and the store is
 * indexed by map scaled by 4. The temp argument is not used here.
 */
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}

/*
 * Two-word store of rh and rl. With map < 0 the words go to addr and
 * addr+4 off rs (rh is skipped when negative). Otherwise rh is required;
 * the second word is addressed either through temp = map + 1 or, when
 * temp is the same register as rs, by advancing rs by 4 in place.
 */
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh>=0);
  if (temp != rs) {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 1973
1974static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1975{
1976 assert(offset>-256&&offset<256);
1977 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1978 if(offset>=0) {
1979 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1980 }else{
1981 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1982 }
1983}
e2b5e7aa 1984
1985static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1986{
1987 assert(offset>-4096&&offset<4096);
1988 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1989 if(offset>=0) {
1990 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1991 }else{
1992 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1993 }
1994}
e2b5e7aa 1995
1996static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 1997{
1998 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1999 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2000}
e2b5e7aa 2001
2002static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
57871462 2003{
2004 if(map<0) emit_writebyte_indexed(rt, addr, rs);
2005 else {
2006 if(addr==0) {
2007 emit_writebyte_dualindexedx4(rt, rs, map);
2008 }else{
2009 emit_addimm(rs,addr,temp);
2010 emit_writebyte_dualindexedx4(rt, temp, map);
2011 }
2012 }
2013}
e2b5e7aa 2014
2015static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2016{
2017 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2018 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2019}
e2b5e7aa 2020
2021static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2022{
2023 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2024 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2025}
e2b5e7aa 2026
2027static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2028{
2029 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2030 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2031}
e2b5e7aa 2032
2033static void emit_writeword(int rt, int addr)
57871462 2034{
2035 u_int offset = addr-(u_int)&dynarec_local;
2036 assert(offset<4096);
2037 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2038 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2039}
e2b5e7aa 2040
2041static unused void emit_writehword(int rt, int addr)
57871462 2042{
2043 u_int offset = addr-(u_int)&dynarec_local;
2044 assert(offset<256);
2045 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2046 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2047}
e2b5e7aa 2048
2049static unused void emit_writebyte(int rt, int addr)
57871462 2050{
2051 u_int offset = addr-(u_int)&dynarec_local;
2052 assert(offset<4096);
74426039 2053 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2054 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2055}
57871462 2056
e2b5e7aa 2057static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2058{
2059 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2060 assert(rs1<16);
2061 assert(rs2<16);
2062 assert(hi<16);
2063 assert(lo<16);
2064 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2065}
e2b5e7aa 2066
2067static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2068{
2069 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2070 assert(rs1<16);
2071 assert(rs2<16);
2072 assert(hi<16);
2073 assert(lo<16);
2074 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2075}
2076
e2b5e7aa 2077static void emit_clz(int rs,int rt)
57871462 2078{
2079 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2080 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2081}
2082
e2b5e7aa 2083static void emit_subcs(int rs1,int rs2,int rt)
57871462 2084{
2085 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2086 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2087}
2088
e2b5e7aa 2089static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2090{
2091 assert(imm>0);
2092 assert(imm<32);
2093 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2094 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2095}
2096
e2b5e7aa 2097static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2098{
2099 assert(imm>0);
2100 assert(imm<32);
2101 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2102 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2103}
2104
e2b5e7aa 2105static void emit_negmi(int rs, int rt)
57871462 2106{
2107 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2108 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2109}
2110
e2b5e7aa 2111static void emit_negsmi(int rs, int rt)
57871462 2112{
2113 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2114 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2115}
2116
e2b5e7aa 2117static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2118{
2119 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2120 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2121}
2122
e2b5e7aa 2123static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2124{
2125 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2126 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2127}
2128
e2b5e7aa 2129static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2130{
2131 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2132 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2133}
2134
e2b5e7aa 2135static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2136{
2137 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2138 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2139}
2140
e2b5e7aa 2141static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2142{
2143 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2144 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2145}
2146
e2b5e7aa 2147static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2148{
2149 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2150 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2151}
2152
e2b5e7aa 2153static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2154{
2155 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2156 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2157}
2158
e2b5e7aa 2159static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2160{
2161 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2162 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2163}
2164
e2b5e7aa 2165static void emit_teq(int rs, int rt)
57871462 2166{
2167 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2168 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2169}
2170
e2b5e7aa 2171static void emit_rsbimm(int rs, int imm, int rt)
57871462 2172{
2173 u_int armval;
cfbd3c6e 2174 genimm_checked(imm,&armval);
57871462 2175 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2176 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2177}
2178
2179// Load 2 immediates optimizing for small code size
e2b5e7aa 2180static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2181{
2182 emit_movimm(imm1,rt1);
2183 u_int armval;
2184 if(genimm(imm2-imm1,&armval)) {
2185 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2186 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2187 }else if(genimm(imm1-imm2,&armval)) {
2188 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2189 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2190 }
2191 else emit_movimm(imm2,rt2);
2192}
2193
2194// Conditionally select one of two immediates, optimizing for small code size
2195// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2196static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2197{
2198 u_int armval;
2199 if(genimm(imm2-imm1,&armval)) {
2200 emit_movimm(imm1,rt);
2201 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2202 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2203 }else if(genimm(imm1-imm2,&armval)) {
2204 emit_movimm(imm1,rt);
2205 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2206 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2207 }
2208 else {
665f33e1 2209 #ifndef HAVE_ARMV7
57871462 2210 emit_movimm(imm1,rt);
2211 add_literal((int)out,imm2);
2212 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2213 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2214 #else
2215 emit_movw(imm1&0x0000FFFF,rt);
2216 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2217 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2218 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2219 }
2220 emit_movt(imm1&0xFFFF0000,rt);
2221 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2222 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2223 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2224 }
2225 #endif
2226 }
2227}
2228
57871462 2229// special case for checking invalid_code
e2b5e7aa 2230static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2231{
2232 assert(imm<128&&imm>=0);
2233 assert(r>=0&&r<16);
2234 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2235 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2236 emit_cmpimm(HOST_TEMPREG,imm);
2237}
2238
// Emit "blne a": branch-and-link to address a, executed only when the Z flag
// is clear.  The branch offset field is produced by genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
2245
57871462 2246// Used to preload hash table entries
e2b5e7aa 2247static unused void emit_prefetchreg(int r)
57871462 2248{
2249 assem_debug("pld %s\n",regname[r]);
2250 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2251}
2252
2253// Special case for mini_ht
e2b5e7aa 2254static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2255{
2256 assert(offset<4096);
2257 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2258 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2259}
2260
e2b5e7aa 2261static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2262{
2263 u_int armval;
cfbd3c6e 2264 genimm_checked(imm,&armval);
57871462 2265 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2266 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2267}
2268
e2b5e7aa 2269static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2270{
2271 u_int armval;
cfbd3c6e 2272 genimm_checked(imm,&armval);
57871462 2273 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2274 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2275}
2276
e2b5e7aa 2277static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2278{
2279 u_int armval;
cfbd3c6e 2280 genimm_checked(imm,&armval);
57871462 2281 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2282 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2283}
2284
e2b5e7aa 2285static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2286{
2287 u_int armval;
cfbd3c6e 2288 genimm_checked(imm,&armval);
57871462 2289 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2290 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2291}
2292
e2b5e7aa 2293static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2294{
2295 u_int armval;
cfbd3c6e 2296 genimm_checked(imm,&armval);
57871462 2297 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2298 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2299}
2300
e2b5e7aa 2301static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2302{
2303 u_int armval;
cfbd3c6e 2304 genimm_checked(imm,&armval);
b9b61529 2305 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2306 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2307}
2308
e2b5e7aa 2309static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2310{
2311 u_int armval;
cfbd3c6e 2312 genimm_checked(imm,&armval);
b9b61529 2313 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2314 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2315}
2316
e2b5e7aa 2317static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2318{
2319 u_int armval;
2320 genimm_checked(imm,&armval);
2321 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2322 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2323}
2324
e2b5e7aa 2325static void emit_jno_unlikely(int a)
57871462 2326{
2327 //emit_jno(a);
2328 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2329 output_w32(0x72800000|rd_rn_rm(15,15,0));
2330}
2331
054175e9 2332static void save_regs_all(u_int reglist)
57871462 2333{
054175e9 2334 int i;
57871462 2335 if(!reglist) return;
2336 assem_debug("stmia fp,{");
054175e9 2337 for(i=0;i<16;i++)
2338 if(reglist&(1<<i))
2339 assem_debug("r%d,",i);
57871462 2340 assem_debug("}\n");
2341 output_w32(0xe88b0000|reglist);
2342}
e2b5e7aa 2343
054175e9 2344static void restore_regs_all(u_int reglist)
57871462 2345{
054175e9 2346 int i;
57871462 2347 if(!reglist) return;
2348 assem_debug("ldmia fp,{");
054175e9 2349 for(i=0;i<16;i++)
2350 if(reglist&(1<<i))
2351 assem_debug("r%d,",i);
57871462 2352 assem_debug("}\n");
2353 output_w32(0xe89b0000|reglist);
2354}
e2b5e7aa 2355
054175e9 2356// Save registers before function call
2357static void save_regs(u_int reglist)
2358{
4d646738 2359 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2360 save_regs_all(reglist);
2361}
e2b5e7aa 2362
054175e9 2363// Restore registers after function call
2364static void restore_regs(u_int reglist)
2365{
4d646738 2366 reglist&=CALLER_SAVE_REGS;
054175e9 2367 restore_regs_all(reglist);
2368}
57871462 2369
57871462 2370/* Stubs/epilogue */
2371
e2b5e7aa 2372static void literal_pool(int n)
57871462 2373{
2374 if(!literalcount) return;
2375 if(n) {
2376 if((int)out-literals[0][0]<4096-n) return;
2377 }
2378 u_int *ptr;
2379 int i;
2380 for(i=0;i<literalcount;i++)
2381 {
77750690 2382 u_int l_addr=(u_int)out;
2383 int j;
2384 for(j=0;j<i;j++) {
2385 if(literals[j][1]==literals[i][1]) {
2386 //printf("dup %08x\n",literals[i][1]);
2387 l_addr=literals[j][0];
2388 break;
2389 }
2390 }
57871462 2391 ptr=(u_int *)literals[i][0];
77750690 2392 u_int offset=l_addr-(u_int)ptr-8;
57871462 2393 assert(offset<4096);
2394 assert(!(offset&3));
2395 *ptr|=offset;
77750690 2396 if(l_addr==(u_int)out) {
2397 literals[i][0]=l_addr; // remember for dupes
2398 output_w32(literals[i][1]);
2399 }
57871462 2400 }
2401 literalcount=0;
2402}
2403
e2b5e7aa 2404static void literal_pool_jumpover(int n)
57871462 2405{
2406 if(!literalcount) return;
2407 if(n) {
2408 if((int)out-literals[0][0]<4096-n) return;
2409 }
2410 int jaddr=(int)out;
2411 emit_jmp(0);
2412 literal_pool(0);
2413 set_jump_target(jaddr,(int)out);
2414}
2415
e2b5e7aa 2416static void emit_extjump2(u_int addr, int target, int linker)
57871462 2417{
2418 u_char *ptr=(u_char *)addr;
2419 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2420 (void)ptr;
2421
57871462 2422 emit_loadlp(target,0);
2423 emit_loadlp(addr,1);
24385cae 2424 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2425 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2426//DEBUG >
2427#ifdef DEBUG_CYCLE_COUNT
2428 emit_readword((int)&last_count,ECX);
2429 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2430 emit_readword((int)&next_interupt,ECX);
2431 emit_writeword(HOST_CCREG,(int)&Count);
2432 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2433 emit_writeword(ECX,(int)&last_count);
2434#endif
2435//DEBUG <
2436 emit_jmp(linker);
2437}
2438
e2b5e7aa 2439static void emit_extjump(int addr, int target)
57871462 2440{
2441 emit_extjump2(addr, target, (int)dyna_linker);
2442}
e2b5e7aa 2443
2444static void emit_extjump_ds(int addr, int target)
57871462 2445{
2446 emit_extjump2(addr, target, (int)dyna_linker_ds);
2447}
2448
13e35c04 2449// put rt_val into rt, potentially making use of rs with value rs_val
2450static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2451{
8575a877 2452 u_int armval;
2453 int diff;
2454 if(genimm(rt_val,&armval)) {
2455 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2456 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2457 return;
2458 }
2459 if(genimm(~rt_val,&armval)) {
2460 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2461 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2462 return;
2463 }
2464 diff=rt_val-rs_val;
2465 if(genimm(diff,&armval)) {
2466 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2467 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2468 return;
2469 }else if(genimm(-diff,&armval)) {
2470 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2471 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2472 return;
2473 }
2474 emit_movimm(rt_val,rt);
2475}
2476
// Return 1 if emit_movimm_from() can derive v2 from v1 cheaply: the values
// are equal, or their difference (or its negation), once trailing pairs of
// zero bits are stripped, fits in 8 bits.  Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  if(v1==v2) return 1;

  const int delta=v2-v1;
  u_int bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100) return 1;

  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return (bits<0x100)?1:0;
}
cbbab9cd 2492
// Move two values into the first two argument registers: a0 -> r0 and
// a1 -> r1.  A negative register number means "nothing to move" in the
// branches that test for it.  Trashes r2 when the two have to be swapped.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // the values sit in each other's destination: swap through r2
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // a1 currently lives in r0: move it out before r0 is overwritten
      emit_mov(a1,1);
      if (a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2509
b1be1eee 2510static void mov_loadtype_adj(int type,int rs,int rt)
2511{
2512 switch(type) {
2513 case LOADB_STUB: emit_signextend8(rs,rt); break;
2514 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2515 case LOADH_STUB: emit_signextend16(rs,rt); break;
2516 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2517 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2518 default: assert(0);
2519 }
2520}
2521
b1be1eee 2522#include "pcsxmem.h"
2523#include "pcsxmem_inline.c"
b1be1eee 2524
// Emit the out-of-line slow path ("stub") number n for a memory load.
//
// Fields of stubs[n] as used here:
//   [0] stub type (LOADB_STUB, LOADBU_STUB, LOADH_STUB, LOADHU_STUB, LOADW_STUB)
//   [1] address of the branch that is patched to jump to this stub
//   [2] return address in the main code
//   [3] instruction index i within the block
//   [4] host register holding the address (rs)
//   [5] pointer to the struct regstat of the instruction
//   [6] value the cycle adjustment is derived from
//   [7] list of live host registers (reglist)
//
// The stub looks up mem_rtab for the page (rs>>12), shifts the entry left by
// one and then emits a "cc"-conditional load followed by a conditional jump
// back.  Presumably the shift moves bit 31 of the entry into the carry flag,
// so a clear bit means "directly readable memory" -- the helpers are not
// visible here, TODO confirm.  Otherwise one of the jump_handler_read*
// routines is called and its result is taken from r0.
e2b5e7aa 2525static void do_readstub(int n)
57871462 2526{
2527 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2528 literal_pool(256);
2529 set_jump_target(stubs[n][1],(int)out);
2530 int type=stubs[n][0];
2531 int i=stubs[n][3];
2532 int rs=stubs[n][4];
2533 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2534 u_int reglist=stubs[n][7];
2535 signed char *i_regmap=i_regs->regmap;
2536 int addr=get_reg(i_regmap,AGEN1+(i&1));
2537 int rth,rt;
2538 int ds;
 // C1LS/C2LS/LOADLR instructions load into FTEMP, everything else into rt1[i]
b9b61529 2539 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2540 rth=get_reg(i_regmap,FTEMP|64);
2541 rt=get_reg(i_regmap,FTEMP);
2542 }else{
2543 rth=get_reg(i_regmap,rt1[i]|64);
2544 rt=get_reg(i_regmap,rt1[i]);
2545 }
2546 assert(rs>=0);
c6c3b1b3 2547 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2548 reglist|=(1<<rs);
 // look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
2549 for(r=0;r<=12;r++) {
2550 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2551 temp=r; break;
2552 }
2553 }
 // the destination register is about to be overwritten, no need to save it
db829eeb 2554 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2555 reglist&=~(1<<rt);
 // nothing free: save the live registers now and use r0 (or r2 if rs is r0)
2556 if(temp==-1) {
2557 save_regs(reglist);
2558 regs_saved=1;
2559 temp=(rs==0)?2:0;
2560 }
 // prefer r1 as second scratch when it is available and not otherwise in use
2561 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2562 temp2=1;
 // temp2 = mem_rtab[rs>>12] << 1, with flags set by the shift
2563 emit_readword((int)&mem_rtab,temp);
2564 emit_shrimm(rs,12,temp2);
2565 emit_readword_dualindexedx4(temp,temp2,temp2);
2566 emit_lsls_imm(temp2,1,temp2);
 // conditional direct load; skipped when there is no register to load into
2567 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2568 switch(type) {
2569 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2570 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2571 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2572 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2573 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2574 }
2575 }
 // direct access done: go back, via the register restore if regs were saved
2576 if(regs_saved) {
2577 restore_jump=(int)out;
2578 emit_jcc(0); // jump to reg restore
2579 }
2580 else
2581 emit_jcc(stubs[n][2]); // return address
2582
 // handler path: pick the read handler matching the access width
2583 if(!regs_saved)
2584 save_regs(reglist);
2585 int handler=0;
2586 if(type==LOADB_STUB||type==LOADBU_STUB)
2587 handler=(int)jump_handler_read8;
2588 if(type==LOADH_STUB||type==LOADHU_STUB)
2589 handler=(int)jump_handler_read16;
2590 if(type==LOADW_STUB)
2591 handler=(int)jump_handler_read32;
2592 assert(handler!=0);
 // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count
b96d3df7 2593 pass_args(rs,temp2);
c6c3b1b3 2594 int cc=get_reg(i_regmap,CCREG);
2595 if(cc<0)
2596 emit_loadreg(CCREG,2);
2573466a 2597 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2598 emit_call(handler);
 // the loaded value is taken from r0 and extended according to the type
2599 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2600 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2601 }
2602 if(restore_jump)
2603 set_jump_target(restore_jump,(int)out);
2604 restore_regs(reglist);
2605 emit_jmp(stubs[n][2]); // return address
57871462 2606}
2607
c6c3b1b3 2608// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2609static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2610{
2611 u_int l1,l2=0;
2612 l1=((u_int *)table)[addr>>12];
2613 if((l1&(1<<31))==0) {
2614 u_int v=l1<<1;
2615 *addr_host=v+addr;
2616 return 0;
2617 }
2618 else {
2619 l1<<=1;
2620 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2621 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2622 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2623 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2624 else
2625 l2=((u_int *)l1)[(addr&0xfff)/4];
2626 if((l2&(1<<31))==0) {
2627 u_int v=l2<<1;
2628 *addr_host=v+(addr&0xfff);
2629 return 0;
2630 }
2631 return l2<<1;
2632 }
2633}
c6c3b1b3 2634
// Emit code for a load whose address (addr) is known at recompile time.
//
// type    : load stub type (LOADB_STUB ... LOADW_STUB)
// i       : instruction index within the block
// regmap  : host register mapping of the instruction
// target  : guest register being loaded
// adj     : value the cycle adjustment is derived from
// reglist : live host registers that must survive a handler call
//
// Order of attempts: pcsx_direct_read(), then a direct host-memory access if
// get_direct_memhandler() yields a host address, and finally a call to a
// memory handler with the caller-saved registers preserved around it.
e2b5e7aa 2635static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2636{
2637 int rs=get_reg(regmap,target);
2638 int rth=get_reg(regmap,target|64);
2639 int rt=get_reg(regmap,target);
 // target has no host register: use the one mapped to -1 for the address
535d208a 2640 if(rs<0) rs=get_reg(regmap,-1);
57871462 2641 assert(rs>=0);
b1be1eee 2642 u_int handler,host_addr=0,is_dynamic,far_call=0;
2643 int cc=get_reg(regmap,CCREG);
 // a non-zero return means pcsx_direct_read() dealt with the access itself
2644 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2645 return;
c6c3b1b3 2646 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
 // handler==0: the address maps straight to host memory at host_addr
2647 if (handler==0) {
db829eeb 2648 if(rt<0||rt1[i]==0)
c6c3b1b3 2649 return;
13e35c04 2650 if(addr!=host_addr)
2651 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2652 switch(type) {
2653 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2654 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2655 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2656 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2657 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2658 default: assert(0);
2659 }
2660 return;
2661 }
 // for "dynamic" handlers go through the generic jump_handler_read* routines
b1be1eee 2662 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2663 if(is_dynamic) {
2664 if(type==LOADB_STUB||type==LOADBU_STUB)
2665 handler=(int)jump_handler_read8;
2666 if(type==LOADH_STUB||type==LOADHU_STUB)
2667 handler=(int)jump_handler_read16;
2668 if(type==LOADW_STUB)
2669 handler=(int)jump_handler_read32;
2670 }
c6c3b1b3 2671
2672 // call a memhandler
 // the destination register gets overwritten, so it need not be saved
db829eeb 2673 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2674 reglist&=~(1<<rt);
2675 save_regs(reglist);
 // r0 = address
2676 if(target==0)
2677 emit_movimm(addr,0);
2678 else if(rs!=0)
2679 emit_mov(rs,0);
 // offsets outside +/-32MB are called through r12 instead
c6c3b1b3 2680 int offset=(int)handler-(int)out-8;
2681 if(offset<-33554432||offset>=33554432) {
2682 // unreachable memhandler, a plugin func perhaps
b1be1eee 2683 emit_movimm(handler,12);
2684 far_call=1;
2685 }
2686 if(cc<0)
2687 emit_loadreg(CCREG,2);
2688 if(is_dynamic) {
 // r1 = shifted mem_rtab entry of the page, r2 = adjusted cycle count
2689 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2690 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2691 }
b1be1eee 2692 else {
 // static handler: store last_count + adjusted cycle count to Count first
2693 emit_readword((int)&last_count,3);
2694 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2695 emit_add(2,3,2);
2696 emit_writeword(2,(int)&Count);
2697 }
2698
2699 if(far_call)
2700 emit_callreg(12);
c6c3b1b3 2701 else
2702 emit_call(handler);
b1be1eee 2703
 // take the result from r0, extended according to the access type
db829eeb 2704 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2705 switch(type) {
2706 case LOADB_STUB: emit_signextend8(0,rt); break;
2707 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2708 case LOADH_STUB: emit_signextend16(0,rt); break;
2709 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2710 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2711 default: assert(0);
2712 }
2713 }
2714 restore_regs(reglist);
57871462 2715}
2716
// Emit the out-of-line slow path ("stub") number n for a memory store.
//
// Fields of stubs[n] as used here:
//   [0] stub type (STOREB_STUB, STOREH_STUB, STOREW_STUB)
//   [1] address of the branch that is patched to jump to this stub
//   [2] return address in the main code
//   [3] instruction index i within the block
//   [4] host register holding the address (rs)
//   [5] pointer to the struct regstat of the instruction
//   [6] value the cycle adjustment is derived from
//   [7] list of live host registers (reglist)
//
// Mirrors do_readstub(): mem_wtab is looked up for the page (rs>>12), the
// entry is shifted left by one with flags set and a "cc"-conditional store
// is attempted; otherwise a jump_handler_write* routine is called, which
// returns the new cycle count in r0.
e2b5e7aa 2717static void do_writestub(int n)
57871462 2718{
2719 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2720 literal_pool(256);
2721 set_jump_target(stubs[n][1],(int)out);
2722 int type=stubs[n][0];
2723 int i=stubs[n][3];
2724 int rs=stubs[n][4];
2725 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2726 u_int reglist=stubs[n][7];
2727 signed char *i_regmap=i_regs->regmap;
2728 int addr=get_reg(i_regmap,AGEN1+(i&1));
2729 int rth,rt,r;
2730 int ds;
 // C1LS/C2LS instructions store from FTEMP, everything else from rs2[i]
b9b61529 2731 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2732 rth=get_reg(i_regmap,FTEMP|64);
2733 rt=get_reg(i_regmap,r=FTEMP);
2734 }else{
2735 rth=get_reg(i_regmap,rs2[i]|64);
2736 rt=get_reg(i_regmap,r=rs2[i]);
2737 }
2738 assert(rs>=0);
2739 assert(rt>=0);
b96d3df7 2740 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
 // address and data registers must not be picked as scratch
2741 int reglist2=reglist|(1<<rs)|(1<<rt);
 // look for a free scratch register among r0-r9 and r12 (mask 0x13ff)
2742 for(rtmp=0;rtmp<=12;rtmp++) {
2743 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2744 temp=rtmp; break;
2745 }
2746 }
 // nothing free: save the live registers and take the first of r0-r3 that
 // is neither rs nor rt
2747 if(temp==-1) {
2748 save_regs(reglist);
2749 regs_saved=1;
2750 for(rtmp=0;rtmp<=3;rtmp++)
2751 if(rtmp!=rs&&rtmp!=rt)
2752 {temp=rtmp;break;}
2753 }
 // prefer r3 as second scratch: the handler call below wants the value there
2754 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2755 temp2=3;
 // temp2 = mem_wtab[rs>>12] << 1, with flags set by the shift
2756 emit_readword((int)&mem_wtab,temp);
2757 emit_shrimm(rs,12,temp2);
2758 emit_readword_dualindexedx4(temp,temp2,temp2);
2759 emit_lsls_imm(temp2,1,temp2);
 // conditional direct store
2760 switch(type) {
2761 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2762 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2763 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2764 default: assert(0);
2765 }
 // direct access done: go back, via the register restore if regs were saved
2766 if(regs_saved) {
2767 restore_jump=(int)out;
2768 emit_jcc(0); // jump to reg restore
2769 }
2770 else
2771 emit_jcc(stubs[n][2]); // return address (invcode check)
2772
 // handler path
2773 if(!regs_saved)
2774 save_regs(reglist);
2775 int handler=0;
2776 switch(type) {
2777 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2778 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2779 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2780 }
2781 assert(handler!=0);
 // r0 = address, r1 = data, r2 = adjusted cycle count, r3 = shifted entry
2782 pass_args(rs,rt);
2783 if(temp2!=3)
2784 emit_mov(temp2,3);
2785 int cc=get_reg(i_regmap,CCREG);
2786 if(cc<0)
2787 emit_loadreg(CCREG,2);
2573466a 2788 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2789 // returns new cycle_count
2790 emit_call(handler);
 // undo the adjustment on the returned count and put it back into CCREG
2573466a 2791 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2792 if(cc<0)
2793 emit_storereg(CCREG,2);
2794 if(restore_jump)
2795 set_jump_target(restore_jump,(int)out);
2796 restore_regs(reglist);
2797 ra=stubs[n][2];
b96d3df7 2798 emit_jmp(ra);
57871462 2799}
2800
e2b5e7aa 2801static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2802{
2803 int rs=get_reg(regmap,-1);
2804 int rth=get_reg(regmap,target|64);
2805 int rt=get_reg(regmap,target);
2806 assert(rs>=0);
2807 assert(rt>=0);
b96d3df7 2808 u_int handler,host_addr=0;
b96d3df7 2809 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2810 if (handler==0) {
13e35c04 2811 if(addr!=host_addr)
2812 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2813 switch(type) {
2814 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2815 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2816 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2817 default: assert(0);
2818 }
2819 return;
2820 }
2821
2822 // call a memhandler
2823 save_regs(reglist);
13e35c04 2824 pass_args(rs,rt);
b96d3df7 2825 int cc=get_reg(regmap,CCREG);
2826 if(cc<0)
2827 emit_loadreg(CCREG,2);
2573466a 2828 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2829 emit_movimm(handler,3);
2830 // returns new cycle_count
2831 emit_call((int)jump_handler_write_h);
2573466a 2832 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2833 if(cc<0)
2834 emit_storereg(CCREG,2);
2835 restore_regs(reglist);
57871462 2836}
2837
// Emit the out-of-line stub number n for an unaligned store (SWL/SWR only).
//
// Fields of stubs[n] as used here (note that the layout differs from the
// read/write stubs):
//   [1] address of the branch that is patched to jump to this stub
//   [2] return address in the main code
//   [3] instruction index i within the block
//   [4] pointer to the struct regstat of the instruction
//   [5] host register holding the address
//   [6] value the cycle adjustment is derived from
//   [7] list of live host registers (reglist)
//
// The active "#if 1" branch simply calls jump_handle_swl / jump_handle_swr
// with r0 = address, r1 = data and r2 = adjusted cycle count; the value
// returned in r0 is written back to CCREG after undoing the adjustment.
// The "#else" branch is an older read-modify-write implementation that is
// currently compiled out.
e2b5e7aa 2838static void do_unalignedwritestub(int n)
57871462 2839{
b7918751 2840 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2841 literal_pool(256);
57871462 2842 set_jump_target(stubs[n][1],(int)out);
b7918751 2843
2844 int i=stubs[n][3];
2845 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2846 int addr=stubs[n][5];
2847 u_int reglist=stubs[n][7];
2848 signed char *i_regmap=i_regs->regmap;
2849 int temp2=get_reg(i_regmap,FTEMP);
2850 int rt;
2851 int ds, real_rs;
2852 rt=get_reg(i_regmap,rs2[i]);
2853 assert(rt>=0);
2854 assert(addr>=0);
2855 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
 // keep the address register across the call; FTEMP's register is scratch
2856 reglist|=(1<<addr);
2857 reglist&=~(1<<temp2);
2858
b96d3df7 2859#if 1
2860 // don't bother with it and call write handler
2861 save_regs(reglist);
2862 pass_args(addr,rt);
2863 int cc=get_reg(i_regmap,CCREG);
2864 if(cc<0)
2865 emit_loadreg(CCREG,2);
2573466a 2866 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2867 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
 // undo the adjustment on the returned count and put it back into CCREG
2573466a 2868 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2869 if(cc<0)
2870 emit_storereg(CCREG,2);
2871 restore_regs(reglist);
2872 emit_jmp(stubs[n][2]); // return address
2873#else
 // disabled path: read the aligned word, merge in the register bytes, write back
b7918751 2874 emit_andimm(addr,0xfffffffc,temp2);
2875 emit_writeword(temp2,(int)&address);
2876
2877 save_regs(reglist);
b7918751 2878 emit_shrimm(addr,16,1);
2879 int cc=get_reg(i_regmap,CCREG);
2880 if(cc<0) {
2881 emit_loadreg(CCREG,2);
2882 }
2883 emit_movimm((u_int)readmem,0);
2884 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2885 emit_call((int)&indirect_jump_indexed);
2886 restore_regs(reglist);
2887
2888 emit_readword((int)&readmem_dword,temp2);
2889 int temp=addr; //hmh
 // temp = bit offset of the byte within the word, (addr*8)&24
2890 emit_shlimm(addr,3,temp);
2891 emit_andimm(temp,24,temp);
2892#ifdef BIG_ENDIAN_MIPS
2893 if (opcode[i]==0x2e) // SWR
2894#else
2895 if (opcode[i]==0x2a) // SWL
2896#endif
2897 emit_xorimm(temp,24,temp);
2898 emit_movimm(-1,HOST_TEMPREG);
55439448 2899 if (opcode[i]==0x2a) { // SWL
b7918751 2900 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2901 emit_orrshr(rt,temp,temp2);
2902 }else{
2903 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2904 emit_orrshl(rt,temp,temp2);
2905 }
2906 emit_readword((int)&address,addr);
2907 emit_writeword(temp2,(int)&word);
2908 //save_regs(reglist); // don't need to, no state changes
2909 emit_shrimm(addr,16,1);
2910 emit_movimm((u_int)writemem,0);
2911 //emit_call((int)&indirect_jump_indexed);
2912 emit_mov(15,14);
2913 emit_readword_dualindexedx4(0,1,15);
2914 emit_readword((int)&Count,HOST_TEMPREG);
2915 emit_readword((int)&next_interupt,2);
2916 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2917 emit_writeword(2,(int)&last_count);
2918 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2919 if(cc<0) {
2920 emit_storereg(CCREG,HOST_TEMPREG);
2921 }
2922 restore_regs(reglist);
57871462 2923 emit_jmp(stubs[n][2]); // return address
b96d3df7 2924#endif
57871462 2925}
2926
e2b5e7aa 2927static void do_invstub(int n)
57871462 2928{
2929 literal_pool(20);
2930 u_int reglist=stubs[n][3];
2931 set_jump_target(stubs[n][1],(int)out);
2932 save_regs(reglist);
2933 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2934 emit_call((int)&invalidate_addr);
2935 restore_regs(reglist);
2936 emit_jmp(stubs[n][2]); // return address
2937}
2938
2939int do_dirty_stub(int i)
2940{
2941 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2942 u_int addr=(u_int)source;
57871462 2943 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2944 #ifndef HAVE_ARMV7
ac545b3a 2945 emit_loadlp(addr,1);
57871462 2946 emit_loadlp((int)copy,2);
2947 emit_loadlp(slen*4,3);
2948 #else
ac545b3a 2949 emit_movw(addr&0x0000FFFF,1);
57871462 2950 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2951 emit_movt(addr&0xFFFF0000,1);
57871462 2952 emit_movt(((u_int)copy)&0xFFFF0000,2);
2953 emit_movw(slen*4,3);
2954 #endif
2955 emit_movimm(start+i*4,0);
2956 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2957 int entry=(int)out;
2958 load_regs_entry(i);
2959 if(entry==(int)out) entry=instr_addr[i];
2960 emit_jmp(instr_addr[i]);
2961 return entry;
2962}
2963
e2b5e7aa 2964static void do_dirty_stub_ds()
57871462 2965{
2966 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2967 #ifndef HAVE_ARMV7
57871462 2968 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2969 emit_loadlp((int)copy,2);
2970 emit_loadlp(slen*4,3);
2971 #else
2972 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2973 emit_movw(((u_int)copy)&0x0000FFFF,2);
2974 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2975 emit_movt(((u_int)copy)&0xFFFF0000,2);
2976 emit_movw(slen*4,3);
2977 #endif
2978 emit_movimm(start+1,0);
2979 emit_call((int)&verify_code_ds);
2980}
2981
e2b5e7aa 2982static void do_cop1stub(int n)
57871462 2983{
2984 literal_pool(256);
2985 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2986 set_jump_target(stubs[n][1],(int)out);
2987 int i=stubs[n][3];
3d624f89 2988// int rs=stubs[n][4];
57871462 2989 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2990 int ds=stubs[n][6];
2991 if(!ds) {
2992 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2993 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2994 }
2995 //else {printf("fp exception in delay slot\n");}
2996 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2997 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2998 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2999 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3000 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3001}
3002
57871462 3003/* Special assem */
3004
e2b5e7aa 3005static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 3006{
3007 if(rt1[i]) {
3008 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3009 {
3010 signed char s,t,shift;
3011 t=get_reg(i_regs->regmap,rt1[i]);
3012 s=get_reg(i_regs->regmap,rs1[i]);
3013 shift=get_reg(i_regs->regmap,rs2[i]);
3014 if(t>=0){
3015 if(rs1[i]==0)
3016 {
3017 emit_zeroreg(t);
3018 }
3019 else if(rs2[i]==0)
3020 {
3021 assert(s>=0);
3022 if(s!=t) emit_mov(s,t);
3023 }
3024 else
3025 {
3026 emit_andimm(shift,31,HOST_TEMPREG);
3027 if(opcode2[i]==4) // SLLV
3028 {
3029 emit_shl(s,HOST_TEMPREG,t);
3030 }
3031 if(opcode2[i]==6) // SRLV
3032 {
3033 emit_shr(s,HOST_TEMPREG,t);
3034 }
3035 if(opcode2[i]==7) // SRAV
3036 {
3037 emit_sar(s,HOST_TEMPREG,t);
3038 }
3039 }
3040 }
3041 } else { // DSLLV/DSRLV/DSRAV
3042 signed char sh,sl,th,tl,shift;
3043 th=get_reg(i_regs->regmap,rt1[i]|64);
3044 tl=get_reg(i_regs->regmap,rt1[i]);
3045 sh=get_reg(i_regs->regmap,rs1[i]|64);
3046 sl=get_reg(i_regs->regmap,rs1[i]);
3047 shift=get_reg(i_regs->regmap,rs2[i]);
3048 if(tl>=0){
3049 if(rs1[i]==0)
3050 {
3051 emit_zeroreg(tl);
3052 if(th>=0) emit_zeroreg(th);
3053 }
3054 else if(rs2[i]==0)
3055 {
3056 assert(sl>=0);
3057 if(sl!=tl) emit_mov(sl,tl);
3058 if(th>=0&&sh!=th) emit_mov(sh,th);
3059 }
3060 else
3061 {
3062 // FIXME: What if shift==tl ?
3063 assert(shift!=tl);
3064 int temp=get_reg(i_regs->regmap,-1);
3065 int real_th=th;
3066 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3067 assert(sl>=0);
3068 assert(sh>=0);
3069 emit_andimm(shift,31,HOST_TEMPREG);
3070 if(opcode2[i]==0x14) // DSLLV
3071 {
3072 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3073 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3074 emit_orrshr(sl,HOST_TEMPREG,th);
3075 emit_andimm(shift,31,HOST_TEMPREG);
3076 emit_testimm(shift,32);
3077 emit_shl(sl,HOST_TEMPREG,tl);
3078 if(th>=0) emit_cmovne_reg(tl,th);
3079 emit_cmovne_imm(0,tl);
3080 }
3081 if(opcode2[i]==0x16) // DSRLV
3082 {
3083 assert(th>=0);
3084 emit_shr(sl,HOST_TEMPREG,tl);
3085 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3086 emit_orrshl(sh,HOST_TEMPREG,tl);
3087 emit_andimm(shift,31,HOST_TEMPREG);
3088 emit_testimm(shift,32);
3089 emit_shr(sh,HOST_TEMPREG,th);
3090 emit_cmovne_reg(th,tl);
3091 if(real_th>=0) emit_cmovne_imm(0,th);
3092 }
3093 if(opcode2[i]==0x17) // DSRAV
3094 {
3095 assert(th>=0);
3096 emit_shr(sl,HOST_TEMPREG,tl);
3097 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3098 if(real_th>=0) {
3099 assert(temp>=0);
3100 emit_sarimm(th,31,temp);
3101 }
3102 emit_orrshl(sh,HOST_TEMPREG,tl);
3103 emit_andimm(shift,31,HOST_TEMPREG);
3104 emit_testimm(shift,32);
3105 emit_sar(sh,HOST_TEMPREG,th);
3106 emit_cmovne_reg(th,tl);
3107 if(real_th>=0) emit_cmovne_reg(temp,th);
3108 }
3109 }
3110 }
3111 }
3112 }
3113}
ffb0b9e0 3114
ffb0b9e0 3115static void speculate_mov(int rs,int rt)
3116{
3117 if(rt!=0) {
3118 smrv_strong_next|=1<<rt;
3119 smrv[rt]=smrv[rs];
3120 }
3121}
3122
3123static void speculate_mov_weak(int rs,int rt)
3124{
3125 if(rt!=0) {
3126 smrv_weak_next|=1<<rt;
3127 smrv[rt]=smrv[rs];
3128 }
3129}
3130
3131static void speculate_register_values(int i)
3132{
3133 if(i==0) {
3134 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3135 // gp,sp are likely to stay the same throughout the block
3136 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3137 smrv_weak_next=~smrv_strong_next;
3138 //printf(" llr %08x\n", smrv[4]);
3139 }
3140 smrv_strong=smrv_strong_next;
3141 smrv_weak=smrv_weak_next;
3142 switch(itype[i]) {
3143 case ALU:
3144 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3145 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3146 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3147 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3148 else {
3149 smrv_strong_next&=~(1<<rt1[i]);
3150 smrv_weak_next&=~(1<<rt1[i]);
3151 }
3152 break;
3153 case SHIFTIMM:
3154 smrv_strong_next&=~(1<<rt1[i]);
3155 smrv_weak_next&=~(1<<rt1[i]);
3156 // fallthrough
3157 case IMM16:
3158 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3159 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3160 if(hr>=0) {
3161 if(get_final_value(hr,i,&value))
3162 smrv[rt1[i]]=value;
3163 else smrv[rt1[i]]=constmap[i][hr];
3164 smrv_strong_next|=1<<rt1[i];
3165 }
3166 }
3167 else {
3168 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3169 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3170 }
3171 break;
3172 case LOAD:
3173 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3174 // special case for BIOS
3175 smrv[rt1[i]]=0xa0000000;
3176 smrv_strong_next|=1<<rt1[i];
3177 break;
3178 }
3179 // fallthrough
3180 case SHIFT:
3181 case LOADLR:
3182 case MOV:
3183 smrv_strong_next&=~(1<<rt1[i]);
3184 smrv_weak_next&=~(1<<rt1[i]);
3185 break;
3186 case COP0:
3187 case COP2:
3188 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3189 smrv_strong_next&=~(1<<rt1[i]);
3190 smrv_weak_next&=~(1<<rt1[i]);
3191 }
3192 break;
3193 case C2LS:
3194 if (opcode[i]==0x32) { // LWC2
3195 smrv_strong_next&=~(1<<rt1[i]);
3196 smrv_weak_next&=~(1<<rt1[i]);
3197 }
3198 break;
3199 }
3200#if 0
3201 int r=4;
3202 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3203 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3204#endif
3205}
3206
/* Address classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, // default class; also everything not matched below
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3214
3215static int get_ptr_mem_type(u_int a)
3216{
3217 if(a < 0x00200000) {
3218 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3219 // return wrong, must use memhandler for BIOS self-test to pass
3220 // 007 does similar stuff from a00 mirror, weird stuff
3221 return MTYPE_8000;
3222 return MTYPE_0000;
3223 }
3224 if(0x1f800000 <= a && a < 0x1f801000)
3225 return MTYPE_1F80;
3226 if(0x80200000 <= a && a < 0x80800000)
3227 return MTYPE_8020;
3228 if(0xa0000000 <= a && a < 0xa0200000)
3229 return MTYPE_A000;
3230 return MTYPE_8000;
3231}
ffb0b9e0 3232
3233static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3234{
3235 int jaddr,type=0;
ffb0b9e0 3236 int mr=rs1[i];
3237 if(((smrv_strong|smrv_weak)>>mr)&1) {
3238 type=get_ptr_mem_type(smrv[mr]);
3239 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3240 }
3241 else {
3242 // use the mirror we are running on
3243 type=get_ptr_mem_type(start);
3244 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3245 }
3246
3247 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3248 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3249 addr=*addr_reg_override=HOST_TEMPREG;
3250 type=0;
3251 }
3252 else if(type==MTYPE_0000) { // RAM 0 mirror
3253 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3254 addr=*addr_reg_override=HOST_TEMPREG;
3255 type=0;
3256 }
3257 else if(type==MTYPE_A000) { // RAM A mirror
3258 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3259 addr=*addr_reg_override=HOST_TEMPREG;
3260 type=0;
3261 }
3262 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3263 if (psxH == (void *)0x1f800000) {
3264 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3265 emit_cmpimm(HOST_TEMPREG,0x1000);
3266 jaddr=(int)out;
3267 emit_jc(0);
3268 }
3269 else {
3270 // do usual RAM check, jump will go to the right handler
3271 type=0;
3272 }
ffb0b9e0 3273 }
ffb0b9e0 3274
3275 if(type==0)
3276 {
3277 emit_cmpimm(addr,RAM_SIZE);
3278 jaddr=(int)out;
3279 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3280 // Hint to branch predictor that the branch is unlikely to be taken
3281 if(rs1[i]>=28)
3282 emit_jno_unlikely(0);
3283 else
3284 #endif
3285 emit_jno(0);
a327ad27 3286 if(ram_offset!=0) {
3287 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3288 addr=*addr_reg_override=HOST_TEMPREG;
3289 }
ffb0b9e0 3290 }
3291
3292 return jaddr;
3293}
3294
57871462 3295#define shift_assemble shift_assemble_arm
3296
e2b5e7aa 3297static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3298{
3299 int s,th,tl,temp,temp2,addr,map=-1;
3300 int offset;
3301 int jaddr=0;
af4ee1fe 3302 int memtarget=0,c=0;
ffb0b9e0 3303 int fastload_reg_override=0;
57871462 3304 u_int hr,reglist=0;
3305 th=get_reg(i_regs->regmap,rt1[i]|64);
3306 tl=get_reg(i_regs->regmap,rt1[i]);
3307 s=get_reg(i_regs->regmap,rs1[i]);
3308 temp=get_reg(i_regs->regmap,-1);
3309 temp2=get_reg(i_regs->regmap,FTEMP);
3310 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3311 assert(addr<0);
3312 offset=imm[i];
3313 for(hr=0;hr<HOST_REGS;hr++) {
3314 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3315 }
3316 reglist|=1<<temp;
3317 if(offset||s<0||c) addr=temp2;
3318 else addr=s;
3319 if(s>=0) {
3320 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3321 if(c) {
3322 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3323 }
57871462 3324 }
1edfcc68 3325 if(!c) {
3326 #ifdef RAM_OFFSET
3327 map=get_reg(i_regs->regmap,ROREG);
3328 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3329 #endif
3330 emit_shlimm(addr,3,temp);
3331 if (opcode[i]==0x22||opcode[i]==0x26) {
3332 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3333 }else{
3334 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3335 }
1edfcc68 3336 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3337 }
3338 else {
3339 if(ram_offset&&memtarget) {
3340 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3341 fastload_reg_override=HOST_TEMPREG;
57871462 3342 }
1edfcc68 3343 if (opcode[i]==0x22||opcode[i]==0x26) {
3344 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3345 }else{
1edfcc68 3346 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3347 }
535d208a 3348 }
3349 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3350 if(!c||memtarget) {
ffb0b9e0 3351 int a=temp2;
3352 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3353 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3354 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3355 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3356 }
3357 else
3358 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3359 if(rt1[i]) {
3360 assert(tl>=0);
57871462 3361 emit_andimm(temp,24,temp);
2002a1db 3362#ifdef BIG_ENDIAN_MIPS
3363 if (opcode[i]==0x26) // LWR
3364#else
3365 if (opcode[i]==0x22) // LWL
3366#endif
3367 emit_xorimm(temp,24,temp);
57871462 3368 emit_movimm(-1,HOST_TEMPREG);
3369 if (opcode[i]==0x26) {
3370 emit_shr(temp2,temp,temp2);
3371 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3372 }else{
3373 emit_shl(temp2,temp,temp2);
3374 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3375 }
3376 emit_or(temp2,tl,tl);
57871462 3377 }
535d208a 3378 //emit_storereg(rt1[i],tl); // DEBUG
3379 }
3380 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3381 // FIXME: little endian, fastload_reg_override
535d208a 3382 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3383 if(!c||memtarget) {
3384 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3385 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3386 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3387 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3388 }
3389 else
3390 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3391 if(rt1[i]) {
3392 assert(th>=0);
3393 assert(tl>=0);
57871462 3394 emit_testimm(temp,32);
3395 emit_andimm(temp,24,temp);
3396 if (opcode[i]==0x1A) { // LDL
3397 emit_rsbimm(temp,32,HOST_TEMPREG);
3398 emit_shl(temp2h,temp,temp2h);
3399 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3400 emit_movimm(-1,HOST_TEMPREG);
3401 emit_shl(temp2,temp,temp2);
3402 emit_cmove_reg(temp2h,th);
3403 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3404 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3405 emit_orreq(temp2,tl,tl);
3406 emit_orrne(temp2,th,th);
3407 }
3408 if (opcode[i]==0x1B) { // LDR
3409 emit_xorimm(temp,24,temp);
3410 emit_rsbimm(temp,32,HOST_TEMPREG);
3411 emit_shr(temp2,temp,temp2);
3412 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3413 emit_movimm(-1,HOST_TEMPREG);
3414 emit_shr(temp2h,temp,temp2h);
3415 emit_cmovne_reg(temp2,tl);
3416 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3417 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3418 emit_orrne(temp2h,th,th);
3419 emit_orreq(temp2h,tl,tl);
3420 }
3421 }
3422 }
3423}
3424#define loadlr_assemble loadlr_assemble_arm
3425
e2b5e7aa 3426static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3427{
3428 if(opcode2[i]==0) // MFC0
3429 {
3430 signed char t=get_reg(i_regs->regmap,rt1[i]);
3431 char copr=(source[i]>>11)&0x1f;
3432 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3433 if(t>=0&&rt1[i]!=0) {
7139f3c8 3434 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3435 }
3436 }
3437 else if(opcode2[i]==4) // MTC0
3438 {
3439 signed char s=get_reg(i_regs->regmap,rs1[i]);
3440 char copr=(source[i]>>11)&0x1f;
3441 assert(s>=0);
63cb0298 3442 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3443 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3444 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3445 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3446 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3447 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3448 emit_writeword(HOST_CCREG,(int)&Count);
3449 }
3450 // What a mess. The status register (12) can enable interrupts,
3451 // so needs a special case to handle a pending interrupt.
3452 // The interrupt must be taken immediately, because a subsequent
3453 // instruction might disable interrupts again.
7139f3c8 3454 if(copr==12||copr==13) {
fca1aef2 3455 if (is_delayslot) {
3456 // burn cycles to cause cc_interrupt, which will
3457 // reschedule next_interupt. Relies on CCREG from above.
3458 assem_debug("MTC0 DS %d\n", copr);
3459 emit_writeword(HOST_CCREG,(int)&last_count);
3460 emit_movimm(0,HOST_CCREG);
3461 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3462 emit_loadreg(rs1[i],1);
fca1aef2 3463 emit_movimm(copr,0);
3464 emit_call((int)pcsx_mtc0_ds);
042c7287 3465 emit_loadreg(rs1[i],s);
fca1aef2 3466 return;
3467 }
63cb0298 3468 emit_movimm(start+i*4+4,HOST_TEMPREG);
3469 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3470 emit_movimm(0,HOST_TEMPREG);
3471 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3472 }
3473 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3474 //else
caeefe31 3475 if(s==HOST_CCREG)
3476 emit_loadreg(rs1[i],1);
3477 else if(s!=1)
63cb0298 3478 emit_mov(s,1);
fca1aef2 3479 emit_movimm(copr,0);
3480 emit_call((int)pcsx_mtc0);
7139f3c8 3481 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3482 emit_readword((int)&Count,HOST_CCREG);
042c7287 3483 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3484 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3485 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3486 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3487 emit_storereg(CCREG,HOST_CCREG);
3488 }
7139f3c8 3489 if(copr==12||copr==13) {
57871462 3490 assert(!is_delayslot);
3491 emit_readword((int)&pending_exception,14);
042c7287 3492 emit_test(14,14);
3493 emit_jne((int)&do_interrupt);
57871462 3494 }
3495 emit_loadreg(rs1[i],s);
3496 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3497 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3498 cop1_usable=0;
3499 }
3500 else
3501 {
3502 assert(opcode2[i]==0x10);
576bbd8f 3503 if((source[i]&0x3f)==0x10) // RFE
3504 {
3505 emit_readword((int)&Status,0);
3506 emit_andimm(0,0x3c,1);
3507 emit_andimm(0,~0xf,0);
3508 emit_orrshr_imm(1,2,0);
3509 emit_writeword(0,(int)&Status);
3510 }
57871462 3511 }
3512}
3513
b9b61529 3514static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3515{
3516 switch (copr) {
3517 case 1:
3518 case 3:
3519 case 5:
3520 case 8:
3521 case 9:
3522 case 10:
3523 case 11:
3524 emit_readword((int)&reg_cop2d[copr],tl);
3525 emit_signextend16(tl,tl);
3526 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3527 break;
3528 case 7:
3529 case 16:
3530 case 17:
3531 case 18:
3532 case 19:
3533 emit_readword((int)&reg_cop2d[copr],tl);
3534 emit_andimm(tl,0xffff,tl);
3535 emit_writeword(tl,(int)&reg_cop2d[copr]);
3536 break;
3537 case 15:
3538 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3539 emit_writeword(tl,(int)&reg_cop2d[copr]);
3540 break;
3541 case 28:
b9b61529 3542 case 29:
3543 emit_readword((int)&reg_cop2d[9],temp);
3544 emit_testimm(temp,0x8000); // do we need this?
3545 emit_andimm(temp,0xf80,temp);
3546 emit_andne_imm(temp,0,temp);
f70d384d 3547 emit_shrimm(temp,7,tl);
b9b61529 3548 emit_readword((int)&reg_cop2d[10],temp);
3549 emit_testimm(temp,0x8000);
3550 emit_andimm(temp,0xf80,temp);
3551 emit_andne_imm(temp,0,temp);
f70d384d 3552 emit_orrshr_imm(temp,2,tl);
b9b61529 3553 emit_readword((int)&reg_cop2d[11],temp);
3554 emit_testimm(temp,0x8000);
3555 emit_andimm(temp,0xf80,temp);
3556 emit_andne_imm(temp,0,temp);
f70d384d 3557 emit_orrshl_imm(temp,3,tl);
b9b61529 3558 emit_writeword(tl,(int)&reg_cop2d[copr]);
3559 break;
3560 default:
3561 emit_readword((int)&reg_cop2d[copr],tl);
3562 break;
3563 }
3564}
3565
3566static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3567{
3568 switch (copr) {
3569 case 15:
3570 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3571 emit_writeword(sl,(int)&reg_cop2d[copr]);
3572 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3573 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3574 emit_writeword(sl,(int)&reg_cop2d[14]);
3575 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3576 break;
3577 case 28:
3578 emit_andimm(sl,0x001f,temp);
f70d384d 3579 emit_shlimm(temp,7,temp);
b9b61529 3580 emit_writeword(temp,(int)&reg_cop2d[9]);
3581 emit_andimm(sl,0x03e0,temp);
f70d384d 3582 emit_shlimm(temp,2,temp);
b9b61529 3583 emit_writeword(temp,(int)&reg_cop2d[10]);
3584 emit_andimm(sl,0x7c00,temp);
f70d384d 3585 emit_shrimm(temp,3,temp);
b9b61529 3586 emit_writeword(temp,(int)&reg_cop2d[11]);
3587 emit_writeword(sl,(int)&reg_cop2d[28]);
3588 break;
3589 case 30:
3590 emit_movs(sl,temp);
3591 emit_mvnmi(temp,temp);
665f33e1 3592#ifdef HAVE_ARMV5
b9b61529 3593 emit_clz(temp,temp);
665f33e1 3594#else
3595 emit_movs(temp,HOST_TEMPREG);
3596 emit_movimm(0,temp);
3597 emit_jeq((int)out+4*4);
3598 emit_addpl_imm(temp,1,temp);
3599 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3600 emit_jns((int)out-2*4);
3601#endif
b9b61529 3602 emit_writeword(sl,(int)&reg_cop2d[30]);
3603 emit_writeword(temp,(int)&reg_cop2d[31]);
3604 break;
b9b61529 3605 case 31:
3606 break;
3607 default:
3608 emit_writeword(sl,(int)&reg_cop2d[copr]);
3609 break;
3610 }
3611}
3612
e2b5e7aa 3613static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3614{
3615 u_int copr=(source[i]>>11)&0x1f;
3616 signed char temp=get_reg(i_regs->regmap,-1);
3617 if (opcode2[i]==0) { // MFC2
3618 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3619 if(tl>=0&&rt1[i]!=0)
b9b61529 3620 cop2_get_dreg(copr,tl,temp);
3621 }
3622 else if (opcode2[i]==4) { // MTC2
3623 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3624 cop2_put_dreg(copr,sl,temp);
3625 }
3626 else if (opcode2[i]==2) // CFC2
3627 {
3628 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3629 if(tl>=0&&rt1[i]!=0)
b9b61529 3630 emit_readword((int)&reg_cop2c[copr],tl);
3631 }
3632 else if (opcode2[i]==6) // CTC2
3633 {
3634 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3635 switch(copr) {
3636 case 4:
3637 case 12:
3638 case 20:
3639 case 26:
3640 case 27:
3641 case 29:
3642 case 30:
3643 emit_signextend16(sl,temp);
3644 break;
3645 case 31:
3646 //value = value & 0x7ffff000;
3647 //if (value & 0x7f87e000) value |= 0x80000000;
3648 emit_shrimm(sl,12,temp);
3649 emit_shlimm(temp,12,temp);
3650 emit_testimm(temp,0x7f000000);
3651 emit_testeqimm(temp,0x00870000);
3652 emit_testeqimm(temp,0x0000e000);
3653 emit_orrne_imm(temp,0x80000000,temp);
3654 break;
3655 default:
3656 temp=sl;
3657 break;
3658 }
3659 emit_writeword(temp,(int)&reg_cop2c[copr]);
3660 assert(sl>=0);
3661 }
3662}
3663
054175e9 3664static void c2op_prologue(u_int op,u_int reglist)
3665{
3666 save_regs_all(reglist);
82ed88eb 3667#ifdef PCNT
3668 emit_movimm(op,0);
3669 emit_call((int)pcnt_gte_start);
3670#endif
054175e9 3671 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3672}
3673
3674static void c2op_epilogue(u_int op,u_int reglist)
3675{
82ed88eb 3676#ifdef PCNT
3677 emit_movimm(op,0);
3678 emit_call((int)pcnt_gte_end);
3679#endif
054175e9 3680 restore_regs_all(reglist);
3681}
3682
6c0eefaf 3683static void c2op_call_MACtoIR(int lm,int need_flags)
3684{
3685 if(need_flags)
3686 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3687 else
3688 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3689}
3690
3691static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3692{
3693 emit_call((int)func);
3694 // func is C code and trashes r0
3695 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3696 if(need_flags||need_ir)
3697 c2op_call_MACtoIR(lm,need_flags);
3698 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3699}
3700
054175e9 3701static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3702{
3703 signed char temp=get_reg(i_regs->regmap,-1);
3704 u_int c2op=source[i]&0x3f;
6c0eefaf 3705 u_int hr,reglist_full=0,reglist;
054175e9 3706 int need_flags,need_ir;
b9b61529 3707 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3708 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3709 }
4d646738 3710 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3711
3712 if (gte_handlers[c2op]!=NULL) {
bedfea38 3713 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3714 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3715 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3716 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3717 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3718 need_flags=0;
6c0eefaf 3719 int shift = (source[i] >> 19) & 1;
3720 int lm = (source[i] >> 10) & 1;
054175e9 3721 switch(c2op) {
19776aef 3722#ifndef DRC_DBG
054175e9 3723 case GTE_MVMVA: {
82336ba3 3724#ifdef HAVE_ARMV5
054175e9 3725 int v = (source[i] >> 15) & 3;
3726 int cv = (source[i] >> 13) & 3;
3727 int mx = (source[i] >> 17) & 3;
4d646738 3728 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3729 c2op_prologue(c2op,reglist);
3730 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3731 if(v<3)
3732 emit_ldrd(v*8,0,4);
3733 else {
3734 emit_movzwl_indexed(9*4,0,4); // gteIR
3735 emit_movzwl_indexed(10*4,0,6);
3736 emit_movzwl_indexed(11*4,0,5);
3737 emit_orrshl_imm(6,16,4);
3738 }
3739 if(mx<3)
3740 emit_addimm(0,32*4+mx*8*4,6);
3741 else
3742 emit_readword((int)&zeromem_ptr,6);
3743 if(cv<3)
3744 emit_addimm(0,32*4+(cv*8+5)*4,7);
3745 else
3746 emit_readword((int)&zeromem_ptr,7);
3747#ifdef __ARM_NEON__
3748 emit_movimm(source[i],1); // opcode
3749 emit_call((int)gteMVMVA_part_neon);
3750 if(need_flags) {
3751 emit_movimm(lm,1);
3752 emit_call((int)gteMACtoIR_flags_neon);
3753 }
3754#else
3755 if(cv==3&&shift)
3756 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3757 else {
3758 emit_movimm(shift,1);
3759 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3760 }
6c0eefaf 3761 if(need_flags||need_ir)
3762 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3763#endif
3764#else /* if not HAVE_ARMV5 */
3765 c2op_prologue(c2op,reglist);
3766 emit_movimm(source[i],1); // opcode
3767 emit_writeword(1,(int)&psxRegs.code);
3768 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3769#endif
3770 break;
3771 }
6c0eefaf 3772 case GTE_OP:
3773 c2op_prologue(c2op,reglist);
3774 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3775 if(need_flags||need_ir) {
3776 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3777 c2op_call_MACtoIR(lm,need_flags);
3778 }
3779 break;
3780 case GTE_DPCS:
3781 c2op_prologue(c2op,reglist);
3782 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3783 break;
3784 case GTE_INTPL:
3785 c2op_prologue(c2op,reglist);
3786 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3787 break;
3788 case GTE_SQR:
3789 c2op_prologue(c2op,reglist);
3790 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3791 if(need_flags||need_ir) {
3792 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3793 c2op_call_MACtoIR(lm,need_flags);
3794 }
3795 break;
3796 case GTE_DCPL:
3797 c2op_prologue(c2op,reglist);
3798 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3799 break;
3800 case GTE_GPF:
3801 c2op_prologue(c2op,reglist);
3802 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3803 break;
3804 case GTE_GPL:
3805 c2op_prologue(c2op,reglist);
3806 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3807 break;
19776aef 3808#endif
054175e9 3809 default:
054175e9 3810 c2op_prologue(c2op,reglist);
19776aef 3811#ifdef DRC_DBG
3812 emit_movimm(source[i],1); // opcode
3813 emit_writeword(1,(int)&psxRegs.code);
3814#endif
054175e9 3815 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3816 break;
3817 }
3818 c2op_epilogue(c2op,reglist);
3819 }
b9b61529 3820}
3821
e2b5e7aa 3822static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3823{
3824 // XXX: should just just do the exception instead
3825 if(!cop1_usable) {
3826 int jaddr=(int)out;
3827 emit_jmp(0);
3828 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3829 cop1_usable=1;
3830 }
3831}
3832
/* COP1 instructions are not executed; they only route to the
 * coprocessor-unusable stub via cop1_unusable(). */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3837
/* FP conversions are not executed; they only route to the
 * coprocessor-unusable stub via cop1_unusable(). */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3842#define fconv_assemble fconv_assemble_arm
3843
/* FP compares are not executed; they only route to the
 * coprocessor-unusable stub via cop1_unusable(). */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3848
/* FP arithmetic is not executed; it only routes to the
 * coprocessor-unusable stub via cop1_unusable(). */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3853
e2b5e7aa 3854static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3855{
3856 // case 0x18: MULT
3857 // case 0x19: MULTU
3858 // case 0x1A: DIV
3859 // case 0x1B: DIVU
3860 // case 0x1C: DMULT
3861 // case 0x1D: DMULTU
3862 // case 0x1E: DDIV
3863 // case 0x1F: DDIVU
3864 if(rs1[i]&&rs2[i])
3865 {
3866 if((opcode2[i]&4)==0) // 32-bit
3867 {
3868 if(opcode2[i]==0x18) // MULT
3869 {
3870 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3871 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3872 signed char hi=get_reg(i_regs->regmap,HIREG);
3873 signed char lo=get_reg(i_regs->regmap,LOREG);
3874 assert(m1>=0);
3875 assert(m2>=0);
3876 assert(hi>=0);
3877 assert(lo>=0);
3878 emit_smull(m1,m2,hi,lo);
3879 }
3880 if(opcode2[i]==0x19) // MULTU
3881 {
3882 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3883 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3884 signed char hi=get_reg(i_regs->regmap,HIREG);
3885 signed char lo=get_reg(i_regs->regmap,LOREG);
3886 assert(m1>=0);
3887 assert(m2>=0);
3888 assert(hi>=0);
3889 assert(lo>=0);
3890 emit_umull(m1,m2,hi,lo);
3891 }
3892 if(opcode2[i]==0x1A) // DIV
3893 {
3894 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3895 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3896 assert(d1>=0);
3897 assert(d2>=0);
3898 signed char quotient=get_reg(i_regs->regmap,LOREG);
3899 signed char remainder=get_reg(i_regs->regmap,HIREG);
3900 assert(quotient>=0);
3901 assert(remainder>=0);
3902 emit_movs(d1,remainder);
44a80f6a 3903 emit_movimm(0xffffffff,quotient);
3904 emit_negmi(quotient,quotient); // .. quotient and ..
3905 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3906 emit_movs(d2,HOST_TEMPREG);
3907 emit_jeq((int)out+52); // Division by zero
82336ba3 3908 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3909#ifdef HAVE_ARMV5
57871462 3910 emit_clz(HOST_TEMPREG,quotient);
3911 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3912#else
3913 emit_movimm(0,quotient);
3914 emit_addpl_imm(quotient,1,quotient);
3915 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3916 emit_jns((int)out-2*4);
3917#endif
57871462 3918 emit_orimm(quotient,1<<31,quotient);
3919 emit_shr(quotient,quotient,quotient);
3920 emit_cmp(remainder,HOST_TEMPREG);
3921 emit_subcs(remainder,HOST_TEMPREG,remainder);
3922 emit_adcs(quotient,quotient,quotient);
3923 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3924 emit_jcc((int)out-16); // -4
3925 emit_teq(d1,d2);
3926 emit_negmi(quotient,quotient);
3927 emit_test(d1,d1);
3928 emit_negmi(remainder,remainder);
3929 }
3930 if(opcode2[i]==0x1B) // DIVU
3931 {
3932 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3933 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3934 assert(d1>=0);
3935 assert(d2>=0);
3936 signed char quotient=get_reg(i_regs->regmap,LOREG);
3937 signed char remainder=get_reg(i_regs->regmap,HIREG);
3938 assert(quotient>=0);
3939 assert(remainder>=0);
44a80f6a 3940 emit_mov(d1,remainder);
3941 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3942 emit_test(d2,d2);
44a80f6a 3943 emit_jeq((int)out+40); // Division by zero
665f33e1 3944#ifdef HAVE_ARMV5
57871462 3945 emit_clz(d2,HOST_TEMPREG);
3946 emit_movimm(1<<31,quotient);
3947 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3948#else
3949 emit_movimm(0,HOST_TEMPREG);
82336ba3 3950 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3951 emit_lslpls_imm(d2,1,d2);
665f33e1 3952 emit_jns((int)out-2*4);
3953 emit_movimm(1<<31,quotient);
3954#endif
57871462 3955 emit_shr(quotient,HOST_TEMPREG,quotient);
3956 emit_cmp(remainder,d2);
3957 emit_subcs(remainder,d2,remainder);
3958 emit_adcs(quotient,quotient,quotient);
3959 emit_shrcc_imm(d2,1,d2);
3960 emit_jcc((int)out-16); // -4
3961 }
3962 }
3963 else // 64-bit
71e490c5 3964 assert(0);
57871462 3965 }
3966 else
3967 {
3968 // Multiply by zero is zero.
3969 // MIPS does not have a divide by zero exception.
3970 // The result is undefined, we return zero.
3971 signed char hr=get_reg(i_regs->regmap,HIREG);
3972 signed char lr=get_reg(i_regs->regmap,LOREG);
3973 if(hr>=0) emit_zeroreg(hr);
3974 if(lr>=0) emit_zeroreg(lr);
3975 }
3976}
3977#define multdiv_assemble multdiv_assemble_arm
3978
/* Intentionally empty on ARM.  On x86 this puts the value 0xf8 into the
 * register; on ARM the hash is computed by a single instruction in
 * do_rhash(), so nothing needs preloading. */
static void do_preload_rhash(int r)
{
}
3983
e2b5e7aa 3984static void do_preload_rhtbl(int ht) {
57871462 3985 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3986}
3987
/* Compute the mini hash table byte offset for the address in rs:
 * rh = rs & 0xf8. */
static void do_rhash(int rs,int rh)
{
  emit_andimm(rs,0xf8,rh);
}
3991
e2b5e7aa 3992static void do_miniht_load(int ht,int rh) {
57871462 3993 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3994 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3995}
3996
e2b5e7aa 3997static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3998 emit_cmp(rh,rs);
3999 emit_ldreq_indexed(ht,4,15);
4000 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4001 emit_mov(rs,7);
4002 emit_jmp(jump_vaddr_reg[7]);
4003 #else
4004 emit_jmp(jump_vaddr_reg[rs]);
4005 #endif
4006}
4007
e2b5e7aa 4008static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 4009 #ifndef HAVE_ARMV7
57871462 4010 emit_movimm(return_address,rt); // PC into link register
4011 add_to_linker((int)out,return_address,1);
4012 emit_pcreladdr(temp);
4013 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4014 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4015 #else
4016 emit_movw(return_address&0x0000FFFF,rt);
4017 add_to_linker((int)out,return_address,1);
4018 emit_pcreladdr(temp);
4019 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4020 emit_movt(return_address&0xFFFF0000,rt);
4021 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4022 #endif
4023}
4024
e2b5e7aa 4025static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4026{
4027 //if(dirty_pre==dirty) return;
4028 int hr,reg,new_hr;
4029 for(hr=0;hr<HOST_REGS;hr++) {
4030 if(hr!=EXCLUDE_REG) {
4031 reg=pre[hr];
4032 if(((~u)>>(reg&63))&1) {
f776eb14 4033 if(reg>0) {
57871462 4034 if(((dirty_pre&~dirty)>>hr)&1) {
4035 if(reg>0&&reg<34) {
4036 emit_storereg(reg,hr);
4037 if( ((is32_pre&~uu)>>reg)&1 ) {
4038 emit_sarimm(hr,31,HOST_TEMPREG);
4039 emit_storereg(reg|64,HOST_TEMPREG);
4040 }
4041 }
4042 else if(reg>=64) {
4043 emit_storereg(reg,hr);
4044 }
4045 }
4046 }
57871462 4047 }
4048 }
4049 }
4050}
4051
4052
4053/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4054static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4055{
4056 int hr;
4057 int wrote=-1;
4058 for(hr=HOST_REGS-1;hr>=0;hr--) {
4059 if(hr!=EXCLUDE_REG) {
4060 if(pre[hr]!=entry[hr]) {
4061 if(pre[hr]>=0) {
4062 if((dirty>>hr)&1) {
4063 if(get_reg(entry,pre[hr])<0) {
4064 if(pre[hr]<64) {
4065 if(!((u>>pre[hr])&1)) {
4066 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4067 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4068 emit_sarimm(hr,31,hr+1);
4069 emit_strdreg(pre[hr],hr);
4070 }
4071 else
4072 emit_storereg(pre[hr],hr);
4073 }else{
4074 emit_storereg(pre[hr],hr);
4075 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4076 emit_sarimm(hr,31,hr);
4077 emit_storereg(pre[hr]|64,hr);
4078 }
4079 }
4080 }
4081 }else{
4082 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4083 emit_storereg(pre[hr],hr);
4084 }
4085 }
4086 wrote=hr;
4087 }
4088 }
4089 }
4090 }
4091 }
4092 }
4093 for(hr=0;hr<HOST_REGS;hr++) {
4094 if(hr!=EXCLUDE_REG) {
4095 if(pre[hr]!=entry[hr]) {
4096 if(pre[hr]>=0) {
4097 int nr;
4098 if((nr=get_reg(entry,pre[hr]))>=0) {
4099 emit_mov(hr,nr);
4100 }
4101 }
4102 }
4103 }
4104 }
4105}
4106#define wb_invalidate wb_invalidate_arm
4107*/
4108
dd3a91a1 4109// Clearing the cache is rather slow on ARM Linux, so mark the areas
4110// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4111static void do_clear_cache()
dd3a91a1 4112{
4113 int i,j;
4114 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4115 {
4116 u_int bitmap=needs_clear_cache[i];
4117 if(bitmap) {
4118 u_int start,end;
9f51b4b9 4119 for(j=0;j<32;j++)
dd3a91a1 4120 {
4121 if(bitmap&(1<<j)) {
bdeade46 4122 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4123 end=start+4095;
4124 j++;
4125 while(j<32) {
4126 if(bitmap&(1<<j)) {
4127 end+=4096;
4128 j++;
4129 }else{
4130 __clear_cache((void *)start,(void *)end);
4131 break;
4132 }
4133 }
4134 }
4135 }
4136 needs_clear_cache[i]=0;
4137 }
4138 }
4139}
4140
// CPU-architecture-specific initialization hook.
// The ARM backend has no setup work to do here, so the body is empty.
static void arch_init()
{
  // intentionally empty
}
b9b61529 4144
4145// vim:shiftwidth=2:expandtab