drc: enable and fix warnings
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
a327ad27 31#if !BASE_ADDR_FIXED
/* Statically allocated, 4096-byte aligned buffer of (1 << TARGET_SIZE_2)
   bytes. Only defined when BASE_ADDR_FIXED is 0.
   NOTE(review): presumably the buffer generated code is written into -
   confirm against the users of translation_cache. */
bdeade46 32char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
33#endif
34
4d646738 35#ifndef __MACH__
36#define CALLER_SAVE_REGS 0x100f
37#else
38#define CALLER_SAVE_REGS 0x120f
39#endif
40
e2b5e7aa 41#define unused __attribute__((unused))
42
57871462 43extern int cycle_count;
44extern int last_count;
45extern int pcaddr;
46extern int pending_exception;
47extern int branch_target;
48extern uint64_t readmem_dword;
57871462 49extern void *dynarec_local;
57871462 50extern u_int mini_ht[32][2];
57871462 51
52void indirect_jump_indexed();
53void indirect_jump();
54void do_interrupt();
55void jump_vaddr_r0();
56void jump_vaddr_r1();
57void jump_vaddr_r2();
58void jump_vaddr_r3();
59void jump_vaddr_r4();
60void jump_vaddr_r5();
61void jump_vaddr_r6();
62void jump_vaddr_r7();
63void jump_vaddr_r8();
64void jump_vaddr_r9();
65void jump_vaddr_r10();
66void jump_vaddr_r12();
67
68const u_int jump_vaddr_reg[16] = {
69 (int)jump_vaddr_r0,
70 (int)jump_vaddr_r1,
71 (int)jump_vaddr_r2,
72 (int)jump_vaddr_r3,
73 (int)jump_vaddr_r4,
74 (int)jump_vaddr_r5,
75 (int)jump_vaddr_r6,
76 (int)jump_vaddr_r7,
77 (int)jump_vaddr_r8,
78 (int)jump_vaddr_r9,
79 (int)jump_vaddr_r10,
80 0,
81 (int)jump_vaddr_r12,
82 0,
83 0,
84 0};
85
0bbd1454 86void invalidate_addr_r0();
87void invalidate_addr_r1();
88void invalidate_addr_r2();
89void invalidate_addr_r3();
90void invalidate_addr_r4();
91void invalidate_addr_r5();
92void invalidate_addr_r6();
93void invalidate_addr_r7();
94void invalidate_addr_r8();
95void invalidate_addr_r9();
96void invalidate_addr_r10();
97void invalidate_addr_r12();
98
99const u_int invalidate_addr_reg[16] = {
100 (int)invalidate_addr_r0,
101 (int)invalidate_addr_r1,
102 (int)invalidate_addr_r2,
103 (int)invalidate_addr_r3,
104 (int)invalidate_addr_r4,
105 (int)invalidate_addr_r5,
106 (int)invalidate_addr_r6,
107 (int)invalidate_addr_r7,
108 (int)invalidate_addr_r8,
109 (int)invalidate_addr_r9,
110 (int)invalidate_addr_r10,
111 0,
112 (int)invalidate_addr_r12,
113 0,
114 0,
115 0};
116
/* File-local array of 1 << (TARGET_SIZE_2 - 17) unsigned ints.
   NOTE(review): presumably a bitmap of translation-cache regions awaiting
   an instruction-cache flush - confirm against its users, which are not
   in this part of the file. */
e2b5e7aa 117static unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 118
57871462 119/* Linker */
120
e2b5e7aa 121static void set_jump_target(int addr,u_int target)
57871462 122{
123 u_char *ptr=(u_char *)addr;
124 u_int *ptr2=(u_int *)ptr;
125 if(ptr[3]==0xe2) {
126 assert((target-(u_int)ptr2-8)<1024);
127 assert((addr&3)==0);
128 assert((target&3)==0);
129 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
130 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
131 }
132 else if(ptr[3]==0x72) {
133 // generated by emit_jno_unlikely
134 if((target-(u_int)ptr2-8)<1024) {
135 assert((addr&3)==0);
136 assert((target&3)==0);
137 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
138 }
139 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
140 assert((addr&3)==0);
141 assert((target&3)==0);
142 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
143 }
144 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146 else {
147 assert((ptr[3]&0x0e)==0xa);
148 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
149 }
150}
151
152// This optionally copies the instruction from the target of the branch into
153// the space before the branch. Works, but the difference in speed is
154// usually insignificant.
e2b5e7aa 155#if 0
156static void set_jump_target_fillslot(int addr,u_int target,int copy)
57871462 157{
158 u_char *ptr=(u_char *)addr;
159 u_int *ptr2=(u_int *)ptr;
160 assert(!copy||ptr2[-1]==0xe28dd000);
161 if(ptr[3]==0xe2) {
162 assert(!copy);
163 assert((target-(u_int)ptr2-8)<4096);
164 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
165 }
166 else {
167 assert((ptr[3]&0x0e)==0xa);
168 u_int target_insn=*(u_int *)target;
169 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
170 copy=0;
171 }
172 if((target_insn&0x0c100000)==0x04100000) { // Load
173 copy=0;
174 }
175 if(target_insn&0x08000000) {
176 copy=0;
177 }
178 if(copy) {
179 ptr2[-1]=target_insn;
180 target+=4;
181 }
182 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
183 }
184}
e2b5e7aa 185#endif
57871462 186
187/* Literal pool */
e2b5e7aa 188static void add_literal(int addr,int val)
57871462 189{
15776b68 190 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 191 literals[literalcount][0]=addr;
192 literals[literalcount][1]=val;
9f51b4b9 193 literalcount++;
194}
57871462 195
e2b5e7aa 196static void *kill_pointer(void *stub)
57871462 197{
198 int *ptr=(int *)(stub+4);
199 assert((*ptr&0x0ff00000)==0x05900000);
200 u_int offset=*ptr&0xfff;
201 int **l_ptr=(void *)ptr+offset+8;
202 int *i_ptr=*l_ptr;
203 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 204 return i_ptr;
57871462 205}
206
// Find where an external branch is linked to, using the address of its stub:
// get the address that the second insn of the stub loads (dyna_linker arg1),
// treat it as a pointer to a branch insn,
// return the address that branch jumps to.
//
// The second stub word must be a pc-relative load (0x.59f....); the
// instruction it points at must be a branch (0x.a......).
// The 24-bit branch offset is sign-extended through an unsigned shift,
// because left-shifting a negative int is undefined behaviour, and byte
// offsets go through char pointers (void pointer arithmetic is a GNU
// extension).
static int get_pointer(void *stub)
{
  int *ptr=(int *)((char *)stub+4);
  assert((*ptr&0x0fff0000)==0x059f0000);
  u_int offset=*ptr&0xfff;
  int **l_ptr=(int **)((char *)ptr+offset+8);
  int *i_ptr=*l_ptr;
  assert((*i_ptr&0x0f000000)==0x0a000000);
  // word offset in bits 0-23 -> signed byte offset
  int disp=(int)((u_int)*i_ptr<<8)>>6;
  return (int)i_ptr+disp+8;
}
222
223// Find the "clean" entry point from a "dirty" entry point
224// by skipping past the call to verify_code
e2b5e7aa 225static u_int get_clean_addr(int addr)
57871462 226{
227 int *ptr=(int *)addr;
665f33e1 228 #ifndef HAVE_ARMV7
57871462 229 ptr+=4;
230 #else
231 ptr+=6;
232 #endif
233 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
234 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
235 ptr++;
236 if((*ptr&0xFF000000)==0xea000000) {
237 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
238 }
239 return (u_int)ptr;
240}
241
e2b5e7aa 242static int verify_dirty(u_int *ptr)
57871462 243{
665f33e1 244 #ifndef HAVE_ARMV7
57871462 245 // get from literal pool
15776b68 246 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 247 u_int offset=*ptr&0xfff;
248 u_int *l_ptr=(void *)ptr+offset+8;
249 u_int source=l_ptr[0];
250 u_int copy=l_ptr[1];
251 u_int len=l_ptr[2];
252 ptr+=4;
253 #else
254 // ARMv7 movw/movt
255 assert((*ptr&0xFFF00000)==0xe3000000);
256 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
257 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
258 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
259 ptr+=6;
260 #endif
261 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
262 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 263 //printf("verify_dirty: %x %x %x\n",source,copy,len);
264 return !memcmp((void *)source,(void *)copy,len);
265}
266
267// This doesn't necessarily find all clean entry points, just
268// guarantees that it's not dirty
e2b5e7aa 269static int isclean(int addr)
57871462 270{
665f33e1 271 #ifndef HAVE_ARMV7
581335b0 272 u_int *ptr=((u_int *)addr)+4;
57871462 273 #else
581335b0 274 u_int *ptr=((u_int *)addr)+6;
57871462 275 #endif
276 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
277 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
278 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
279 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
280 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
281 return 1;
282}
283
4a35de07 284// get source that block at addr was compiled from (host pointers)
e2b5e7aa 285static void get_bounds(int addr,u_int *start,u_int *end)
57871462 286{
287 u_int *ptr=(u_int *)addr;
665f33e1 288 #ifndef HAVE_ARMV7
57871462 289 // get from literal pool
15776b68 290 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 291 u_int offset=*ptr&0xfff;
292 u_int *l_ptr=(void *)ptr+offset+8;
293 u_int source=l_ptr[0];
294 //u_int copy=l_ptr[1];
295 u_int len=l_ptr[2];
296 ptr+=4;
297 #else
298 // ARMv7 movw/movt
299 assert((*ptr&0xFFF00000)==0xe3000000);
300 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
301 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
302 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
303 ptr+=6;
304 #endif
305 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
306 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 307 *start=source;
308 *end=source+len;
309}
310
311/* Register allocation */
312
313// Note: registers are allocated clean (unmodified state)
314// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 315static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 316{
317 int r,hr;
318 int preferred_reg = (reg&7);
319 if(reg==CCREG) preferred_reg=HOST_CCREG;
320 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 321
57871462 322 // Don't allocate unused registers
323 if((cur->u>>reg)&1) return;
9f51b4b9 324
57871462 325 // see if it's already allocated
326 for(hr=0;hr<HOST_REGS;hr++)
327 {
328 if(cur->regmap[hr]==reg) return;
329 }
9f51b4b9 330
57871462 331 // Keep the same mapping if the register was already allocated in a loop
332 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 333
57871462 334 // Try to allocate the preferred register
335 if(cur->regmap[preferred_reg]==-1) {
336 cur->regmap[preferred_reg]=reg;
337 cur->dirty&=~(1<<preferred_reg);
338 cur->isconst&=~(1<<preferred_reg);
339 return;
340 }
341 r=cur->regmap[preferred_reg];
342 if(r<64&&((cur->u>>r)&1)) {
343 cur->regmap[preferred_reg]=reg;
344 cur->dirty&=~(1<<preferred_reg);
345 cur->isconst&=~(1<<preferred_reg);
346 return;
347 }
348 if(r>=64&&((cur->uu>>(r&63))&1)) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
9f51b4b9 354
57871462 355 // Clear any unneeded registers
356 // We try to keep the mapping consistent, if possible, because it
357 // makes branches easier (especially loops). So we try to allocate
358 // first (see above) before removing old mappings. If this is not
359 // possible then go ahead and clear out the registers that are no
360 // longer needed.
361 for(hr=0;hr<HOST_REGS;hr++)
362 {
363 r=cur->regmap[hr];
364 if(r>=0) {
365 if(r<64) {
366 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
367 }
368 else
369 {
370 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
371 }
372 }
373 }
374 // Try to allocate any available register, but prefer
375 // registers that have not been used recently.
376 if(i>0) {
377 for(hr=0;hr<HOST_REGS;hr++) {
378 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
379 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
380 cur->regmap[hr]=reg;
381 cur->dirty&=~(1<<hr);
382 cur->isconst&=~(1<<hr);
383 return;
384 }
385 }
386 }
387 }
388 // Try to allocate any available register
389 for(hr=0;hr<HOST_REGS;hr++) {
390 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
391 cur->regmap[hr]=reg;
392 cur->dirty&=~(1<<hr);
393 cur->isconst&=~(1<<hr);
394 return;
395 }
396 }
9f51b4b9 397
57871462 398 // Ok, now we have to evict someone
399 // Pick a register we hopefully won't need soon
400 u_char hsn[MAXREG+1];
401 memset(hsn,10,sizeof(hsn));
402 int j;
403 lsn(hsn,i,&preferred_reg);
404 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
405 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
406 if(i>0) {
407 // Don't evict the cycle count at entry points, otherwise the entry
408 // stub will have to write it.
409 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
410 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
411 for(j=10;j>=3;j--)
412 {
413 // Alloc preferred register if available
414 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
415 for(hr=0;hr<HOST_REGS;hr++) {
416 // Evict both parts of a 64-bit register
417 if((cur->regmap[hr]&63)==r) {
418 cur->regmap[hr]=-1;
419 cur->dirty&=~(1<<hr);
420 cur->isconst&=~(1<<hr);
421 }
422 }
423 cur->regmap[preferred_reg]=reg;
424 return;
425 }
426 for(r=1;r<=MAXREG;r++)
427 {
428 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
429 for(hr=0;hr<HOST_REGS;hr++) {
430 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
431 if(cur->regmap[hr]==r+64) {
432 cur->regmap[hr]=reg;
433 cur->dirty&=~(1<<hr);
434 cur->isconst&=~(1<<hr);
435 return;
436 }
437 }
438 }
439 for(hr=0;hr<HOST_REGS;hr++) {
440 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
441 if(cur->regmap[hr]==r) {
442 cur->regmap[hr]=reg;
443 cur->dirty&=~(1<<hr);
444 cur->isconst&=~(1<<hr);
445 return;
446 }
447 }
448 }
449 }
450 }
451 }
452 }
453 for(j=10;j>=0;j--)
454 {
455 for(r=1;r<=MAXREG;r++)
456 {
457 if(hsn[r]==j) {
458 for(hr=0;hr<HOST_REGS;hr++) {
459 if(cur->regmap[hr]==r+64) {
460 cur->regmap[hr]=reg;
461 cur->dirty&=~(1<<hr);
462 cur->isconst&=~(1<<hr);
463 return;
464 }
465 }
466 for(hr=0;hr<HOST_REGS;hr++) {
467 if(cur->regmap[hr]==r) {
468 cur->regmap[hr]=reg;
469 cur->dirty&=~(1<<hr);
470 cur->isconst&=~(1<<hr);
471 return;
472 }
473 }
474 }
475 }
476 }
c43b5311 477 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 478}
479
e2b5e7aa 480static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 481{
482 int preferred_reg = 8+(reg&1);
483 int r,hr;
9f51b4b9 484
57871462 485 // allocate the lower 32 bits
486 alloc_reg(cur,i,reg);
9f51b4b9 487
57871462 488 // Don't allocate unused registers
489 if((cur->uu>>reg)&1) return;
9f51b4b9 490
57871462 491 // see if the upper half is already allocated
492 for(hr=0;hr<HOST_REGS;hr++)
493 {
494 if(cur->regmap[hr]==reg+64) return;
495 }
9f51b4b9 496
57871462 497 // Keep the same mapping if the register was already allocated in a loop
498 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 499
57871462 500 // Try to allocate the preferred register
501 if(cur->regmap[preferred_reg]==-1) {
502 cur->regmap[preferred_reg]=reg|64;
503 cur->dirty&=~(1<<preferred_reg);
504 cur->isconst&=~(1<<preferred_reg);
505 return;
506 }
507 r=cur->regmap[preferred_reg];
508 if(r<64&&((cur->u>>r)&1)) {
509 cur->regmap[preferred_reg]=reg|64;
510 cur->dirty&=~(1<<preferred_reg);
511 cur->isconst&=~(1<<preferred_reg);
512 return;
513 }
514 if(r>=64&&((cur->uu>>(r&63))&1)) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
9f51b4b9 520
57871462 521 // Clear any unneeded registers
522 // We try to keep the mapping consistent, if possible, because it
523 // makes branches easier (especially loops). So we try to allocate
524 // first (see above) before removing old mappings. If this is not
525 // possible then go ahead and clear out the registers that are no
526 // longer needed.
527 for(hr=HOST_REGS-1;hr>=0;hr--)
528 {
529 r=cur->regmap[hr];
530 if(r>=0) {
531 if(r<64) {
532 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
533 }
534 else
535 {
536 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
537 }
538 }
539 }
540 // Try to allocate any available register, but prefer
541 // registers that have not been used recently.
542 if(i>0) {
543 for(hr=0;hr<HOST_REGS;hr++) {
544 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
545 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
546 cur->regmap[hr]=reg|64;
547 cur->dirty&=~(1<<hr);
548 cur->isconst&=~(1<<hr);
549 return;
550 }
551 }
552 }
553 }
554 // Try to allocate any available register
555 for(hr=0;hr<HOST_REGS;hr++) {
556 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
557 cur->regmap[hr]=reg|64;
558 cur->dirty&=~(1<<hr);
559 cur->isconst&=~(1<<hr);
560 return;
561 }
562 }
9f51b4b9 563
57871462 564 // Ok, now we have to evict someone
565 // Pick a register we hopefully won't need soon
566 u_char hsn[MAXREG+1];
567 memset(hsn,10,sizeof(hsn));
568 int j;
569 lsn(hsn,i,&preferred_reg);
570 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
571 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
572 if(i>0) {
573 // Don't evict the cycle count at entry points, otherwise the entry
574 // stub will have to write it.
575 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
576 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
577 for(j=10;j>=3;j--)
578 {
579 // Alloc preferred register if available
580 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
581 for(hr=0;hr<HOST_REGS;hr++) {
582 // Evict both parts of a 64-bit register
583 if((cur->regmap[hr]&63)==r) {
584 cur->regmap[hr]=-1;
585 cur->dirty&=~(1<<hr);
586 cur->isconst&=~(1<<hr);
587 }
588 }
589 cur->regmap[preferred_reg]=reg|64;
590 return;
591 }
592 for(r=1;r<=MAXREG;r++)
593 {
594 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
595 for(hr=0;hr<HOST_REGS;hr++) {
596 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
597 if(cur->regmap[hr]==r+64) {
598 cur->regmap[hr]=reg|64;
599 cur->dirty&=~(1<<hr);
600 cur->isconst&=~(1<<hr);
601 return;
602 }
603 }
604 }
605 for(hr=0;hr<HOST_REGS;hr++) {
606 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
607 if(cur->regmap[hr]==r) {
608 cur->regmap[hr]=reg|64;
609 cur->dirty&=~(1<<hr);
610 cur->isconst&=~(1<<hr);
611 return;
612 }
613 }
614 }
615 }
616 }
617 }
618 }
619 for(j=10;j>=0;j--)
620 {
621 for(r=1;r<=MAXREG;r++)
622 {
623 if(hsn[r]==j) {
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(cur->regmap[hr]==r+64) {
626 cur->regmap[hr]=reg|64;
627 cur->dirty&=~(1<<hr);
628 cur->isconst&=~(1<<hr);
629 return;
630 }
631 }
632 for(hr=0;hr<HOST_REGS;hr++) {
633 if(cur->regmap[hr]==r) {
634 cur->regmap[hr]=reg|64;
635 cur->dirty&=~(1<<hr);
636 cur->isconst&=~(1<<hr);
637 return;
638 }
639 }
640 }
641 }
642 }
c43b5311 643 SysPrintf("This shouldn't happen");exit(1);
57871462 644}
645
646// Allocate a temporary register. This is done without regard to
647// dirty status or whether the register we request is on the unneeded list
648// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 649static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 650{
651 int r,hr;
652 int preferred_reg = -1;
9f51b4b9 653
57871462 654 // see if it's already allocated
655 for(hr=0;hr<HOST_REGS;hr++)
656 {
657 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
658 }
9f51b4b9 659
57871462 660 // Try to allocate any available register
661 for(hr=HOST_REGS-1;hr>=0;hr--) {
662 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
663 cur->regmap[hr]=reg;
664 cur->dirty&=~(1<<hr);
665 cur->isconst&=~(1<<hr);
666 return;
667 }
668 }
9f51b4b9 669
57871462 670 // Find an unneeded register
671 for(hr=HOST_REGS-1;hr>=0;hr--)
672 {
673 r=cur->regmap[hr];
674 if(r>=0) {
675 if(r<64) {
676 if((cur->u>>r)&1) {
677 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
678 cur->regmap[hr]=reg;
679 cur->dirty&=~(1<<hr);
680 cur->isconst&=~(1<<hr);
681 return;
682 }
683 }
684 }
685 else
686 {
687 if((cur->uu>>(r&63))&1) {
688 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
689 cur->regmap[hr]=reg;
690 cur->dirty&=~(1<<hr);
691 cur->isconst&=~(1<<hr);
692 return;
693 }
694 }
695 }
696 }
697 }
9f51b4b9 698
57871462 699 // Ok, now we have to evict someone
700 // Pick a register we hopefully won't need soon
701 // TODO: we might want to follow unconditional jumps here
702 // TODO: get rid of dupe code and make this into a function
703 u_char hsn[MAXREG+1];
704 memset(hsn,10,sizeof(hsn));
705 int j;
706 lsn(hsn,i,&preferred_reg);
707 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
708 if(i>0) {
709 // Don't evict the cycle count at entry points, otherwise the entry
710 // stub will have to write it.
711 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
712 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
713 for(j=10;j>=3;j--)
714 {
715 for(r=1;r<=MAXREG;r++)
716 {
717 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
718 for(hr=0;hr<HOST_REGS;hr++) {
719 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
720 if(cur->regmap[hr]==r+64) {
721 cur->regmap[hr]=reg;
722 cur->dirty&=~(1<<hr);
723 cur->isconst&=~(1<<hr);
724 return;
725 }
726 }
727 }
728 for(hr=0;hr<HOST_REGS;hr++) {
729 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
730 if(cur->regmap[hr]==r) {
731 cur->regmap[hr]=reg;
732 cur->dirty&=~(1<<hr);
733 cur->isconst&=~(1<<hr);
734 return;
735 }
736 }
737 }
738 }
739 }
740 }
741 }
742 for(j=10;j>=0;j--)
743 {
744 for(r=1;r<=MAXREG;r++)
745 {
746 if(hsn[r]==j) {
747 for(hr=0;hr<HOST_REGS;hr++) {
748 if(cur->regmap[hr]==r+64) {
749 cur->regmap[hr]=reg;
750 cur->dirty&=~(1<<hr);
751 cur->isconst&=~(1<<hr);
752 return;
753 }
754 }
755 for(hr=0;hr<HOST_REGS;hr++) {
756 if(cur->regmap[hr]==r) {
757 cur->regmap[hr]=reg;
758 cur->dirty&=~(1<<hr);
759 cur->isconst&=~(1<<hr);
760 return;
761 }
762 }
763 }
764 }
765 }
c43b5311 766 SysPrintf("This shouldn't happen");exit(1);
57871462 767}
e2b5e7aa 768
57871462 769// Allocate a specific ARM register.
e2b5e7aa 770static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 771{
772 int n;
f776eb14 773 int dirty=0;
9f51b4b9 774
57871462 775 // see if it's already allocated (and dealloc it)
776 for(n=0;n<HOST_REGS;n++)
777 {
f776eb14 778 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
779 dirty=(cur->dirty>>n)&1;
780 cur->regmap[n]=-1;
781 }
57871462 782 }
9f51b4b9 783
57871462 784 cur->regmap[hr]=reg;
785 cur->dirty&=~(1<<hr);
f776eb14 786 cur->dirty|=dirty<<hr;
57871462 787 cur->isconst&=~(1<<hr);
788}
789
790// Alloc cycle count into dedicated register
e2b5e7aa 791static void alloc_cc(struct regstat *cur,int i)
57871462 792{
793 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
794}
795
796/* Special alloc */
797
798
799/* Assembler */
800
e2b5e7aa 801static unused char regname[16][4] = {
57871462 802 "r0",
803 "r1",
804 "r2",
805 "r3",
806 "r4",
807 "r5",
808 "r6",
809 "r7",
810 "r8",
811 "r9",
812 "r10",
813 "fp",
814 "r12",
815 "sp",
816 "lr",
817 "pc"};
818
e2b5e7aa 819static void output_w32(u_int word)
57871462 820{
821 *((u_int *)out)=word;
822 out+=4;
823}
e2b5e7aa 824
// Pack three register numbers (each must be < 16) into an instruction
// word fragment: rn at bit 16, rd at bit 12, rm at bit 0.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 832
// Pack rd (bit 12), rn (bit 16), an 8-bit immediate and an even left
// shift amount into an instruction word fragment. The shift is stored
// as ((32-shift)&30)<<7, i.e. in the field directly above the immediate.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rot_field;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rot_field=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rot_field|imm;
}
e2b5e7aa 841
// Try to express imm as an 8-bit value plus an even rotation.
// On success stores the 12-bit encoding (rotation field above the 8-bit
// value) in *encoded and returns 1. Returns 0 with *encoded set to 0
// when no such encoding exists.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
e2b5e7aa 857
858static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 859{
860 u_int ret=genimm(imm,encoded);
861 assert(ret);
581335b0 862 (void)ret;
cfbd3c6e 863}
e2b5e7aa 864
865static u_int genjmp(u_int addr)
57871462 866{
867 int offset=addr-(int)out-8;
e80343e2 868 if(offset<-33554432||offset>=33554432) {
869 if (addr>2) {
c43b5311 870 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 871 exit(1);
872 }
873 return 0;
874 }
57871462 875 return ((u_int)offset>>2)&0xffffff;
876}
877
e2b5e7aa 878static void emit_mov(int rs,int rt)
57871462 879{
880 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
881 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
882}
883
e2b5e7aa 884static void emit_movs(int rs,int rt)
57871462 885{
886 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
887 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
888}
889
e2b5e7aa 890static void emit_add(int rs1,int rs2,int rt)
57871462 891{
892 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
893 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
894}
895
e2b5e7aa 896static void emit_adds(int rs1,int rs2,int rt)
57871462 897{
898 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
899 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
900}
901
e2b5e7aa 902static void emit_adcs(int rs1,int rs2,int rt)
57871462 903{
904 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
905 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
906}
907
e2b5e7aa 908static void emit_sbc(int rs1,int rs2,int rt)
57871462 909{
910 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
911 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
912}
913
e2b5e7aa 914static void emit_sbcs(int rs1,int rs2,int rt)
57871462 915{
916 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
917 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
918}
919
e2b5e7aa 920static void emit_neg(int rs, int rt)
57871462 921{
922 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
923 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
924}
925
e2b5e7aa 926static void emit_negs(int rs, int rt)
57871462 927{
928 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
929 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
930}
931
e2b5e7aa 932static void emit_sub(int rs1,int rs2,int rt)
57871462 933{
934 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
935 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
936}
937
e2b5e7aa 938static void emit_subs(int rs1,int rs2,int rt)
57871462 939{
940 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
941 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
942}
943
e2b5e7aa 944static void emit_zeroreg(int rt)
57871462 945{
946 assem_debug("mov %s,#0\n",regname[rt]);
947 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
948}
949
e2b5e7aa 950static void emit_loadlp(u_int imm,u_int rt)
790ee18e 951{
952 add_literal((int)out,imm);
953 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
954 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
955}
e2b5e7aa 956
957static void emit_movw(u_int imm,u_int rt)
790ee18e 958{
959 assert(imm<65536);
960 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
961 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
962}
e2b5e7aa 963
964static void emit_movt(u_int imm,u_int rt)
790ee18e 965{
966 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
967 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
968}
e2b5e7aa 969
970static void emit_movimm(u_int imm,u_int rt)
790ee18e 971{
972 u_int armval;
973 if(genimm(imm,&armval)) {
974 assem_debug("mov %s,#%d\n",regname[rt],imm);
975 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
976 }else if(genimm(~imm,&armval)) {
977 assem_debug("mvn %s,#%d\n",regname[rt],imm);
978 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
979 }else if(imm<65536) {
665f33e1 980 #ifndef HAVE_ARMV7
790ee18e 981 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
982 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
983 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
984 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
985 #else
986 emit_movw(imm,rt);
987 #endif
988 }else{
665f33e1 989 #ifndef HAVE_ARMV7
790ee18e 990 emit_loadlp(imm,rt);
991 #else
992 emit_movw(imm&0x0000FFFF,rt);
993 emit_movt(imm&0xFFFF0000,rt);
994 #endif
995 }
996}
e2b5e7aa 997
998static void emit_pcreladdr(u_int rt)
790ee18e 999{
1000 assem_debug("add %s,pc,#?\n",regname[rt]);
1001 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1002}
1003
e2b5e7aa 1004static void emit_loadreg(int r, int hr)
57871462 1005{
3d624f89 1006 if(r&64) {
c43b5311 1007 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1008 assert(0);
1009 return;
3d624f89 1010 }
57871462 1011 if((r&63)==0)
1012 emit_zeroreg(hr);
1013 else {
3d624f89 1014 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1015 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1016 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1017 if(r==CCREG) addr=(int)&cycle_count;
1018 if(r==CSREG) addr=(int)&Status;
1019 if(r==FSREG) addr=(int)&FCR31;
1020 if(r==INVCP) addr=(int)&invc_ptr;
1021 u_int offset = addr-(u_int)&dynarec_local;
1022 assert(offset<4096);
1023 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1024 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1025 }
1026}
e2b5e7aa 1027
1028static void emit_storereg(int r, int hr)
57871462 1029{
3d624f89 1030 if(r&64) {
c43b5311 1031 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1032 assert(0);
1033 return;
3d624f89 1034 }
3d624f89 1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==FSREG) addr=(int)&FCR31;
1040 u_int offset = addr-(u_int)&dynarec_local;
1041 assert(offset<4096);
1042 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1043 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1044}
1045
e2b5e7aa 1046static void emit_test(int rs, int rt)
57871462 1047{
1048 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1049 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1050}
1051
e2b5e7aa 1052static void emit_testimm(int rs,int imm)
57871462 1053{
1054 u_int armval;
5a05d80c 1055 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1056 genimm_checked(imm,&armval);
57871462 1057 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1058}
1059
e2b5e7aa 1060static void emit_testeqimm(int rs,int imm)
b9b61529 1061{
1062 u_int armval;
1063 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1064 genimm_checked(imm,&armval);
b9b61529 1065 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1066}
1067
e2b5e7aa 1068static void emit_not(int rs,int rt)
57871462 1069{
1070 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1071 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1072}
1073
e2b5e7aa 1074static void emit_mvnmi(int rs,int rt)
b9b61529 1075{
1076 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1077 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1078}
1079
e2b5e7aa 1080static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1081{
1082 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1083 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1084}
1085
e2b5e7aa 1086static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1087{
1088 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1089 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1090}
e2b5e7aa 1091
1092static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1093{
1094 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1095 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1096}
1097
e2b5e7aa 1098static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1099{
1100 assert(rs<16);
1101 assert(rt<16);
1102 assert(imm<32);
1103 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1104 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1105}
1106
e2b5e7aa 1107static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1108{
1109 assert(rs<16);
1110 assert(rt<16);
1111 assert(imm<32);
1112 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1113 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1114}
1115
e2b5e7aa 1116static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1117{
1118 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1120}
1121
e2b5e7aa 1122static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1123{
1124 assert(rs<16);
1125 assert(rt<16);
1126 if(imm!=0) {
57871462 1127 u_int armval;
1128 if(genimm(imm,&armval)) {
1129 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1130 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1131 }else if(genimm(-imm,&armval)) {
8a0a8423 1132 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1133 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1134 #ifdef HAVE_ARMV7
1135 }else if(rt!=rs&&(u_int)imm<65536) {
1136 emit_movw(imm&0x0000ffff,rt);
1137 emit_add(rs,rt,rt);
1138 }else if(rt!=rs&&(u_int)-imm<65536) {
1139 emit_movw(-imm&0x0000ffff,rt);
1140 emit_sub(rs,rt,rt);
1141 #endif
1142 }else if((u_int)-imm<65536) {
57871462 1143 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1144 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1145 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1146 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1147 }else {
1148 do {
1149 int shift = (ffs(imm) - 1) & ~1;
1150 int imm8 = imm & (0xff << shift);
1151 genimm_checked(imm8,&armval);
1152 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1153 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1154 rs = rt;
1155 imm &= ~imm8;
1156 }
1157 while (imm != 0);
57871462 1158 }
1159 }
1160 else if(rs!=rt) emit_mov(rs,rt);
1161}
1162
e2b5e7aa 1163static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1164{
1165 assert(imm>-65536&&imm<65536);
1166 u_int armval;
1167 if(genimm(imm,&armval)) {
1168 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1169 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1170 }else if(genimm(-imm,&armval)) {
1171 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1172 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1173 }else if(imm<0) {
1174 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1175 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1176 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1177 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1178 }else{
1179 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1180 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1181 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1182 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1183 }
1184}
e2b5e7aa 1185
1186static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1187{
1188 emit_addimm(rt,imm,rt);
1189}
1190
e2b5e7aa 1191static void emit_addnop(u_int r)
57871462 1192{
1193 assert(r<16);
1194 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1195 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1196}
1197
e2b5e7aa 1198static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1199{
1200 u_int armval;
cfbd3c6e 1201 genimm_checked(imm,&armval);
57871462 1202 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1203 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1204}
1edfcc68 1205
e2b5e7aa 1206static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1207{
1208 assert(0);
1209 u_int armval;
cfbd3c6e 1210 genimm_checked(imm,&armval);
57871462 1211 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1212 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1213}
1214
e2b5e7aa 1215static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1216{
1217 // TODO: if(genimm(imm,&armval)) ...
1218 // else
1219 emit_movimm(imm,HOST_TEMPREG);
1220 emit_adds(HOST_TEMPREG,rsl,rtl);
1221 emit_adcimm(rsh,0,rth);
1222}
1223
e2b5e7aa 1224static void emit_andimm(int rs,int imm,int rt)
57871462 1225{
1226 u_int armval;
790ee18e 1227 if(imm==0) {
1228 emit_zeroreg(rt);
1229 }else if(genimm(imm,&armval)) {
57871462 1230 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1231 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1232 }else if(genimm(~imm,&armval)) {
1233 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1234 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1235 }else if(imm==65535) {
332a4533 1236 #ifndef HAVE_ARMV6
57871462 1237 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1238 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1239 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1240 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1241 #else
1242 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1243 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1244 #endif
1245 }else{
1246 assert(imm>0&&imm<65535);
665f33e1 1247 #ifndef HAVE_ARMV7
57871462 1248 assem_debug("mov r14,#%d\n",imm&0xFF00);
1249 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1250 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1251 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1252 #else
1253 emit_movw(imm,HOST_TEMPREG);
1254 #endif
1255 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1256 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1257 }
1258}
1259
e2b5e7aa 1260static void emit_orimm(int rs,int imm,int rt)
57871462 1261{
1262 u_int armval;
790ee18e 1263 if(imm==0) {
1264 if(rs!=rt) emit_mov(rs,rt);
1265 }else if(genimm(imm,&armval)) {
57871462 1266 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1267 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1268 }else{
1269 assert(imm>0&&imm<65536);
1270 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1271 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1272 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1273 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1274 }
1275}
1276
e2b5e7aa 1277static void emit_xorimm(int rs,int imm,int rt)
57871462 1278{
57871462 1279 u_int armval;
790ee18e 1280 if(imm==0) {
1281 if(rs!=rt) emit_mov(rs,rt);
1282 }else if(genimm(imm,&armval)) {
57871462 1283 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1284 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1285 }else{
514ed0d9 1286 assert(imm>0&&imm<65536);
57871462 1287 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1288 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1289 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1290 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1291 }
1292}
1293
e2b5e7aa 1294static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1295{
1296 assert(imm>0);
1297 assert(imm<32);
1298 //if(imm==1) ...
1299 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1300 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1301}
1302
e2b5e7aa 1303static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1304{
1305 assert(imm>0);
1306 assert(imm<32);
1307 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1308 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1309}
1310
e2b5e7aa 1311static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1312{
1313 assert(imm>0);
1314 assert(imm<32);
1315 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1316 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1317}
1318
e2b5e7aa 1319static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1320{
1321 assert(imm>0);
1322 assert(imm<32);
1323 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1325}
1326
e2b5e7aa 1327static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1328{
1329 assert(imm>0);
1330 assert(imm<32);
1331 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1332 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1333}
1334
e2b5e7aa 1335static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1336{
1337 assert(imm>0);
1338 assert(imm<32);
1339 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1341}
1342
e2b5e7aa 1343static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1344{
1345 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1346 assert(imm>0);
1347 assert(imm<32);
1348 //if(imm==1) ...
1349 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1350 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1351 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1352 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1353}
1354
e2b5e7aa 1355static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1356{
1357 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1358 assert(imm>0);
1359 assert(imm<32);
1360 //if(imm==1) ...
1361 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1363 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1364 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1365}
1366
// Sign-extend the low 16 bits of rs into rt: a single sxth when HAVE_ARMV6
// is defined, otherwise a shift-left / arithmetic-shift-right pair.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  const u_int insn = 0xe6bf0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1377
// Sign-extend the low 8 bits of rs into rt: a single sxtb when HAVE_ARMV6
// is defined, otherwise a shift-left / arithmetic-shift-right pair.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  const u_int insn = 0xe6af0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1388
e2b5e7aa 1389static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1390{
1391 assert(rs<16);
1392 assert(rt<16);
1393 assert(shift<16);
1394 //if(imm==1) ...
1395 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1397}
e2b5e7aa 1398
1399static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1400{
1401 assert(rs<16);
1402 assert(rt<16);
1403 assert(shift<16);
1404 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1405 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1406}
e2b5e7aa 1407
1408static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1409{
1410 assert(rs<16);
1411 assert(rt<16);
1412 assert(shift<16);
1413 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1414 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1415}
57871462 1416
e2b5e7aa 1417static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1418{
1419 assert(rs<16);
1420 assert(rt<16);
1421 assert(shift<16);
1422 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1423 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1424}
e2b5e7aa 1425
1426static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1427{
1428 assert(rs<16);
1429 assert(rt<16);
1430 assert(shift<16);
1431 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1432 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1433}
1434
e2b5e7aa 1435static void emit_cmpimm(int rs,int imm)
57871462 1436{
1437 u_int armval;
1438 if(genimm(imm,&armval)) {
5a05d80c 1439 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1440 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1441 }else if(genimm(-imm,&armval)) {
5a05d80c 1442 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1443 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1444 }else if(imm>0) {
1445 assert(imm<65536);
57871462 1446 emit_movimm(imm,HOST_TEMPREG);
57871462 1447 assem_debug("cmp %s,r14\n",regname[rs]);
1448 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1449 }else{
1450 assert(imm>-65536);
57871462 1451 emit_movimm(-imm,HOST_TEMPREG);
57871462 1452 assem_debug("cmn %s,r14\n",regname[rs]);
1453 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1454 }
1455}
1456
e2b5e7aa 1457static void emit_cmovne_imm(int imm,int rt)
57871462 1458{
1459 assem_debug("movne %s,#%d\n",regname[rt],imm);
1460 u_int armval;
cfbd3c6e 1461 genimm_checked(imm,&armval);
57871462 1462 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1463}
e2b5e7aa 1464
1465static void emit_cmovl_imm(int imm,int rt)
57871462 1466{
1467 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1468 u_int armval;
cfbd3c6e 1469 genimm_checked(imm,&armval);
57871462 1470 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1471}
e2b5e7aa 1472
1473static void emit_cmovb_imm(int imm,int rt)
57871462 1474{
1475 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1476 u_int armval;
cfbd3c6e 1477 genimm_checked(imm,&armval);
57871462 1478 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1479}
e2b5e7aa 1480
1481static void emit_cmovs_imm(int imm,int rt)
57871462 1482{
1483 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1484 u_int armval;
cfbd3c6e 1485 genimm_checked(imm,&armval);
57871462 1486 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1487}
e2b5e7aa 1488
1489static void emit_cmove_reg(int rs,int rt)
57871462 1490{
1491 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1492 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1493}
e2b5e7aa 1494
1495static void emit_cmovne_reg(int rs,int rt)
57871462 1496{
1497 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1498 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1499}
e2b5e7aa 1500
1501static void emit_cmovl_reg(int rs,int rt)
57871462 1502{
1503 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1504 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1505}
e2b5e7aa 1506
1507static void emit_cmovs_reg(int rs,int rt)
57871462 1508{
1509 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1510 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1511}
1512
// rt = (rs < imm) using the signed "lt" condition.
// When rt aliases rs, rt is cleared only after the compare has been emitted.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1520
// rt = (rs < imm) using the unsigned "cc" condition.
// When rt aliases rs, rt is cleared only after the compare has been emitted.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1528
// Signed "set if less than immediate" on the 64-bit pair rsh:rsl with a
// 32-bit result in rt. The low-word result is computed first, then corrected
// from the high word: compared against -1 for a negative imm, tested against
// itself otherwise. rsh must not alias rt.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
e2b5e7aa 1546
// Unsigned "set if less than immediate" on the 64-bit pair rsh:rsl with a
// 32-bit result in rt. The low-word result is computed first, then overridden
// when the high word differs from the immediate's high word (0 for imm>=0,
// -1 otherwise). rsh must not alias rt.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1562
e2b5e7aa 1563static void emit_cmp(int rs,int rt)
57871462 1564{
1565 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1566 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1567}
e2b5e7aa 1568
// rt = (rs > 0): compare rs with 1, preset rt to 1, then clear it under the
// signed "lt" condition.
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1576
// rt = (rs != 0): set flags from rs (tst when rs and rt are the same
// register, otherwise movs rs into rt), then movne rt,#1.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
e2b5e7aa 1584
// rt = (rsh:rsl > 0) for a 64-bit pair: start from the low-word result, then
// force 1 when the high word is non-zero and 0 when it is negative.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1593
// rt = (rsh:rsl != 0) for a 64-bit pair: orrs the halves into rt, then
// movne rt,#1.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1600
// rt = (rs1 < rs2) using the signed "lt" condition.
// When rt aliases a source, rt is cleared only after the compare is emitted.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
e2b5e7aa 1609
// rt = (rs1 < rs2) using the unsigned "cc" condition.
// When rt aliases a source, rt is cleared only after the compare is emitted.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
e2b5e7aa 1618
1619static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1620{
1621 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1622 assert(u1!=rt);
1623 assert(u2!=rt);
1624 emit_cmp(l1,l2);
1625 emit_movimm(0,rt);
1626 emit_sbcs(u1,u2,HOST_TEMPREG);
1627 emit_cmovl_imm(1,rt);
1628}
e2b5e7aa 1629
1630static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1631{
1632 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1633 assert(u1!=rt);
1634 assert(u2!=rt);
1635 emit_cmp(l1,l2);
1636 emit_movimm(0,rt);
1637 emit_sbcs(u1,u2,HOST_TEMPREG);
1638 emit_cmovb_imm(1,rt);
1639}
1640
e2b5e7aa 1641static void emit_call(int a)
57871462 1642{
1643 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1644 u_int offset=genjmp(a);
1645 output_w32(0xeb000000|offset);
1646}
e2b5e7aa 1647
1648static void emit_jmp(int a)
57871462 1649{
1650 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1651 u_int offset=genjmp(a);
1652 output_w32(0xea000000|offset);
1653}
e2b5e7aa 1654
1655static void emit_jne(int a)
57871462 1656{
1657 assem_debug("bne %x\n",a);
1658 u_int offset=genjmp(a);
1659 output_w32(0x1a000000|offset);
1660}
e2b5e7aa 1661
1662static void emit_jeq(int a)
57871462 1663{
1664 assem_debug("beq %x\n",a);
1665 u_int offset=genjmp(a);
1666 output_w32(0x0a000000|offset);
1667}
e2b5e7aa 1668
1669static void emit_js(int a)
57871462 1670{
1671 assem_debug("bmi %x\n",a);
1672 u_int offset=genjmp(a);
1673 output_w32(0x4a000000|offset);
1674}
e2b5e7aa 1675
1676static void emit_jns(int a)
57871462 1677{
1678 assem_debug("bpl %x\n",a);
1679 u_int offset=genjmp(a);
1680 output_w32(0x5a000000|offset);
1681}
e2b5e7aa 1682
1683static void emit_jl(int a)
57871462 1684{
1685 assem_debug("blt %x\n",a);
1686 u_int offset=genjmp(a);
1687 output_w32(0xba000000|offset);
1688}
e2b5e7aa 1689
1690static void emit_jge(int a)
57871462 1691{
1692 assem_debug("bge %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0xaa000000|offset);
1695}
e2b5e7aa 1696
1697static void emit_jno(int a)
57871462 1698{
1699 assem_debug("bvc %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0x7a000000|offset);
1702}
e2b5e7aa 1703
1704static void emit_jc(int a)
57871462 1705{
1706 assem_debug("bcs %x\n",a);
1707 u_int offset=genjmp(a);
1708 output_w32(0x2a000000|offset);
1709}
e2b5e7aa 1710
1711static void emit_jcc(int a)
57871462 1712{
1713 assem_debug("bcc %x\n",a);
1714 u_int offset=genjmp(a);
1715 output_w32(0x3a000000|offset);
1716}
1717
e2b5e7aa 1718static void emit_callreg(u_int r)
57871462 1719{
c6c3b1b3 1720 assert(r<15);
1721 assem_debug("blx %s\n",regname[r]);
1722 output_w32(0xe12fff30|r);
57871462 1723}
e2b5e7aa 1724
1725static void emit_jmpreg(u_int r)
57871462 1726{
1727 assem_debug("mov pc,%s\n",regname[r]);
1728 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1729}
1730
e2b5e7aa 1731static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1732{
1733 assert(offset>-4096&&offset<4096);
1734 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1735 if(offset>=0) {
1736 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1737 }else{
1738 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1739 }
1740}
e2b5e7aa 1741
1742static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1743{
1744 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1745 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1746}
e2b5e7aa 1747
1748static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1749{
1750 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1751 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1752}
e2b5e7aa 1753
1754static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1755{
1756 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1757 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1758}
e2b5e7aa 1759
1760static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1761{
1762 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1763 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1764}
e2b5e7aa 1765
1766static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1767{
1768 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1769 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1770}
e2b5e7aa 1771
1772static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1773{
1774 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1775 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1776}
e2b5e7aa 1777
// Word load through an optional map register: with map<0 this is a plain
// offset load; otherwise addr must be 0 and the load uses [rs, map lsl #2].
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1786
// 64-bit load into rh (skipped when rh<0) and rl.
// Without a map register the words are read at addr and addr+4. With a map
// register, map is incremented by one between the two loads; rh must not
// alias rs in that case.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1799
1800static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1801{
1802 assert(offset>-256&&offset<256);
1803 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1804 if(offset>=0) {
1805 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1806 }else{
1807 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1808 }
1809}
e2b5e7aa 1810
1811static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1812{
1813 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1814 else {
1815 if(addr==0) {
1816 emit_shlimm(map,2,map);
1817 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1818 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1819 }else{
1820 assert(addr>-256&&addr<256);
1821 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1822 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1823 emit_movsbl_indexed(addr, rt, rt);
1824 }
1825 }
1826}
e2b5e7aa 1827
1828static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1829{
1830 assert(offset>-256&&offset<256);
1831 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1832 if(offset>=0) {
1833 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1834 }else{
1835 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1836 }
1837}
e2b5e7aa 1838
1839static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1840{
1841 assert(offset>-4096&&offset<4096);
1842 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1843 if(offset>=0) {
1844 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1845 }else{
1846 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1847 }
1848}
e2b5e7aa 1849
1850static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1851{
1852 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1853 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1854}
e2b5e7aa 1855
// Unsigned byte load through an optional map register.
// map<0: plain offset load. Otherwise the load uses [base, map lsl #2], where
// base is rs for addr==0 and rt = rs + addr otherwise.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1868
1869static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1870{
1871 assert(offset>-256&&offset<256);
1872 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1873 if(offset>=0) {
1874 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1875 }else{
1876 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1877 }
1878}
e2b5e7aa 1879
054175e9 1880static void emit_ldrd(int offset, int rs, int rt)
1881{
1882 assert(offset>-256&&offset<256);
1883 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1884 if(offset>=0) {
1885 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1886 }else{
1887 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1888 }
1889}
e2b5e7aa 1890
1891static void emit_readword(int addr, int rt)
57871462 1892{
1893 u_int offset = addr-(u_int)&dynarec_local;
1894 assert(offset<4096);
1895 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1896 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1897}
e2b5e7aa 1898
1899static unused void emit_movsbl(int addr, int rt)
57871462 1900{
1901 u_int offset = addr-(u_int)&dynarec_local;
1902 assert(offset<256);
1903 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1904 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1905}
e2b5e7aa 1906
1907static unused void emit_movswl(int addr, int rt)
57871462 1908{
1909 u_int offset = addr-(u_int)&dynarec_local;
1910 assert(offset<256);
1911 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1912 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1913}
e2b5e7aa 1914
1915static unused void emit_movzbl(int addr, int rt)
57871462 1916{
1917 u_int offset = addr-(u_int)&dynarec_local;
1918 assert(offset<4096);
1919 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1920 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1921}
e2b5e7aa 1922
1923static unused void emit_movzwl(int addr, int rt)
57871462 1924{
1925 u_int offset = addr-(u_int)&dynarec_local;
1926 assert(offset<256);
1927 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1928 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1929}
57871462 1930
e2b5e7aa 1931static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1932{
1933 assert(offset>-4096&&offset<4096);
1934 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1935 if(offset>=0) {
1936 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1937 }else{
1938 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1939 }
1940}
e2b5e7aa 1941
1942static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 1943{
1944 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1945 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1946}
e2b5e7aa 1947
// Word store through an optional map register: with map<0 this is a plain
// offset store; otherwise addr must be 0 and the store uses [rs, map lsl #2].
// The temp parameter is not used by this function.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 1956
// 64-bit store of rh:rl.
// Without a map register the words go to addr (skipped when rh<0) and addr+4.
// With a map register rh must be valid; the second word is stored either via
// temp = map + 1 (when temp differs from rs) or by advancing rs by 4.
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  if(temp!=rs) {
    emit_addimm(map,1,temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 1973
1974static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1975{
1976 assert(offset>-256&&offset<256);
1977 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1978 if(offset>=0) {
1979 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1980 }else{
1981 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1982 }
1983}
e2b5e7aa 1984
1985static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1986{
1987 assert(offset>-4096&&offset<4096);
1988 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1989 if(offset>=0) {
1990 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1991 }else{
1992 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1993 }
1994}
e2b5e7aa 1995
1996static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 1997{
1998 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1999 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2000}
e2b5e7aa 2001
// Byte store through an optional map register.
// map<0: plain offset store. Otherwise the store uses [base, map lsl #2],
// where base is rs for addr==0 and temp = rs + addr otherwise.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
e2b5e7aa 2014
2015static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2016{
2017 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2018 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2019}
e2b5e7aa 2020
2021static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2022{
2023 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2024 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2025}
e2b5e7aa 2026
2027static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2028{
2029 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2030 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2031}
e2b5e7aa 2032
2033static void emit_writeword(int rt, int addr)
57871462 2034{
2035 u_int offset = addr-(u_int)&dynarec_local;
2036 assert(offset<4096);
2037 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2038 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2039}
e2b5e7aa 2040
2041static unused void emit_writehword(int rt, int addr)
57871462 2042{
2043 u_int offset = addr-(u_int)&dynarec_local;
2044 assert(offset<256);
2045 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2046 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2047}
e2b5e7aa 2048
2049static unused void emit_writebyte(int rt, int addr)
57871462 2050{
2051 u_int offset = addr-(u_int)&dynarec_local;
2052 assert(offset<4096);
74426039 2053 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2054 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2055}
57871462 2056
e2b5e7aa 2057static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2058{
2059 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2060 assert(rs1<16);
2061 assert(rs2<16);
2062 assert(hi<16);
2063 assert(lo<16);
2064 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2065}
e2b5e7aa 2066
2067static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2068{
2069 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2070 assert(rs1<16);
2071 assert(rs2<16);
2072 assert(hi<16);
2073 assert(lo<16);
2074 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2075}
2076
e2b5e7aa 2077static void emit_clz(int rs,int rt)
57871462 2078{
2079 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2080 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2081}
2082
e2b5e7aa 2083static void emit_subcs(int rs1,int rs2,int rt)
57871462 2084{
2085 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2086 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2087}
2088
e2b5e7aa 2089static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2090{
2091 assert(imm>0);
2092 assert(imm<32);
2093 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2094 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2095}
2096
e2b5e7aa 2097static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2098{
2099 assert(imm>0);
2100 assert(imm<32);
2101 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2102 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2103}
2104
e2b5e7aa 2105static void emit_negmi(int rs, int rt)
57871462 2106{
2107 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2108 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2109}
2110
e2b5e7aa 2111static void emit_negsmi(int rs, int rt)
57871462 2112{
2113 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2114 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2115}
2116
e2b5e7aa 2117static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2118{
2119 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2120 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2121}
2122
e2b5e7aa 2123static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2124{
2125 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2126 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2127}
2128
e2b5e7aa 2129static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2130{
2131 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2132 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2133}
2134
e2b5e7aa 2135static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2136{
2137 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2138 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2139}
2140
e2b5e7aa 2141static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2142{
2143 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2144 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2145}
2146
e2b5e7aa 2147static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2148{
2149 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2150 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2151}
2152
e2b5e7aa 2153static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2154{
2155 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2156 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2157}
2158
e2b5e7aa 2159static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2160{
2161 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2162 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2163}
2164
e2b5e7aa 2165static void emit_teq(int rs, int rt)
57871462 2166{
2167 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2168 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2169}
2170
e2b5e7aa 2171static void emit_rsbimm(int rs, int imm, int rt)
57871462 2172{
2173 u_int armval;
cfbd3c6e 2174 genimm_checked(imm,&armval);
57871462 2175 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2176 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2177}
2178
2179// Load 2 immediates optimizing for small code size
e2b5e7aa 2180static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2181{
2182 emit_movimm(imm1,rt1);
2183 u_int armval;
2184 if(genimm(imm2-imm1,&armval)) {
2185 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2186 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2187 }else if(genimm(imm1-imm2,&armval)) {
2188 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2189 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2190 }
2191 else emit_movimm(imm2,rt2);
2192}
2193
2194// Conditionally select one of two immediates, optimizing for small code size
2195// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2196static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2197{
2198 u_int armval;
2199 if(genimm(imm2-imm1,&armval)) {
2200 emit_movimm(imm1,rt);
2201 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2202 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2203 }else if(genimm(imm1-imm2,&armval)) {
2204 emit_movimm(imm1,rt);
2205 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2206 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2207 }
2208 else {
665f33e1 2209 #ifndef HAVE_ARMV7
57871462 2210 emit_movimm(imm1,rt);
2211 add_literal((int)out,imm2);
2212 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2213 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2214 #else
2215 emit_movw(imm1&0x0000FFFF,rt);
2216 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2217 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2218 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2219 }
2220 emit_movt(imm1&0xFFFF0000,rt);
2221 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2222 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2223 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2224 }
2225 #endif
2226 }
2227}
2228
57871462 2229// special case for checking invalid_code
e2b5e7aa 2230static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2231{
2232 assert(imm<128&&imm>=0);
2233 assert(r>=0&&r<16);
2234 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2235 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2236 emit_cmpimm(HOST_TEMPREG,imm);
2237}
2238
// Emit "blne a": conditional call to address a, with the branch offset
// computed by genjmp().
static void emit_callne(int a)
{
  assem_debug("blne %x\n",a);
  output_w32(0x1b000000|genjmp(a));
}
2245
57871462 2246// Used to preload hash table entries
e2b5e7aa 2247static unused void emit_prefetchreg(int r)
57871462 2248{
2249 assem_debug("pld %s\n",regname[r]);
2250 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2251}
2252
2253// Special case for mini_ht
e2b5e7aa 2254static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2255{
2256 assert(offset<4096);
2257 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2258 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2259}
2260
e2b5e7aa 2261static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2262{
2263 u_int armval;
cfbd3c6e 2264 genimm_checked(imm,&armval);
57871462 2265 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2266 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2267}
2268
e2b5e7aa 2269static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2270{
2271 u_int armval;
cfbd3c6e 2272 genimm_checked(imm,&armval);
57871462 2273 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2274 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2275}
2276
e2b5e7aa 2277static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2278{
2279 u_int armval;
cfbd3c6e 2280 genimm_checked(imm,&armval);
57871462 2281 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2282 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2283}
2284
e2b5e7aa 2285static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2286{
2287 u_int armval;
cfbd3c6e 2288 genimm_checked(imm,&armval);
57871462 2289 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2290 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2291}
2292
e2b5e7aa 2293static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2294{
2295 u_int armval;
cfbd3c6e 2296 genimm_checked(imm,&armval);
57871462 2297 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2298 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2299}
2300
e2b5e7aa 2301static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2302{
2303 u_int armval;
cfbd3c6e 2304 genimm_checked(imm,&armval);
b9b61529 2305 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2306 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2307}
2308
e2b5e7aa 2309static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2310{
2311 u_int armval;
cfbd3c6e 2312 genimm_checked(imm,&armval);
b9b61529 2313 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2314 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2315}
2316
e2b5e7aa 2317static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2318{
2319 u_int armval;
2320 genimm_checked(imm,&armval);
2321 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2322 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2323}
2324
e2b5e7aa 2325static void emit_jno_unlikely(int a)
57871462 2326{
2327 //emit_jno(a);
2328 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2329 output_w32(0x72800000|rd_rn_rm(15,15,0));
2330}
2331
054175e9 2332static void save_regs_all(u_int reglist)
57871462 2333{
054175e9 2334 int i;
57871462 2335 if(!reglist) return;
2336 assem_debug("stmia fp,{");
054175e9 2337 for(i=0;i<16;i++)
2338 if(reglist&(1<<i))
2339 assem_debug("r%d,",i);
57871462 2340 assem_debug("}\n");
2341 output_w32(0xe88b0000|reglist);
2342}
e2b5e7aa 2343
054175e9 2344static void restore_regs_all(u_int reglist)
57871462 2345{
054175e9 2346 int i;
57871462 2347 if(!reglist) return;
2348 assem_debug("ldmia fp,{");
054175e9 2349 for(i=0;i<16;i++)
2350 if(reglist&(1<<i))
2351 assem_debug("r%d,",i);
57871462 2352 assem_debug("}\n");
2353 output_w32(0xe89b0000|reglist);
2354}
e2b5e7aa 2355
054175e9 2356// Save registers before function call
2357static void save_regs(u_int reglist)
2358{
4d646738 2359 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2360 save_regs_all(reglist);
2361}
e2b5e7aa 2362
054175e9 2363// Restore registers after function call
2364static void restore_regs(u_int reglist)
2365{
4d646738 2366 reglist&=CALLER_SAVE_REGS;
054175e9 2367 restore_regs_all(reglist);
2368}
57871462 2369
57871462 2370/* Stubs/epilogue */
2371
e2b5e7aa 2372static void literal_pool(int n)
57871462 2373{
2374 if(!literalcount) return;
2375 if(n) {
2376 if((int)out-literals[0][0]<4096-n) return;
2377 }
2378 u_int *ptr;
2379 int i;
2380 for(i=0;i<literalcount;i++)
2381 {
77750690 2382 u_int l_addr=(u_int)out;
2383 int j;
2384 for(j=0;j<i;j++) {
2385 if(literals[j][1]==literals[i][1]) {
2386 //printf("dup %08x\n",literals[i][1]);
2387 l_addr=literals[j][0];
2388 break;
2389 }
2390 }
57871462 2391 ptr=(u_int *)literals[i][0];
77750690 2392 u_int offset=l_addr-(u_int)ptr-8;
57871462 2393 assert(offset<4096);
2394 assert(!(offset&3));
2395 *ptr|=offset;
77750690 2396 if(l_addr==(u_int)out) {
2397 literals[i][0]=l_addr; // remember for dupes
2398 output_w32(literals[i][1]);
2399 }
57871462 2400 }
2401 literalcount=0;
2402}
2403
e2b5e7aa 2404static void literal_pool_jumpover(int n)
57871462 2405{
2406 if(!literalcount) return;
2407 if(n) {
2408 if((int)out-literals[0][0]<4096-n) return;
2409 }
2410 int jaddr=(int)out;
2411 emit_jmp(0);
2412 literal_pool(0);
2413 set_jump_target(jaddr,(int)out);
2414}
2415
e2b5e7aa 2416static void emit_extjump2(u_int addr, int target, int linker)
57871462 2417{
2418 u_char *ptr=(u_char *)addr;
2419 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2420 (void)ptr;
2421
57871462 2422 emit_loadlp(target,0);
2423 emit_loadlp(addr,1);
24385cae 2424 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2425 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2426//DEBUG >
2427#ifdef DEBUG_CYCLE_COUNT
2428 emit_readword((int)&last_count,ECX);
2429 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2430 emit_readword((int)&next_interupt,ECX);
2431 emit_writeword(HOST_CCREG,(int)&Count);
2432 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2433 emit_writeword(ECX,(int)&last_count);
2434#endif
2435//DEBUG <
2436 emit_jmp(linker);
2437}
2438
e2b5e7aa 2439static void emit_extjump(int addr, int target)
57871462 2440{
2441 emit_extjump2(addr, target, (int)dyna_linker);
2442}
e2b5e7aa 2443
2444static void emit_extjump_ds(int addr, int target)
57871462 2445{
2446 emit_extjump2(addr, target, (int)dyna_linker_ds);
2447}
2448
13e35c04 2449// put rt_val into rt, potentially making use of rs with value rs_val
2450static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2451{
8575a877 2452 u_int armval;
2453 int diff;
2454 if(genimm(rt_val,&armval)) {
2455 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2456 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2457 return;
2458 }
2459 if(genimm(~rt_val,&armval)) {
2460 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2461 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2462 return;
2463 }
2464 diff=rt_val-rs_val;
2465 if(genimm(diff,&armval)) {
2466 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2467 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2468 return;
2469 }else if(genimm(-diff,&armval)) {
2470 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2471 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2472 return;
2473 }
2474 emit_movimm(rt_val,rt);
2475}
2476
// Return 1 if emit_movimm_from() can do its job cheaply for these two
// values: they are equal, or their difference (in either direction), after
// dropping trailing pairs of zero bits, fits in 8 bits. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int delta=v2-v1;
  int attempt;
  if(delta==0) return 1;
  // first try v2-v1, then v1-v2
  for(attempt=0;attempt<2;attempt++) {
    u_int bits=delta;
    while(bits!=0&&(bits&3)==0)
      bits>>=2;
    if(bits<0x100) return 1;
    delta=0u-delta;
  }
  return 0;
}
cbbab9cd 2492
// Move the values held in registers a0 and a1 into r0 and r1.
// A negative register number means "no argument" and is skipped.
// Trashes r2 when the two registers have to be swapped.
static void pass_args(int a0, int a1)
{
  if(a1==0) {
    if(a0==1) {
      // r0 and r1 must trade places; go through r2
      emit_mov(a0,2); emit_mov(a1,1); emit_mov(2,0);
      return;
    }
    if(a0!=0) {
      // second argument currently lives in r0: move it out of the way first
      emit_mov(a1,1);
      if(a0>=0) emit_mov(a0,0);
      return;
    }
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2509
b1be1eee 2510static void mov_loadtype_adj(int type,int rs,int rt)
2511{
2512 switch(type) {
2513 case LOADB_STUB: emit_signextend8(rs,rt); break;
2514 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2515 case LOADH_STUB: emit_signextend16(rs,rt); break;
2516 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2517 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2518 default: assert(0);
2519 }
2520}
2521
b1be1eee 2522#include "pcsxmem.h"
2523#include "pcsxmem_inline.c"
b1be1eee 2524
// Emit the out-of-line read stub described by stubs[n].
// Fields read here: [0] stub type, [1] address handed to set_jump_target so
// that it lands on this stub, [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] value fed (plus one) to CLOCK_ADJUST for the cycle count,
// [7] list of registers to preserve.
// The emitted code first attempts a cc-conditional load through the
// mem_rtab entry for address>>12 and returns via emit_jcc; otherwise it
// calls jump_handler_read8/16/32 and converts the result taken from r0.
e2b5e7aa 2525static void do_readstub(int n)
57871462 2526{
2527 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2528 literal_pool(256);
2529 set_jump_target(stubs[n][1],(int)out);
2530 int type=stubs[n][0];
2531 int i=stubs[n][3];
2532 int rs=stubs[n][4];
2533 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2534 u_int reglist=stubs[n][7];
2535 signed char *i_regmap=i_regs->regmap;
581335b0 2536 int rt;
// coprocessor loads and LOADLR go through FTEMP instead of the MIPS target register
b9b61529 2537 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2538 rt=get_reg(i_regmap,FTEMP);
2539 }else{
57871462 2540 rt=get_reg(i_regmap,rt1[i]);
2541 }
2542 assert(rs>=0);
c6c3b1b3 2543 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2544 reglist|=(1<<rs);
// look for a scratch register within mask 0x13ff (r0-r9, r12) that is not in reglist
2545 for(r=0;r<=12;r++) {
2546 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2547 temp=r; break;
2548 }
2549 }
// the destination register is excluded from the save/restore list
db829eeb 2550 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2551 reglist&=~(1<<rt);
// no free scratch register: save registers up front and use r0 (r2 when rs is r0)
2552 if(temp==-1) {
2553 save_regs(reglist);
2554 regs_saved=1;
2555 temp=(rs==0)?2:0;
2556 }
// prefer r1 over HOST_TEMPREG for the table entry when r1 is available
2557 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2558 temp2=1;
// temp2 = mem_rtab[rs>>12], then shifted left by one with flags updated
2559 emit_readword((int)&mem_rtab,temp);
2560 emit_shrimm(rs,12,temp2);
2561 emit_readword_dualindexedx4(temp,temp2,temp2);
2562 emit_lsls_imm(temp2,1,temp2);
// direct access attempt: cc-conditional load from temp2+rs into rt
2563 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2564 switch(type) {
2565 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2566 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2567 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2568 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2569 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2570 }
2571 }
2572 if(regs_saved) {
2573 restore_jump=(int)out;
2574 emit_jcc(0); // jump to reg restore
2575 }
2576 else
2577 emit_jcc(stubs[n][2]); // return address
2578
// handler path: registers must be saved before the call if not done already
2579 if(!regs_saved)
2580 save_regs(reglist);
2581 int handler=0;
2582 if(type==LOADB_STUB||type==LOADBU_STUB)
2583 handler=(int)jump_handler_read8;
2584 if(type==LOADH_STUB||type==LOADHU_STUB)
2585 handler=(int)jump_handler_read16;
2586 if(type==LOADW_STUB)
2587 handler=(int)jump_handler_read32;
2588 assert(handler!=0);
// r0 = address register, r1 = shifted table entry
b96d3df7 2589 pass_args(rs,temp2);
c6c3b1b3 2590 int cc=get_reg(i_regmap,CCREG);
2591 if(cc<0)
2592 emit_loadreg(CCREG,2);
// r2 = cycle count plus the adjusted cycles for this instruction
2573466a 2593 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2594 emit_call(handler);
// move the handler result from r0 into rt with the extension matching the load type
2595 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2596 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2597 }
2598 if(restore_jump)
2599 set_jump_target(restore_jump,(int)out);
2600 restore_regs(reglist);
2601 emit_jmp(stubs[n][2]); // return address
57871462 2602}
2603
c6c3b1b3 2604// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2605static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2606{
2607 u_int l1,l2=0;
2608 l1=((u_int *)table)[addr>>12];
2609 if((l1&(1<<31))==0) {
2610 u_int v=l1<<1;
2611 *addr_host=v+addr;
2612 return 0;
2613 }
2614 else {
2615 l1<<=1;
2616 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2617 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2618 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2619 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2620 else
2621 l2=((u_int *)l1)[(addr&0xfff)/4];
2622 if((l2&(1<<31))==0) {
2623 u_int v=l2<<1;
2624 *addr_host=v+(addr&0xfff);
2625 return 0;
2626 }
2627 return l2<<1;
2628 }
2629}
c6c3b1b3 2630
// Emit an inline read for a load whose address `addr` is known here.
// Order of attempts: pcsx_direct_read(); a direct host-memory load when
// get_direct_memhandler() reports no handler; otherwise a call to the
// handler (or to jump_handler_read8/16/32 when the handler is dynamic),
// with registers in reglist saved around the call.
// adj (plus one) is fed to CLOCK_ADJUST for the cycle count.
e2b5e7aa 2631static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2632{
2633 int rs=get_reg(regmap,target);
57871462 2634 int rt=get_reg(regmap,target);
// fall back to the temporary (-1) mapping for the address register
535d208a 2635 if(rs<0) rs=get_reg(regmap,-1);
57871462 2636 assert(rs>=0);
b1be1eee 2637 u_int handler,host_addr=0,is_dynamic,far_call=0;
2638 int cc=get_reg(regmap,CCREG);
2639 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2640 return;
c6c3b1b3 2641 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
// no handler: the address maps straight to host memory
2642 if (handler==0) {
// nothing to load when there is no destination register or the target is r0 of the guest
db829eeb 2643 if(rt<0||rt1[i]==0)
c6c3b1b3 2644 return;
// rs holds addr; turn it into host_addr before loading through it
13e35c04 2645 if(addr!=host_addr)
2646 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2647 switch(type) {
2648 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2649 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2650 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2651 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2652 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2653 default: assert(0);
2654 }
2655 return;
2656 }
// dynamic handlers are reached through the generic jump_handler_read* entry points
b1be1eee 2657 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2658 if(is_dynamic) {
2659 if(type==LOADB_STUB||type==LOADBU_STUB)
2660 handler=(int)jump_handler_read8;
2661 if(type==LOADH_STUB||type==LOADHU_STUB)
2662 handler=(int)jump_handler_read16;
2663 if(type==LOADW_STUB)
2664 handler=(int)jump_handler_read32;
2665 }
c6c3b1b3 2666
2667 // call a memhandler
// the destination register is excluded from the save/restore list
db829eeb 2668 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2669 reglist&=~(1<<rt);
2670 save_regs(reglist);
// r0 = address: a constant when target is 0, otherwise taken from rs
2671 if(target==0)
2672 emit_movimm(addr,0);
2673 else if(rs!=0)
2674 emit_mov(rs,0);
// check that the handler is within +/-32MB of the call site
c6c3b1b3 2675 int offset=(int)handler-(int)out-8;
2676 if(offset<-33554432||offset>=33554432) {
2677 // unreachable memhandler, a plugin func perhaps
b1be1eee 2678 emit_movimm(handler,12);
2679 far_call=1;
2680 }
2681 if(cc<0)
2682 emit_loadreg(CCREG,2);
2683 if(is_dynamic) {
// r1 = mem_rtab entry for this page shifted left by one, r2 = adjusted cycle count
2684 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2685 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2686 }
b1be1eee 2687 else {
// static handler: write last_count + adjusted cycle count to Count before the call
2688 emit_readword((int)&last_count,3);
2689 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2690 emit_add(2,3,2);
2691 emit_writeword(2,(int)&Count);
2692 }
2693
2694 if(far_call)
2695 emit_callreg(12);
c6c3b1b3 2696 else
2697 emit_call(handler);
b1be1eee 2698
// move the result from r0 into rt with the extension matching the load type
db829eeb 2699 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2700 switch(type) {
2701 case LOADB_STUB: emit_signextend8(0,rt); break;
2702 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2703 case LOADH_STUB: emit_signextend16(0,rt); break;
2704 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2705 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2706 default: assert(0);
2707 }
2708 }
2709 restore_regs(reglist);
57871462 2710}
2711
// Emit the out-of-line write stub described by stubs[n].
// Fields read here: [0] stub type, [1] address handed to set_jump_target so
// that it lands on this stub, [2] return address, [3] instruction index,
// [4] host register holding the address, [5] struct regstat pointer,
// [6] value fed (plus one) to CLOCK_ADJUST for the cycle count,
// [7] list of registers to preserve.
// The emitted code first attempts a cc-conditional store through the
// mem_wtab entry for address>>12 and returns via emit_jcc; otherwise it
// calls jump_handler_write8/16/32, which returns the new cycle count.
e2b5e7aa 2712static void do_writestub(int n)
57871462 2713{
2714 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2715 literal_pool(256);
2716 set_jump_target(stubs[n][1],(int)out);
2717 int type=stubs[n][0];
2718 int i=stubs[n][3];
2719 int rs=stubs[n][4];
2720 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2721 u_int reglist=stubs[n][7];
2722 signed char *i_regmap=i_regs->regmap;
581335b0 2723 int rt,r;
// coprocessor stores take their data from FTEMP instead of the MIPS source register
b9b61529 2724 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2725 rt=get_reg(i_regmap,r=FTEMP);
2726 }else{
57871462 2727 rt=get_reg(i_regmap,r=rs2[i]);
2728 }
2729 assert(rs>=0);
2730 assert(rt>=0);
b96d3df7 2731 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2732 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a scratch register within mask 0x13ff (r0-r9, r12) not in use
2733 for(rtmp=0;rtmp<=12;rtmp++) {
2734 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2735 temp=rtmp; break;
2736 }
2737 }
// none free: save registers up front and take the first of r0-r3 that is neither rs nor rt
2738 if(temp==-1) {
2739 save_regs(reglist);
2740 regs_saved=1;
2741 for(rtmp=0;rtmp<=3;rtmp++)
2742 if(rtmp!=rs&&rtmp!=rt)
2743 {temp=rtmp;break;}
2744 }
// prefer r3 over HOST_TEMPREG for the table entry when r3 is available
2745 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2746 temp2=3;
// temp2 = mem_wtab[rs>>12], then shifted left by one with flags updated
2747 emit_readword((int)&mem_wtab,temp);
2748 emit_shrimm(rs,12,temp2);
2749 emit_readword_dualindexedx4(temp,temp2,temp2);
2750 emit_lsls_imm(temp2,1,temp2);
// direct access attempt: cc-conditional store of rt to temp2+rs
2751 switch(type) {
2752 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2753 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2754 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2755 default: assert(0);
2756 }
2757 if(regs_saved) {
2758 restore_jump=(int)out;
2759 emit_jcc(0); // jump to reg restore
2760 }
2761 else
2762 emit_jcc(stubs[n][2]); // return address (invcode check)
2763
// handler path: registers must be saved before the call if not done already
2764 if(!regs_saved)
2765 save_regs(reglist);
2766 int handler=0;
2767 switch(type) {
2768 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2769 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2770 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2771 }
2772 assert(handler!=0);
// r0 = address register, r1 = data register, r3 = shifted table entry
2773 pass_args(rs,rt);
2774 if(temp2!=3)
2775 emit_mov(temp2,3);
2776 int cc=get_reg(i_regmap,CCREG);
2777 if(cc<0)
2778 emit_loadreg(CCREG,2);
// r2 = cycle count plus the adjusted cycles for this instruction
2573466a 2779 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2780 // returns new cycle_count
2781 emit_call(handler);
// undo the adjustment on the returned count (r0) and put it back in the cycle register
2573466a 2782 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2783 if(cc<0)
2784 emit_storereg(CCREG,2);
2785 if(restore_jump)
2786 set_jump_target(restore_jump,(int)out);
2787 restore_regs(reglist);
2788 ra=stubs[n][2];
b96d3df7 2789 emit_jmp(ra);
57871462 2790}
2791
e2b5e7aa 2792static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2793{
2794 int rs=get_reg(regmap,-1);
57871462 2795 int rt=get_reg(regmap,target);
2796 assert(rs>=0);
2797 assert(rt>=0);
b96d3df7 2798 u_int handler,host_addr=0;
b96d3df7 2799 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2800 if (handler==0) {
13e35c04 2801 if(addr!=host_addr)
2802 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2803 switch(type) {
2804 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2805 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2806 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2807 default: assert(0);
2808 }
2809 return;
2810 }
2811
2812 // call a memhandler
2813 save_regs(reglist);
13e35c04 2814 pass_args(rs,rt);
b96d3df7 2815 int cc=get_reg(regmap,CCREG);
2816 if(cc<0)
2817 emit_loadreg(CCREG,2);
2573466a 2818 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2819 emit_movimm(handler,3);
2820 // returns new cycle_count
2821 emit_call((int)jump_handler_write_h);
2573466a 2822 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2823 if(cc<0)
2824 emit_storereg(CCREG,2);
2825 restore_regs(reglist);
57871462 2826}
2827
// Emit the out-of-line stub for an unaligned store (SWL/SWR only).
// Fields read here: stubs[n][1] address handed to set_jump_target so that it
// lands on this stub, [2] return address, [3] instruction index, [4] struct
// regstat pointer, [5] host register holding the address, [6] value fed
// (plus one) to CLOCK_ADJUST, [7] list of registers to preserve.
// Only the "#if 1" branch is compiled: it calls jump_handle_swl or
// jump_handle_swr. The "#else" branch is an older read-modify-write
// implementation that is currently disabled.
e2b5e7aa 2828static void do_unalignedwritestub(int n)
57871462 2829{
b7918751 2830 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2831 literal_pool(256);
57871462 2832 set_jump_target(stubs[n][1],(int)out);
b7918751 2833
2834 int i=stubs[n][3];
2835 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2836 int addr=stubs[n][5];
2837 u_int reglist=stubs[n][7];
2838 signed char *i_regmap=i_regs->regmap;
2839 int temp2=get_reg(i_regmap,FTEMP);
2840 int rt;
b7918751 2841 rt=get_reg(i_regmap,rs2[i]);
2842 assert(rt>=0);
2843 assert(addr>=0);
2844 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// the address register is preserved; the FTEMP register is not
// NOTE(review): temp2 is not checked for >=0 before being used as a shift count - confirm FTEMP is always allocated here
2845 reglist|=(1<<addr);
2846 reglist&=~(1<<temp2);
2847
b96d3df7 2848#if 1
2849 // don't bother with it and call write handler
2850 save_regs(reglist);
// r0 = address register, r1 = data register
2851 pass_args(addr,rt);
2852 int cc=get_reg(i_regmap,CCREG);
2853 if(cc<0)
2854 emit_loadreg(CCREG,2);
// r2 = cycle count plus the adjusted cycles for this instruction
2573466a 2855 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2856 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// undo the adjustment on the value left in r0 and put it back in the cycle register
2573466a 2857 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2858 if(cc<0)
2859 emit_storereg(CCREG,2);
2860 restore_regs(reglist);
2861 emit_jmp(stubs[n][2]); // return address
2862#else
// disabled implementation: read the aligned word, merge the bytes, write it back
b7918751 2863 emit_andimm(addr,0xfffffffc,temp2);
2864 emit_writeword(temp2,(int)&address);
2865
2866 save_regs(reglist);
b7918751 2867 emit_shrimm(addr,16,1);
2868 int cc=get_reg(i_regmap,CCREG);
2869 if(cc<0) {
2870 emit_loadreg(CCREG,2);
2871 }
2872 emit_movimm((u_int)readmem,0);
2873 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2874 emit_call((int)&indirect_jump_indexed);
2875 restore_regs(reglist);
2876
2877 emit_readword((int)&readmem_dword,temp2);
2878 int temp=addr; //hmh
// temp = bit offset of the addressed byte within the word: (addr*8)&24
2879 emit_shlimm(addr,3,temp);
2880 emit_andimm(temp,24,temp);
2881#ifdef BIG_ENDIAN_MIPS
2882 if (opcode[i]==0x2e) // SWR
2883#else
2884 if (opcode[i]==0x2a) // SWL
2885#endif
2886 emit_xorimm(temp,24,temp);
2887 emit_movimm(-1,HOST_TEMPREG);
55439448 2888 if (opcode[i]==0x2a) { // SWL
b7918751 2889 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2890 emit_orrshr(rt,temp,temp2);
2891 }else{
2892 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2893 emit_orrshl(rt,temp,temp2);
2894 }
2895 emit_readword((int)&address,addr);
2896 emit_writeword(temp2,(int)&word);
2897 //save_regs(reglist); // don't need to, no state changes
2898 emit_shrimm(addr,16,1);
2899 emit_movimm((u_int)writemem,0);
2900 //emit_call((int)&indirect_jump_indexed);
2901 emit_mov(15,14);
2902 emit_readword_dualindexedx4(0,1,15);
2903 emit_readword((int)&Count,HOST_TEMPREG);
2904 emit_readword((int)&next_interupt,2);
2905 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2906 emit_writeword(2,(int)&last_count);
2907 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2908 if(cc<0) {
2909 emit_storereg(CCREG,HOST_TEMPREG);
2910 }
2911 restore_regs(reglist);
57871462 2912 emit_jmp(stubs[n][2]); // return address
b96d3df7 2913#endif
57871462 2914}
2915
e2b5e7aa 2916static void do_invstub(int n)
57871462 2917{
2918 literal_pool(20);
2919 u_int reglist=stubs[n][3];
2920 set_jump_target(stubs[n][1],(int)out);
2921 save_regs(reglist);
2922 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2923 emit_call((int)&invalidate_addr);
2924 restore_regs(reglist);
2925 emit_jmp(stubs[n][2]); // return address
2926}
2927
2928int do_dirty_stub(int i)
2929{
2930 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2931 u_int addr=(u_int)source;
57871462 2932 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2933 #ifndef HAVE_ARMV7
ac545b3a 2934 emit_loadlp(addr,1);
57871462 2935 emit_loadlp((int)copy,2);
2936 emit_loadlp(slen*4,3);
2937 #else
ac545b3a 2938 emit_movw(addr&0x0000FFFF,1);
57871462 2939 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2940 emit_movt(addr&0xFFFF0000,1);
57871462 2941 emit_movt(((u_int)copy)&0xFFFF0000,2);
2942 emit_movw(slen*4,3);
2943 #endif
2944 emit_movimm(start+i*4,0);
2945 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2946 int entry=(int)out;
2947 load_regs_entry(i);
2948 if(entry==(int)out) entry=instr_addr[i];
2949 emit_jmp(instr_addr[i]);
2950 return entry;
2951}
2952
e2b5e7aa 2953static void do_dirty_stub_ds()
57871462 2954{
2955 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2956 #ifndef HAVE_ARMV7
57871462 2957 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2958 emit_loadlp((int)copy,2);
2959 emit_loadlp(slen*4,3);
2960 #else
2961 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2962 emit_movw(((u_int)copy)&0x0000FFFF,2);
2963 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2964 emit_movt(((u_int)copy)&0xFFFF0000,2);
2965 emit_movw(slen*4,3);
2966 #endif
2967 emit_movimm(start+1,0);
2968 emit_call((int)&verify_code_ds);
2969}
2970
e2b5e7aa 2971static void do_cop1stub(int n)
57871462 2972{
2973 literal_pool(256);
2974 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2975 set_jump_target(stubs[n][1],(int)out);
2976 int i=stubs[n][3];
3d624f89 2977// int rs=stubs[n][4];
57871462 2978 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2979 int ds=stubs[n][6];
2980 if(!ds) {
2981 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2982 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2983 }
2984 //else {printf("fp exception in delay slot\n");}
2985 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2986 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2987 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2988 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 2989 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2990}
2991
57871462 2992/* Special assem */
2993
// Assemble a variable shift (shift amount taken from a register) for
// instruction i, using the register mapping in i_regs.
// opcode2<=0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); anything else is
// handled as the 64-bit forms (DSLLV/DSRLV/DSRAV) operating on lo/hi halves.
// Nothing is emitted when the destination is r0 (rt1[i]==0) or when the
// destination has no host register.
static void shift_assemble_arm(int i,struct regstat *i_regs)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      signed char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // Shifting the zero register always yields zero
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // Shift amount comes from the zero register: plain move
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          // Only the low 5 bits of the shift register are used
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==4) // SLLV
          {
            emit_shl(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shr(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sar(s,HOST_TEMPREG,t);
          }
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      signed char sh,sl,th,tl,shift;
      // The |64 variants look up the upper 32 bits of the 64-bit guest register
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          // real_th remembers whether the upper half is really allocated;
          // th itself may be redirected to the scratch register below
          int real_th=th;
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            // NOTE(review): th can still be negative here (it is not
            // redirected to temp for DSLLV and the shl above is guarded),
            // yet this call is unconditional - confirm this is intended.
            emit_orrshr(sl,HOST_TEMPREG,th);
            emit_andimm(shift,31,HOST_TEMPREG);
            // Bit 5 of the shift amount selects the "shift by 32 or more" case
            emit_testimm(shift,32);
            emit_shl(sl,HOST_TEMPREG,tl);
            if(th>=0) emit_cmovne_reg(tl,th);
            emit_cmovne_imm(0,tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shr(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            // Only clear the upper half when it is a real allocation
            if(real_th>=0) emit_cmovne_imm(0,th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            if(real_th>=0) {
              assert(temp>=0);
              // NOTE(review): this reads th before the sar below writes it;
              // presumably intended as the sign fill for the >=32 case - confirm.
              emit_sarimm(th,31,temp);
            }
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_sar(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_reg(temp,th);
          }
        }
      }
    }
  }
}
ffb0b9e0 3103
ffb0b9e0 3104static void speculate_mov(int rs,int rt)
3105{
3106 if(rt!=0) {
3107 smrv_strong_next|=1<<rt;
3108 smrv[rt]=smrv[rs];
3109 }
3110}
3111
3112static void speculate_mov_weak(int rs,int rt)
3113{
3114 if(rt!=0) {
3115 smrv_weak_next|=1<<rt;
3116 smrv[rt]=smrv[rs];
3117 }
3118}
3119
3120static void speculate_register_values(int i)
3121{
3122 if(i==0) {
3123 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3124 // gp,sp are likely to stay the same throughout the block
3125 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3126 smrv_weak_next=~smrv_strong_next;
3127 //printf(" llr %08x\n", smrv[4]);
3128 }
3129 smrv_strong=smrv_strong_next;
3130 smrv_weak=smrv_weak_next;
3131 switch(itype[i]) {
3132 case ALU:
3133 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3134 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3135 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3136 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3137 else {
3138 smrv_strong_next&=~(1<<rt1[i]);
3139 smrv_weak_next&=~(1<<rt1[i]);
3140 }
3141 break;
3142 case SHIFTIMM:
3143 smrv_strong_next&=~(1<<rt1[i]);
3144 smrv_weak_next&=~(1<<rt1[i]);
3145 // fallthrough
3146 case IMM16:
3147 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3148 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3149 if(hr>=0) {
3150 if(get_final_value(hr,i,&value))
3151 smrv[rt1[i]]=value;
3152 else smrv[rt1[i]]=constmap[i][hr];
3153 smrv_strong_next|=1<<rt1[i];
3154 }
3155 }
3156 else {
3157 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3158 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3159 }
3160 break;
3161 case LOAD:
3162 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3163 // special case for BIOS
3164 smrv[rt1[i]]=0xa0000000;
3165 smrv_strong_next|=1<<rt1[i];
3166 break;
3167 }
3168 // fallthrough
3169 case SHIFT:
3170 case LOADLR:
3171 case MOV:
3172 smrv_strong_next&=~(1<<rt1[i]);
3173 smrv_weak_next&=~(1<<rt1[i]);
3174 break;
3175 case COP0:
3176 case COP2:
3177 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3178 smrv_strong_next&=~(1<<rt1[i]);
3179 smrv_weak_next&=~(1<<rt1[i]);
3180 }
3181 break;
3182 case C2LS:
3183 if (opcode[i]==0x32) { // LWC2
3184 smrv_strong_next&=~(1<<rt1[i]);
3185 smrv_weak_next&=~(1<<rt1[i]);
3186 }
3187 break;
3188 }
3189#if 0
3190 int r=4;
3191 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3192 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3193#endif
3194}
3195
// Address classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class; value 0 is also what the fast path treats as "plain RAM check"
  MTYPE_8020,     // 0x80200000..0x807fffff
  MTYPE_0000,     // below 0x00200000
  MTYPE_A000,     // 0xa0000000..0xa01fffff
  MTYPE_1F80,     // 0x1f800000..0x1f800fff
};
3203
3204static int get_ptr_mem_type(u_int a)
3205{
3206 if(a < 0x00200000) {
3207 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3208 // return wrong, must use memhandler for BIOS self-test to pass
3209 // 007 does similar stuff from a00 mirror, weird stuff
3210 return MTYPE_8000;
3211 return MTYPE_0000;
3212 }
3213 if(0x1f800000 <= a && a < 0x1f801000)
3214 return MTYPE_1F80;
3215 if(0x80200000 <= a && a < 0x80800000)
3216 return MTYPE_8020;
3217 if(0xa0000000 <= a && a < 0xa0200000)
3218 return MTYPE_A000;
3219 return MTYPE_8000;
3220}
ffb0b9e0 3221
3222static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3223{
581335b0 3224 int jaddr=0,type=0;
ffb0b9e0 3225 int mr=rs1[i];
3226 if(((smrv_strong|smrv_weak)>>mr)&1) {
3227 type=get_ptr_mem_type(smrv[mr]);
3228 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3229 }
3230 else {
3231 // use the mirror we are running on
3232 type=get_ptr_mem_type(start);
3233 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3234 }
3235
3236 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3237 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3238 addr=*addr_reg_override=HOST_TEMPREG;
3239 type=0;
3240 }
3241 else if(type==MTYPE_0000) { // RAM 0 mirror
3242 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3243 addr=*addr_reg_override=HOST_TEMPREG;
3244 type=0;
3245 }
3246 else if(type==MTYPE_A000) { // RAM A mirror
3247 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3248 addr=*addr_reg_override=HOST_TEMPREG;
3249 type=0;
3250 }
3251 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3252 if (psxH == (void *)0x1f800000) {
3253 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3254 emit_cmpimm(HOST_TEMPREG,0x1000);
3255 jaddr=(int)out;
3256 emit_jc(0);
3257 }
3258 else {
3259 // do usual RAM check, jump will go to the right handler
3260 type=0;
3261 }
ffb0b9e0 3262 }
ffb0b9e0 3263
3264 if(type==0)
3265 {
3266 emit_cmpimm(addr,RAM_SIZE);
3267 jaddr=(int)out;
3268 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3269 // Hint to branch predictor that the branch is unlikely to be taken
3270 if(rs1[i]>=28)
3271 emit_jno_unlikely(0);
3272 else
3273 #endif
3274 emit_jno(0);
a327ad27 3275 if(ram_offset!=0) {
3276 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3277 addr=*addr_reg_override=HOST_TEMPREG;
3278 }
ffb0b9e0 3279 }
3280
3281 return jaddr;
3282}
3283
57871462 3284#define shift_assemble shift_assemble_arm
3285
// Assemble an unaligned load (LWL/LWR: opcode 0x22/0x26, LDL/LDR: opcode
// 0x1A/0x1B) for instruction i, using the register mapping in i_regs.
// Reads the containing aligned word (or doubleword), then merges the
// relevant bytes into the destination register(s) using a shift derived
// from the low address bits.
static void loadlr_assemble_arm(int i,struct regstat *i_regs)
{
  int s,th,tl,temp,temp2,addr,map=-1;
  int offset;
  int jaddr=0;
  int memtarget=0,c=0;
  int fastload_reg_override=0;
  u_int hr,reglist=0;
  th=get_reg(i_regs->regmap,rt1[i]|64);
  tl=get_reg(i_regs->regmap,rt1[i]);
  s=get_reg(i_regs->regmap,rs1[i]);
  temp=get_reg(i_regs->regmap,-1);
  temp2=get_reg(i_regs->regmap,FTEMP);
  addr=get_reg(i_regs->regmap,AGEN1+(i&1));
  assert(addr<0);
  offset=imm[i];
  // Collect the host registers that currently hold a guest value; the list
  // is handed to the read stubs below
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
  }
  // NOTE(review): temp is not checked for >=0 before this shift - confirm
  // the allocator always provides a temporary here.
  reglist|=1<<temp;
  // NOTE(review): c is still 0 at this point (it is only assigned below),
  // so the "||c" term has no effect here - confirm whether that is intended.
  if(offset||s<0||c) addr=temp2;
  else addr=s;
  if(s>=0) {
    // c: base register held a known constant on entry
    c=(i_regs->wasconst>>s)&1;
    if(c) {
      // memtarget: the constant address lies below the end of RAM
      memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
    }
  }
  if(!c) {
    #ifdef RAM_OFFSET
    map=get_reg(i_regs->regmap,ROREG);
    if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
    #endif
    // temp = address*8: bit offset of the access within the aligned unit
    emit_shlimm(addr,3,temp);
    // temp2 = aligned address
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
    }else{
      emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
    }
    jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
  }
  else {
    if(ram_offset&&memtarget) {
      emit_addimm(temp2,ram_offset,HOST_TEMPREG);
      fastload_reg_override=HOST_TEMPREG;
    }
    // Constant address: the bit offset is known at compile time
    if (opcode[i]==0x22||opcode[i]==0x26) {
      emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
    }else{
      emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
    }
  }
  if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
    if(!c||memtarget) {
      int a=temp2;
      if(fastload_reg_override) a=fastload_reg_override;
      //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
      emit_readword_indexed_tlb(0,a,map,temp2);
      if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(tl>=0);
      emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
      if (opcode[i]==0x26) // LWR
#else
      if (opcode[i]==0x22) // LWL
#endif
        emit_xorimm(temp,24,temp);
      // HOST_TEMPREG = all ones; shifted by temp it forms the mask of
      // destination bits to clear before merging in the loaded bytes
      emit_movimm(-1,HOST_TEMPREG);
      if (opcode[i]==0x26) {
        emit_shr(temp2,temp,temp2);
        emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
      }else{
        emit_shl(temp2,temp,temp2);
        emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
      }
      emit_or(temp2,tl,tl);
    }
    //emit_storereg(rt1[i],tl); // DEBUG
  }
  if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
    // FIXME: little endian, fastload_reg_override
    int temp2h=get_reg(i_regs->regmap,FTEMP|64);
    if(!c||memtarget) {
      //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
      //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
      emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
      if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
    }
    else
      inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
    if(rt1[i]) {
      assert(th>=0);
      assert(tl>=0);
      // Bit 5 of the bit offset selects which 32-bit half is affected; the
      // conditional (eq/ne) operations below depend on this test
      emit_testimm(temp,32);
      emit_andimm(temp,24,temp);
      if (opcode[i]==0x1A) { // LDL
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shl(temp2h,temp,temp2h);
        emit_orrshr(temp2,HOST_TEMPREG,temp2h);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shl(temp2,temp,temp2);
        emit_cmove_reg(temp2h,th);
        emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
        emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
        emit_orreq(temp2,tl,tl);
        emit_orrne(temp2,th,th);
      }
      if (opcode[i]==0x1B) { // LDR
        emit_xorimm(temp,24,temp);
        emit_rsbimm(temp,32,HOST_TEMPREG);
        emit_shr(temp2,temp,temp2);
        emit_orrshl(temp2h,HOST_TEMPREG,temp2);
        emit_movimm(-1,HOST_TEMPREG);
        emit_shr(temp2h,temp,temp2h);
        emit_cmovne_reg(temp2,tl);
        emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
        emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
        emit_orrne(temp2h,th,th);
        emit_orreq(temp2h,tl,tl);
      }
    }
  }
}
3413#define loadlr_assemble loadlr_assemble_arm
3414
// Assemble a coprocessor-0 instruction for instruction i:
//   opcode2==0    MFC0: read reg_cop0[copr] into the destination register
//   opcode2==4    MTC0: call pcsx_mtc0 (or pcsx_mtc0_ds in a delay slot)
//   opcode2==0x10 with function field 0x10: RFE, done inline on Status
static void cop0_assemble(int i,struct regstat *i_regs)
{
  if(opcode2[i]==0) // MFC0
  {
    signed char t=get_reg(i_regs->regmap,rt1[i]);
    char copr=(source[i]>>11)&0x1f;
    //assert(t>=0); // Why does this happen?  OOT is weird
    if(t>=0&&rt1[i]!=0) {
      emit_readword((int)&reg_cop0+copr*4,t);
    }
  }
  else if(opcode2[i]==4) // MTC0
  {
    signed char s=get_reg(i_regs->regmap,rs1[i]);
    char copr=(source[i]>>11)&0x1f;
    assert(s>=0);
    wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Bring Count up to date before the write:
      // Count = last_count + cycle counter + cycles for this instruction
      emit_readword((int)&last_count,HOST_TEMPREG);
      emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
      emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_writeword(HOST_CCREG,(int)&Count);
    }
    // What a mess.  The status register (12) can enable interrupts,
    // so needs a special case to handle a pending interrupt.
    // The interrupt must be taken immediately, because a subsequent
    // instruction might disable interrupts again.
    if(copr==12||copr==13) {
      if (is_delayslot) {
        // burn cycles to cause cc_interrupt, which will
        // reschedule next_interupt. Relies on CCREG from above.
        assem_debug("MTC0 DS %d\n", copr);
        emit_writeword(HOST_CCREG,(int)&last_count);
        emit_movimm(0,HOST_CCREG);
        emit_storereg(CCREG,HOST_CCREG);
        // Arguments for the call: r0 = copr, r1 = value being written
        emit_loadreg(rs1[i],1);
        emit_movimm(copr,0);
        emit_call((int)pcsx_mtc0_ds);
        // Reload the source register after the call
        emit_loadreg(rs1[i],s);
        return;
      }
      // Record the address of the next instruction and clear
      // pending_exception before the call; it is tested again afterwards
      emit_movimm(start+i*4+4,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pcaddr);
      emit_movimm(0,HOST_TEMPREG);
      emit_writeword(HOST_TEMPREG,(int)&pending_exception);
    }
    //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
    //else
    // Put the value into r1; if s is HOST_CCREG it was overwritten with the
    // cycle count above, so reload it from the register file instead
    if(s==HOST_CCREG)
      emit_loadreg(rs1[i],1);
    else if(s!=1)
      emit_mov(s,1);
    emit_movimm(copr,0);
    emit_call((int)pcsx_mtc0);
    if(copr==9||copr==11||copr==12||copr==13) {
      // Rebase the cycle counter against next_interupt after the call:
      // cc = Count - cycles for this instruction - next_interupt
      emit_readword((int)&Count,HOST_CCREG);
      emit_readword((int)&next_interupt,HOST_TEMPREG);
      emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
      emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
      emit_writeword(HOST_TEMPREG,(int)&last_count);
      emit_storereg(CCREG,HOST_CCREG);
    }
    if(copr==12||copr==13) {
      assert(!is_delayslot);
      // If the write raised an exception, branch to do_interrupt
      emit_readword((int)&pending_exception,14);
      emit_test(14,14);
      emit_jne((int)&do_interrupt);
    }
    // Reload the source register (and its upper half, if mapped) after the call
    emit_loadreg(rs1[i],s);
    if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
      emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
    cop1_usable=0;
  }
  else
  {
    assert(opcode2[i]==0x10);
    if((source[i]&0x3f)==0x10) // RFE
    {
      // Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
      // (assuming emit_orrshr_imm(rs,imm,rt) computes rt |= rs>>imm)
      emit_readword((int)&Status,0);
      emit_andimm(0,0x3c,1);
      emit_andimm(0,~0xf,0);
      emit_orrshr_imm(1,2,0);
      emit_writeword(0,(int)&Status);
    }
  }
}
3502
b9b61529 3503static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3504{
3505 switch (copr) {
3506 case 1:
3507 case 3:
3508 case 5:
3509 case 8:
3510 case 9:
3511 case 10:
3512 case 11:
3513 emit_readword((int)&reg_cop2d[copr],tl);
3514 emit_signextend16(tl,tl);
3515 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3516 break;
3517 case 7:
3518 case 16:
3519 case 17:
3520 case 18:
3521 case 19:
3522 emit_readword((int)&reg_cop2d[copr],tl);
3523 emit_andimm(tl,0xffff,tl);
3524 emit_writeword(tl,(int)&reg_cop2d[copr]);
3525 break;
3526 case 15:
3527 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3528 emit_writeword(tl,(int)&reg_cop2d[copr]);
3529 break;
3530 case 28:
b9b61529 3531 case 29:
3532 emit_readword((int)&reg_cop2d[9],temp);
3533 emit_testimm(temp,0x8000); // do we need this?
3534 emit_andimm(temp,0xf80,temp);
3535 emit_andne_imm(temp,0,temp);
f70d384d 3536 emit_shrimm(temp,7,tl);
b9b61529 3537 emit_readword((int)&reg_cop2d[10],temp);
3538 emit_testimm(temp,0x8000);
3539 emit_andimm(temp,0xf80,temp);
3540 emit_andne_imm(temp,0,temp);
f70d384d 3541 emit_orrshr_imm(temp,2,tl);
b9b61529 3542 emit_readword((int)&reg_cop2d[11],temp);
3543 emit_testimm(temp,0x8000);
3544 emit_andimm(temp,0xf80,temp);
3545 emit_andne_imm(temp,0,temp);
f70d384d 3546 emit_orrshl_imm(temp,3,tl);
b9b61529 3547 emit_writeword(tl,(int)&reg_cop2d[copr]);
3548 break;
3549 default:
3550 emit_readword((int)&reg_cop2d[copr],tl);
3551 break;
3552 }
3553}
3554
3555static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3556{
3557 switch (copr) {
3558 case 15:
3559 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3560 emit_writeword(sl,(int)&reg_cop2d[copr]);
3561 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3562 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3563 emit_writeword(sl,(int)&reg_cop2d[14]);
3564 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3565 break;
3566 case 28:
3567 emit_andimm(sl,0x001f,temp);
f70d384d 3568 emit_shlimm(temp,7,temp);
b9b61529 3569 emit_writeword(temp,(int)&reg_cop2d[9]);
3570 emit_andimm(sl,0x03e0,temp);
f70d384d 3571 emit_shlimm(temp,2,temp);
b9b61529 3572 emit_writeword(temp,(int)&reg_cop2d[10]);
3573 emit_andimm(sl,0x7c00,temp);
f70d384d 3574 emit_shrimm(temp,3,temp);
b9b61529 3575 emit_writeword(temp,(int)&reg_cop2d[11]);
3576 emit_writeword(sl,(int)&reg_cop2d[28]);
3577 break;
3578 case 30:
3579 emit_movs(sl,temp);
3580 emit_mvnmi(temp,temp);
665f33e1 3581#ifdef HAVE_ARMV5
b9b61529 3582 emit_clz(temp,temp);
665f33e1 3583#else
3584 emit_movs(temp,HOST_TEMPREG);
3585 emit_movimm(0,temp);
3586 emit_jeq((int)out+4*4);
3587 emit_addpl_imm(temp,1,temp);
3588 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3589 emit_jns((int)out-2*4);
3590#endif
b9b61529 3591 emit_writeword(sl,(int)&reg_cop2d[30]);
3592 emit_writeword(temp,(int)&reg_cop2d[31]);
3593 break;
b9b61529 3594 case 31:
3595 break;
3596 default:
3597 emit_writeword(sl,(int)&reg_cop2d[copr]);
3598 break;
3599 }
3600}
3601
e2b5e7aa 3602static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3603{
3604 u_int copr=(source[i]>>11)&0x1f;
3605 signed char temp=get_reg(i_regs->regmap,-1);
3606 if (opcode2[i]==0) { // MFC2
3607 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3608 if(tl>=0&&rt1[i]!=0)
b9b61529 3609 cop2_get_dreg(copr,tl,temp);
3610 }
3611 else if (opcode2[i]==4) { // MTC2
3612 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3613 cop2_put_dreg(copr,sl,temp);
3614 }
3615 else if (opcode2[i]==2) // CFC2
3616 {
3617 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3618 if(tl>=0&&rt1[i]!=0)
b9b61529 3619 emit_readword((int)&reg_cop2c[copr],tl);
3620 }
3621 else if (opcode2[i]==6) // CTC2
3622 {
3623 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3624 switch(copr) {
3625 case 4:
3626 case 12:
3627 case 20:
3628 case 26:
3629 case 27:
3630 case 29:
3631 case 30:
3632 emit_signextend16(sl,temp);
3633 break;
3634 case 31:
3635 //value = value & 0x7ffff000;
3636 //if (value & 0x7f87e000) value |= 0x80000000;
3637 emit_shrimm(sl,12,temp);
3638 emit_shlimm(temp,12,temp);
3639 emit_testimm(temp,0x7f000000);
3640 emit_testeqimm(temp,0x00870000);
3641 emit_testeqimm(temp,0x0000e000);
3642 emit_orrne_imm(temp,0x80000000,temp);
3643 break;
3644 default:
3645 temp=sl;
3646 break;
3647 }
3648 emit_writeword(temp,(int)&reg_cop2c[copr]);
3649 assert(sl>=0);
3650 }
3651}
3652
054175e9 3653static void c2op_prologue(u_int op,u_int reglist)
3654{
3655 save_regs_all(reglist);
82ed88eb 3656#ifdef PCNT
3657 emit_movimm(op,0);
3658 emit_call((int)pcnt_gte_start);
3659#endif
054175e9 3660 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3661}
3662
// Common exit sequence for an inline GTE operation: optionally notify the
// performance counter (PCNT builds only), then restore the host registers
// listed in reglist.
static void c2op_epilogue(u_int op,u_int reglist)
{
#ifdef PCNT
  emit_movimm(op,0);
  emit_call((int)pcnt_gte_end);
#endif
  restore_regs_all(reglist);
}
3671
6c0eefaf 3672static void c2op_call_MACtoIR(int lm,int need_flags)
3673{
3674 if(need_flags)
3675 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3676 else
3677 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3678}
3679
3680static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3681{
3682 emit_call((int)func);
3683 // func is C code and trashes r0
3684 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3685 if(need_flags||need_ir)
3686 c2op_call_MACtoIR(lm,need_flags);
3687 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3688}
3689
// Assemble a GTE (coprocessor-2) operation for instruction i.
// Operations with no entry in gte_handlers emit nothing. Selected
// operations get specialised call sequences (unless DRC_DBG is defined);
// everything else calls the generic handler, choosing the flagless (_nf)
// table when the flags are not needed.
static void c2op_assemble(int i,struct regstat *i_regs)
{
  u_int c2op=source[i]&0x3f;
  u_int hr,reglist_full=0,reglist;
  int need_flags,need_ir;
  // Host registers currently holding guest values
  for(hr=0;hr<HOST_REGS;hr++) {
    if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
  }
  // By default only the caller-saved subset needs saving around the call
  reglist=reglist_full&CALLER_SAVE_REGS;

  if (gte_handlers[c2op]!=NULL) {
    need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
    // IR results are needed unless all three bits of mask 0xe00 are marked unneeded
    need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
    assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
      source[i],gte_unneeded[i+1],need_flags,need_ir);
    if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
      need_flags=0;
    // Instruction fields: bit 19 and bit 10 of the opcode word
    int shift = (source[i] >> 19) & 1;
    int lm = (source[i] >> 10) & 1;
    switch(c2op) {
#ifndef DRC_DBG
      case GTE_MVMVA: {
#ifdef HAVE_ARMV5
        int v  = (source[i] >> 15) & 3;
        int cv = (source[i] >> 13) & 3;
        int mx = (source[i] >> 17) & 3;
        reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
        c2op_prologue(c2op,reglist);
        /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
        if(v<3)
          emit_ldrd(v*8,0,4);
        else {
          emit_movzwl_indexed(9*4,0,4);  // gteIR
          emit_movzwl_indexed(10*4,0,6);
          emit_movzwl_indexed(11*4,0,5);
          emit_orrshl_imm(6,16,4);
        }
        // mx/cv value 3 selects zeromem_ptr instead of a register block
        if(mx<3)
          emit_addimm(0,32*4+mx*8*4,6);
        else
          emit_readword((int)&zeromem_ptr,6);
        if(cv<3)
          emit_addimm(0,32*4+(cv*8+5)*4,7);
        else
          emit_readword((int)&zeromem_ptr,7);
#ifdef __ARM_NEON__
        emit_movimm(source[i],1); // opcode
        emit_call((int)gteMVMVA_part_neon);
        if(need_flags) {
          emit_movimm(lm,1);
          emit_call((int)gteMACtoIR_flags_neon);
        }
#else
        if(cv==3&&shift)
          emit_call((int)gteMVMVA_part_cv3sh12_arm);
        else {
          emit_movimm(shift,1);
          emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
        }
        if(need_flags||need_ir)
          c2op_call_MACtoIR(lm,need_flags);
#endif
#else /* if not HAVE_ARMV5 */
        // Generic path: store the opcode word in psxRegs.code and call the handler
        c2op_prologue(c2op,reglist);
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
#endif
        break;
      }
      case GTE_OP:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DPCS:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_INTPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_SQR:
        c2op_prologue(c2op,reglist);
        emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
        if(need_flags||need_ir) {
          // Reload r0 with the cop2 register base before the next helper
          emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
          c2op_call_MACtoIR(lm,need_flags);
        }
        break;
      case GTE_DCPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
        break;
      case GTE_GPF:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
        break;
      case GTE_GPL:
        c2op_prologue(c2op,reglist);
        c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
        break;
#endif
      default:
        c2op_prologue(c2op,reglist);
#ifdef DRC_DBG
        emit_movimm(source[i],1); // opcode
        emit_writeword(1,(int)&psxRegs.code);
#endif
        emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
        break;
    }
    c2op_epilogue(c2op,reglist);
  }
}
3809
e2b5e7aa 3810static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3811{
3812 // XXX: should just just do the exception instead
3813 if(!cop1_usable) {
3814 int jaddr=(int)out;
3815 emit_jmp(0);
3816 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3817 cop1_usable=1;
3818 }
3819}
3820
// Coprocessor-1 instructions are not assembled here: this only routes
// instruction i through cop1_unusable().
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3825
// Floating-point conversions are not assembled here: this only routes
// instruction i through cop1_unusable().
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3830#define fconv_assemble fconv_assemble_arm
3831
// Floating-point compares are not assembled here: this only routes
// instruction i through cop1_unusable().
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3836
// Floating-point arithmetic is not assembled here: this only routes
// instruction i through cop1_unusable().
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3841
e2b5e7aa 3842static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3843{
3844 // case 0x18: MULT
3845 // case 0x19: MULTU
3846 // case 0x1A: DIV
3847 // case 0x1B: DIVU
3848 // case 0x1C: DMULT
3849 // case 0x1D: DMULTU
3850 // case 0x1E: DDIV
3851 // case 0x1F: DDIVU
3852 if(rs1[i]&&rs2[i])
3853 {
3854 if((opcode2[i]&4)==0) // 32-bit
3855 {
3856 if(opcode2[i]==0x18) // MULT
3857 {
3858 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3859 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3860 signed char hi=get_reg(i_regs->regmap,HIREG);
3861 signed char lo=get_reg(i_regs->regmap,LOREG);
3862 assert(m1>=0);
3863 assert(m2>=0);
3864 assert(hi>=0);
3865 assert(lo>=0);
3866 emit_smull(m1,m2,hi,lo);
3867 }
3868 if(opcode2[i]==0x19) // MULTU
3869 {
3870 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3871 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3872 signed char hi=get_reg(i_regs->regmap,HIREG);
3873 signed char lo=get_reg(i_regs->regmap,LOREG);
3874 assert(m1>=0);
3875 assert(m2>=0);
3876 assert(hi>=0);
3877 assert(lo>=0);
3878 emit_umull(m1,m2,hi,lo);
3879 }
3880 if(opcode2[i]==0x1A) // DIV
3881 {
3882 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3883 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3884 assert(d1>=0);
3885 assert(d2>=0);
3886 signed char quotient=get_reg(i_regs->regmap,LOREG);
3887 signed char remainder=get_reg(i_regs->regmap,HIREG);
3888 assert(quotient>=0);
3889 assert(remainder>=0);
3890 emit_movs(d1,remainder);
44a80f6a 3891 emit_movimm(0xffffffff,quotient);
3892 emit_negmi(quotient,quotient); // .. quotient and ..
3893 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3894 emit_movs(d2,HOST_TEMPREG);
3895 emit_jeq((int)out+52); // Division by zero
82336ba3 3896 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3897#ifdef HAVE_ARMV5
57871462 3898 emit_clz(HOST_TEMPREG,quotient);
3899 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3900#else
3901 emit_movimm(0,quotient);
3902 emit_addpl_imm(quotient,1,quotient);
3903 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3904 emit_jns((int)out-2*4);
3905#endif
57871462 3906 emit_orimm(quotient,1<<31,quotient);
3907 emit_shr(quotient,quotient,quotient);
3908 emit_cmp(remainder,HOST_TEMPREG);
3909 emit_subcs(remainder,HOST_TEMPREG,remainder);
3910 emit_adcs(quotient,quotient,quotient);
3911 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3912 emit_jcc((int)out-16); // -4
3913 emit_teq(d1,d2);
3914 emit_negmi(quotient,quotient);
3915 emit_test(d1,d1);
3916 emit_negmi(remainder,remainder);
3917 }
3918 if(opcode2[i]==0x1B) // DIVU
3919 {
3920 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3921 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3922 assert(d1>=0);
3923 assert(d2>=0);
3924 signed char quotient=get_reg(i_regs->regmap,LOREG);
3925 signed char remainder=get_reg(i_regs->regmap,HIREG);
3926 assert(quotient>=0);
3927 assert(remainder>=0);
44a80f6a 3928 emit_mov(d1,remainder);
3929 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3930 emit_test(d2,d2);
44a80f6a 3931 emit_jeq((int)out+40); // Division by zero
665f33e1 3932#ifdef HAVE_ARMV5
57871462 3933 emit_clz(d2,HOST_TEMPREG);
3934 emit_movimm(1<<31,quotient);
3935 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3936#else
3937 emit_movimm(0,HOST_TEMPREG);
82336ba3 3938 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3939 emit_lslpls_imm(d2,1,d2);
665f33e1 3940 emit_jns((int)out-2*4);
3941 emit_movimm(1<<31,quotient);
3942#endif
57871462 3943 emit_shr(quotient,HOST_TEMPREG,quotient);
3944 emit_cmp(remainder,d2);
3945 emit_subcs(remainder,d2,remainder);
3946 emit_adcs(quotient,quotient,quotient);
3947 emit_shrcc_imm(d2,1,d2);
3948 emit_jcc((int)out-16); // -4
3949 }
3950 }
3951 else // 64-bit
71e490c5 3952 assert(0);
57871462 3953 }
3954 else
3955 {
3956 // Multiply by zero is zero.
3957 // MIPS does not have a divide by zero exception.
3958 // The result is undefined, we return zero.
3959 signed char hr=get_reg(i_regs->regmap,HIREG);
3960 signed char lr=get_reg(i_regs->regmap,LOREG);
3961 if(hr>=0) emit_zeroreg(hr);
3962 if(lr>=0) emit_zeroreg(lr);
3963 }
3964}
3965#define multdiv_assemble multdiv_assemble_arm
3966
// Intentionally empty on ARM; r is unused.
static void do_preload_rhash(int r) {
  // Don't need this for ARM.  On x86, this puts the value 0xf8 into the
  // register.  On ARM the hash can be done with a single instruction (below)
}
3971
e2b5e7aa 3972static void do_preload_rhtbl(int ht) {
57871462 3973 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3974}
3975
// Compute the mini hash table byte offset for the address in rs:
// rh = rs & 0xf8 (matches the (addr&0xFF)>>3 slot index times the 8-byte
// entry size used by do_miniht_insert).
static void do_rhash(int rs,int rh) {
  emit_andimm(rs,0xf8,rh);
}
3979
e2b5e7aa 3980static void do_miniht_load(int ht,int rh) {
57871462 3981 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3982 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3983}
3984
e2b5e7aa 3985static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3986 emit_cmp(rh,rs);
3987 emit_ldreq_indexed(ht,4,15);
3988 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3989 emit_mov(rs,7);
3990 emit_jmp(jump_vaddr_reg[7]);
3991 #else
3992 emit_jmp(jump_vaddr_reg[rs]);
3993 #endif
3994}
3995
e2b5e7aa 3996static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3997 #ifndef HAVE_ARMV7
57871462 3998 emit_movimm(return_address,rt); // PC into link register
3999 add_to_linker((int)out,return_address,1);
4000 emit_pcreladdr(temp);
4001 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4002 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4003 #else
4004 emit_movw(return_address&0x0000FFFF,rt);
4005 add_to_linker((int)out,return_address,1);
4006 emit_pcreladdr(temp);
4007 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4008 emit_movt(return_address&0xFFFF0000,rt);
4009 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4010 #endif
4011}
4012
e2b5e7aa 4013static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4014{
4015 //if(dirty_pre==dirty) return;
581335b0 4016 int hr,reg;
57871462 4017 for(hr=0;hr<HOST_REGS;hr++) {
4018 if(hr!=EXCLUDE_REG) {
4019 reg=pre[hr];
4020 if(((~u)>>(reg&63))&1) {
f776eb14 4021 if(reg>0) {
57871462 4022 if(((dirty_pre&~dirty)>>hr)&1) {
4023 if(reg>0&&reg<34) {
4024 emit_storereg(reg,hr);
4025 if( ((is32_pre&~uu)>>reg)&1 ) {
4026 emit_sarimm(hr,31,HOST_TEMPREG);
4027 emit_storereg(reg|64,HOST_TEMPREG);
4028 }
4029 }
4030 else if(reg>=64) {
4031 emit_storereg(reg,hr);
4032 }
4033 }
4034 }
57871462 4035 }
4036 }
4037 }
4038}
4039
4040
/* Disabled: using strd could possibly help, but you'd have to allocate registers in pairs.
e2b5e7aa 4042static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4043{
4044 int hr;
4045 int wrote=-1;
4046 for(hr=HOST_REGS-1;hr>=0;hr--) {
4047 if(hr!=EXCLUDE_REG) {
4048 if(pre[hr]!=entry[hr]) {
4049 if(pre[hr]>=0) {
4050 if((dirty>>hr)&1) {
4051 if(get_reg(entry,pre[hr])<0) {
4052 if(pre[hr]<64) {
4053 if(!((u>>pre[hr])&1)) {
4054 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4055 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4056 emit_sarimm(hr,31,hr+1);
4057 emit_strdreg(pre[hr],hr);
4058 }
4059 else
4060 emit_storereg(pre[hr],hr);
4061 }else{
4062 emit_storereg(pre[hr],hr);
4063 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4064 emit_sarimm(hr,31,hr);
4065 emit_storereg(pre[hr]|64,hr);
4066 }
4067 }
4068 }
4069 }else{
4070 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4071 emit_storereg(pre[hr],hr);
4072 }
4073 }
4074 wrote=hr;
4075 }
4076 }
4077 }
4078 }
4079 }
4080 }
4081 for(hr=0;hr<HOST_REGS;hr++) {
4082 if(hr!=EXCLUDE_REG) {
4083 if(pre[hr]!=entry[hr]) {
4084 if(pre[hr]>=0) {
4085 int nr;
4086 if((nr=get_reg(entry,pre[hr]))>=0) {
4087 emit_mov(hr,nr);
4088 }
4089 }
4090 }
4091 }
4092 }
4093}
4094#define wb_invalidate wb_invalidate_arm
4095*/
4096
dd3a91a1 4097// Clearing the cache is rather slow on ARM Linux, so mark the areas
4098// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4099static void do_clear_cache()
dd3a91a1 4100{
4101 int i,j;
4102 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4103 {
4104 u_int bitmap=needs_clear_cache[i];
4105 if(bitmap) {
4106 u_int start,end;
9f51b4b9 4107 for(j=0;j<32;j++)
dd3a91a1 4108 {
4109 if(bitmap&(1<<j)) {
bdeade46 4110 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4111 end=start+4095;
4112 j++;
4113 while(j<32) {
4114 if(bitmap&(1<<j)) {
4115 end+=4096;
4116 j++;
4117 }else{
4118 __clear_cache((void *)start,(void *)end);
4119 break;
4120 }
4121 }
4122 }
4123 }
4124 needs_clear_cache[i]=0;
4125 }
4126 }
4127}
4128
// CPU-architecture-specific initialization.
// Nothing is required for this backend, so the body is empty. Declared with
// a (void) prototype so that calls passing arguments are diagnosed.
static void arch_init(void)
{
}
b9b61529 4132
4133// vim:shiftwidth=2:expandtab