drc: some vita and 3ds support
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33char *translation_cache;
34#else
bdeade46 35char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Bitmask with bits 0-3 and 12 set; builds with __MACH__ defined also set
// bit 9. NOTE(review): presumably one bit per ARM register (r0-r3, r12,
// plus r9 on Mach) - confirm against the users of this macro.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Marks a definition as intentionally possibly-unused (silences warnings).
#define unused __attribute__((unused))
45
57871462 46extern int cycle_count;
47extern int last_count;
48extern int pcaddr;
49extern int pending_exception;
50extern int branch_target;
51extern uint64_t readmem_dword;
57871462 52extern void *dynarec_local;
57871462 53extern u_int mini_ht[32][2];
57871462 54
55void indirect_jump_indexed();
56void indirect_jump();
57void do_interrupt();
58void jump_vaddr_r0();
59void jump_vaddr_r1();
60void jump_vaddr_r2();
61void jump_vaddr_r3();
62void jump_vaddr_r4();
63void jump_vaddr_r5();
64void jump_vaddr_r6();
65void jump_vaddr_r7();
66void jump_vaddr_r8();
67void jump_vaddr_r9();
68void jump_vaddr_r10();
69void jump_vaddr_r12();
70
71const u_int jump_vaddr_reg[16] = {
72 (int)jump_vaddr_r0,
73 (int)jump_vaddr_r1,
74 (int)jump_vaddr_r2,
75 (int)jump_vaddr_r3,
76 (int)jump_vaddr_r4,
77 (int)jump_vaddr_r5,
78 (int)jump_vaddr_r6,
79 (int)jump_vaddr_r7,
80 (int)jump_vaddr_r8,
81 (int)jump_vaddr_r9,
82 (int)jump_vaddr_r10,
83 0,
84 (int)jump_vaddr_r12,
85 0,
86 0,
87 0};
88
0bbd1454 89void invalidate_addr_r0();
90void invalidate_addr_r1();
91void invalidate_addr_r2();
92void invalidate_addr_r3();
93void invalidate_addr_r4();
94void invalidate_addr_r5();
95void invalidate_addr_r6();
96void invalidate_addr_r7();
97void invalidate_addr_r8();
98void invalidate_addr_r9();
99void invalidate_addr_r10();
100void invalidate_addr_r12();
101
102const u_int invalidate_addr_reg[16] = {
103 (int)invalidate_addr_r0,
104 (int)invalidate_addr_r1,
105 (int)invalidate_addr_r2,
106 (int)invalidate_addr_r3,
107 (int)invalidate_addr_r4,
108 (int)invalidate_addr_r5,
109 (int)invalidate_addr_r6,
110 (int)invalidate_addr_r7,
111 (int)invalidate_addr_r8,
112 (int)invalidate_addr_r9,
113 (int)invalidate_addr_r10,
114 0,
115 (int)invalidate_addr_r12,
116 0,
117 0,
118 0};
119
d148d265 120static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 121
57871462 122/* Linker */
123
e2b5e7aa 124static void set_jump_target(int addr,u_int target)
57871462 125{
126 u_char *ptr=(u_char *)addr;
127 u_int *ptr2=(u_int *)ptr;
128 if(ptr[3]==0xe2) {
129 assert((target-(u_int)ptr2-8)<1024);
130 assert((addr&3)==0);
131 assert((target&3)==0);
132 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
133 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
134 }
135 else if(ptr[3]==0x72) {
136 // generated by emit_jno_unlikely
137 if((target-(u_int)ptr2-8)<1024) {
138 assert((addr&3)==0);
139 assert((target&3)==0);
140 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
141 }
142 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
143 assert((addr&3)==0);
144 assert((target&3)==0);
145 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
146 }
147 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
148 }
149 else {
150 assert((ptr[3]&0x0e)==0xa);
151 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
152 }
153}
154
// Variant of set_jump_target() that can optionally copy the instruction at
// the branch target into the slot just before the branch and then branch
// one instruction further. Works, but the difference in speed is usually
// insignificant, so it is compiled out.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  const u_char top=((u_char *)addr)[3];
  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top&0x0e)==0xa);
  const u_int target_insn=*(u_int *)target;
  // Never copy these classes of target instruction:
  if((target_insn&0x0e100000)==0            // ALU, no immediate, no flags
   ||(target_insn&0x0c100000)==0x04100000   // Load
   ||(target_insn&0x08000000))
  {
    copy=0;
  }
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
57871462 189
190/* Literal pool */
e2b5e7aa 191static void add_literal(int addr,int val)
57871462 192{
15776b68 193 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 194 literals[literalcount][0]=addr;
195 literals[literalcount][1]=val;
9f51b4b9 196 literalcount++;
197}
57871462 198
// From a pointer to an external jump stub (which was produced by
// emit_extjump2), find where the jumping insn is.
//
// The second word of the stub must be "ldr rx, [pc, #ofs]"; the word it
// loads (located at that instruction's address + ofs + 8) is returned.
// Byte offsets are applied through char pointers because arithmetic on
// void pointers is a GNU extension, not ISO C.
static void *find_extjump_insn(void *stub)
{
  const int *ldr=(const int *)((const char *)stub+4);
  assert((*ldr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  const u_int offset=*ldr&0xfff;
  void *const *l_ptr=(void *const *)((const char *)ldr+offset+8);
  return *l_ptr;
}
209
f968d35d 210// find where external branch is liked to using addr of it's stub:
211// get address that insn one after stub loads (dyna_linker arg1),
212// treat it as a pointer to branch insn,
213// return addr where that branch jumps to
e2b5e7aa 214static int get_pointer(void *stub)
57871462 215{
216 //printf("get_pointer(%x)\n",(int)stub);
d148d265 217 int *i_ptr=find_extjump_insn(stub);
57871462 218 assert((*i_ptr&0x0f000000)==0x0a000000);
219 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
220}
221
222// Find the "clean" entry point from a "dirty" entry point
223// by skipping past the call to verify_code
e2b5e7aa 224static u_int get_clean_addr(int addr)
57871462 225{
226 int *ptr=(int *)addr;
665f33e1 227 #ifndef HAVE_ARMV7
57871462 228 ptr+=4;
229 #else
230 ptr+=6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
234 ptr++;
235 if((*ptr&0xFF000000)==0xea000000) {
236 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
237 }
238 return (u_int)ptr;
239}
240
e2b5e7aa 241static int verify_dirty(u_int *ptr)
57871462 242{
665f33e1 243 #ifndef HAVE_ARMV7
57871462 244 // get from literal pool
15776b68 245 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 246 u_int offset=*ptr&0xfff;
247 u_int *l_ptr=(void *)ptr+offset+8;
248 u_int source=l_ptr[0];
249 u_int copy=l_ptr[1];
250 u_int len=l_ptr[2];
251 ptr+=4;
252 #else
253 // ARMv7 movw/movt
254 assert((*ptr&0xFFF00000)==0xe3000000);
255 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
256 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
257 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
258 ptr+=6;
259 #endif
260 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
261 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 262 //printf("verify_dirty: %x %x %x\n",source,copy,len);
263 return !memcmp((void *)source,(void *)copy,len);
264}
265
266// This doesn't necessarily find all clean entry points, just
267// guarantees that it's not dirty
e2b5e7aa 268static int isclean(int addr)
57871462 269{
665f33e1 270 #ifndef HAVE_ARMV7
581335b0 271 u_int *ptr=((u_int *)addr)+4;
57871462 272 #else
581335b0 273 u_int *ptr=((u_int *)addr)+6;
57871462 274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
277 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
278 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
279 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
280 return 1;
281}
282
4a35de07 283// get source that block at addr was compiled from (host pointers)
e2b5e7aa 284static void get_bounds(int addr,u_int *start,u_int *end)
57871462 285{
286 u_int *ptr=(u_int *)addr;
665f33e1 287 #ifndef HAVE_ARMV7
57871462 288 // get from literal pool
15776b68 289 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 290 u_int offset=*ptr&0xfff;
291 u_int *l_ptr=(void *)ptr+offset+8;
292 u_int source=l_ptr[0];
293 //u_int copy=l_ptr[1];
294 u_int len=l_ptr[2];
295 ptr+=4;
296 #else
297 // ARMv7 movw/movt
298 assert((*ptr&0xFFF00000)==0xe3000000);
299 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
300 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
301 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
302 ptr+=6;
303 #endif
304 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
305 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 306 *start=source;
307 *end=source+len;
308}
309
310/* Register allocation */
311
312// Note: registers are allocated clean (unmodified state)
313// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 314static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 315{
316 int r,hr;
317 int preferred_reg = (reg&7);
318 if(reg==CCREG) preferred_reg=HOST_CCREG;
319 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 320
57871462 321 // Don't allocate unused registers
322 if((cur->u>>reg)&1) return;
9f51b4b9 323
57871462 324 // see if it's already allocated
325 for(hr=0;hr<HOST_REGS;hr++)
326 {
327 if(cur->regmap[hr]==reg) return;
328 }
9f51b4b9 329
57871462 330 // Keep the same mapping if the register was already allocated in a loop
331 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 332
57871462 333 // Try to allocate the preferred register
334 if(cur->regmap[preferred_reg]==-1) {
335 cur->regmap[preferred_reg]=reg;
336 cur->dirty&=~(1<<preferred_reg);
337 cur->isconst&=~(1<<preferred_reg);
338 return;
339 }
340 r=cur->regmap[preferred_reg];
341 if(r<64&&((cur->u>>r)&1)) {
342 cur->regmap[preferred_reg]=reg;
343 cur->dirty&=~(1<<preferred_reg);
344 cur->isconst&=~(1<<preferred_reg);
345 return;
346 }
347 if(r>=64&&((cur->uu>>(r&63))&1)) {
348 cur->regmap[preferred_reg]=reg;
349 cur->dirty&=~(1<<preferred_reg);
350 cur->isconst&=~(1<<preferred_reg);
351 return;
352 }
9f51b4b9 353
57871462 354 // Clear any unneeded registers
355 // We try to keep the mapping consistent, if possible, because it
356 // makes branches easier (especially loops). So we try to allocate
357 // first (see above) before removing old mappings. If this is not
358 // possible then go ahead and clear out the registers that are no
359 // longer needed.
360 for(hr=0;hr<HOST_REGS;hr++)
361 {
362 r=cur->regmap[hr];
363 if(r>=0) {
364 if(r<64) {
365 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
366 }
367 else
368 {
369 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
370 }
371 }
372 }
373 // Try to allocate any available register, but prefer
374 // registers that have not been used recently.
375 if(i>0) {
376 for(hr=0;hr<HOST_REGS;hr++) {
377 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
378 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
379 cur->regmap[hr]=reg;
380 cur->dirty&=~(1<<hr);
381 cur->isconst&=~(1<<hr);
382 return;
383 }
384 }
385 }
386 }
387 // Try to allocate any available register
388 for(hr=0;hr<HOST_REGS;hr++) {
389 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
390 cur->regmap[hr]=reg;
391 cur->dirty&=~(1<<hr);
392 cur->isconst&=~(1<<hr);
393 return;
394 }
395 }
9f51b4b9 396
57871462 397 // Ok, now we have to evict someone
398 // Pick a register we hopefully won't need soon
399 u_char hsn[MAXREG+1];
400 memset(hsn,10,sizeof(hsn));
401 int j;
402 lsn(hsn,i,&preferred_reg);
403 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
404 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
405 if(i>0) {
406 // Don't evict the cycle count at entry points, otherwise the entry
407 // stub will have to write it.
408 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
409 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
410 for(j=10;j>=3;j--)
411 {
412 // Alloc preferred register if available
413 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
414 for(hr=0;hr<HOST_REGS;hr++) {
415 // Evict both parts of a 64-bit register
416 if((cur->regmap[hr]&63)==r) {
417 cur->regmap[hr]=-1;
418 cur->dirty&=~(1<<hr);
419 cur->isconst&=~(1<<hr);
420 }
421 }
422 cur->regmap[preferred_reg]=reg;
423 return;
424 }
425 for(r=1;r<=MAXREG;r++)
426 {
427 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
430 if(cur->regmap[hr]==r+64) {
431 cur->regmap[hr]=reg;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 return;
435 }
436 }
437 }
438 for(hr=0;hr<HOST_REGS;hr++) {
439 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
440 if(cur->regmap[hr]==r) {
441 cur->regmap[hr]=reg;
442 cur->dirty&=~(1<<hr);
443 cur->isconst&=~(1<<hr);
444 return;
445 }
446 }
447 }
448 }
449 }
450 }
451 }
452 for(j=10;j>=0;j--)
453 {
454 for(r=1;r<=MAXREG;r++)
455 {
456 if(hsn[r]==j) {
457 for(hr=0;hr<HOST_REGS;hr++) {
458 if(cur->regmap[hr]==r+64) {
459 cur->regmap[hr]=reg;
460 cur->dirty&=~(1<<hr);
461 cur->isconst&=~(1<<hr);
462 return;
463 }
464 }
465 for(hr=0;hr<HOST_REGS;hr++) {
466 if(cur->regmap[hr]==r) {
467 cur->regmap[hr]=reg;
468 cur->dirty&=~(1<<hr);
469 cur->isconst&=~(1<<hr);
470 return;
471 }
472 }
473 }
474 }
475 }
c43b5311 476 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 477}
478
e2b5e7aa 479static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 480{
481 int preferred_reg = 8+(reg&1);
482 int r,hr;
9f51b4b9 483
57871462 484 // allocate the lower 32 bits
485 alloc_reg(cur,i,reg);
9f51b4b9 486
57871462 487 // Don't allocate unused registers
488 if((cur->uu>>reg)&1) return;
9f51b4b9 489
57871462 490 // see if the upper half is already allocated
491 for(hr=0;hr<HOST_REGS;hr++)
492 {
493 if(cur->regmap[hr]==reg+64) return;
494 }
9f51b4b9 495
57871462 496 // Keep the same mapping if the register was already allocated in a loop
497 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 498
57871462 499 // Try to allocate the preferred register
500 if(cur->regmap[preferred_reg]==-1) {
501 cur->regmap[preferred_reg]=reg|64;
502 cur->dirty&=~(1<<preferred_reg);
503 cur->isconst&=~(1<<preferred_reg);
504 return;
505 }
506 r=cur->regmap[preferred_reg];
507 if(r<64&&((cur->u>>r)&1)) {
508 cur->regmap[preferred_reg]=reg|64;
509 cur->dirty&=~(1<<preferred_reg);
510 cur->isconst&=~(1<<preferred_reg);
511 return;
512 }
513 if(r>=64&&((cur->uu>>(r&63))&1)) {
514 cur->regmap[preferred_reg]=reg|64;
515 cur->dirty&=~(1<<preferred_reg);
516 cur->isconst&=~(1<<preferred_reg);
517 return;
518 }
9f51b4b9 519
57871462 520 // Clear any unneeded registers
521 // We try to keep the mapping consistent, if possible, because it
522 // makes branches easier (especially loops). So we try to allocate
523 // first (see above) before removing old mappings. If this is not
524 // possible then go ahead and clear out the registers that are no
525 // longer needed.
526 for(hr=HOST_REGS-1;hr>=0;hr--)
527 {
528 r=cur->regmap[hr];
529 if(r>=0) {
530 if(r<64) {
531 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
532 }
533 else
534 {
535 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
536 }
537 }
538 }
539 // Try to allocate any available register, but prefer
540 // registers that have not been used recently.
541 if(i>0) {
542 for(hr=0;hr<HOST_REGS;hr++) {
543 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
544 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
545 cur->regmap[hr]=reg|64;
546 cur->dirty&=~(1<<hr);
547 cur->isconst&=~(1<<hr);
548 return;
549 }
550 }
551 }
552 }
553 // Try to allocate any available register
554 for(hr=0;hr<HOST_REGS;hr++) {
555 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
556 cur->regmap[hr]=reg|64;
557 cur->dirty&=~(1<<hr);
558 cur->isconst&=~(1<<hr);
559 return;
560 }
561 }
9f51b4b9 562
57871462 563 // Ok, now we have to evict someone
564 // Pick a register we hopefully won't need soon
565 u_char hsn[MAXREG+1];
566 memset(hsn,10,sizeof(hsn));
567 int j;
568 lsn(hsn,i,&preferred_reg);
569 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
570 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
571 if(i>0) {
572 // Don't evict the cycle count at entry points, otherwise the entry
573 // stub will have to write it.
574 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
575 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
576 for(j=10;j>=3;j--)
577 {
578 // Alloc preferred register if available
579 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
580 for(hr=0;hr<HOST_REGS;hr++) {
581 // Evict both parts of a 64-bit register
582 if((cur->regmap[hr]&63)==r) {
583 cur->regmap[hr]=-1;
584 cur->dirty&=~(1<<hr);
585 cur->isconst&=~(1<<hr);
586 }
587 }
588 cur->regmap[preferred_reg]=reg|64;
589 return;
590 }
591 for(r=1;r<=MAXREG;r++)
592 {
593 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
596 if(cur->regmap[hr]==r+64) {
597 cur->regmap[hr]=reg|64;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 return;
601 }
602 }
603 }
604 for(hr=0;hr<HOST_REGS;hr++) {
605 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
606 if(cur->regmap[hr]==r) {
607 cur->regmap[hr]=reg|64;
608 cur->dirty&=~(1<<hr);
609 cur->isconst&=~(1<<hr);
610 return;
611 }
612 }
613 }
614 }
615 }
616 }
617 }
618 for(j=10;j>=0;j--)
619 {
620 for(r=1;r<=MAXREG;r++)
621 {
622 if(hsn[r]==j) {
623 for(hr=0;hr<HOST_REGS;hr++) {
624 if(cur->regmap[hr]==r+64) {
625 cur->regmap[hr]=reg|64;
626 cur->dirty&=~(1<<hr);
627 cur->isconst&=~(1<<hr);
628 return;
629 }
630 }
631 for(hr=0;hr<HOST_REGS;hr++) {
632 if(cur->regmap[hr]==r) {
633 cur->regmap[hr]=reg|64;
634 cur->dirty&=~(1<<hr);
635 cur->isconst&=~(1<<hr);
636 return;
637 }
638 }
639 }
640 }
641 }
c43b5311 642 SysPrintf("This shouldn't happen");exit(1);
57871462 643}
644
645// Allocate a temporary register. This is done without regard to
646// dirty status or whether the register we request is on the unneeded list
647// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 648static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 649{
650 int r,hr;
651 int preferred_reg = -1;
9f51b4b9 652
57871462 653 // see if it's already allocated
654 for(hr=0;hr<HOST_REGS;hr++)
655 {
656 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
657 }
9f51b4b9 658
57871462 659 // Try to allocate any available register
660 for(hr=HOST_REGS-1;hr>=0;hr--) {
661 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
662 cur->regmap[hr]=reg;
663 cur->dirty&=~(1<<hr);
664 cur->isconst&=~(1<<hr);
665 return;
666 }
667 }
9f51b4b9 668
57871462 669 // Find an unneeded register
670 for(hr=HOST_REGS-1;hr>=0;hr--)
671 {
672 r=cur->regmap[hr];
673 if(r>=0) {
674 if(r<64) {
675 if((cur->u>>r)&1) {
676 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
677 cur->regmap[hr]=reg;
678 cur->dirty&=~(1<<hr);
679 cur->isconst&=~(1<<hr);
680 return;
681 }
682 }
683 }
684 else
685 {
686 if((cur->uu>>(r&63))&1) {
687 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
688 cur->regmap[hr]=reg;
689 cur->dirty&=~(1<<hr);
690 cur->isconst&=~(1<<hr);
691 return;
692 }
693 }
694 }
695 }
696 }
9f51b4b9 697
57871462 698 // Ok, now we have to evict someone
699 // Pick a register we hopefully won't need soon
700 // TODO: we might want to follow unconditional jumps here
701 // TODO: get rid of dupe code and make this into a function
702 u_char hsn[MAXREG+1];
703 memset(hsn,10,sizeof(hsn));
704 int j;
705 lsn(hsn,i,&preferred_reg);
706 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
707 if(i>0) {
708 // Don't evict the cycle count at entry points, otherwise the entry
709 // stub will have to write it.
710 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
711 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
712 for(j=10;j>=3;j--)
713 {
714 for(r=1;r<=MAXREG;r++)
715 {
716 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
717 for(hr=0;hr<HOST_REGS;hr++) {
718 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
719 if(cur->regmap[hr]==r+64) {
720 cur->regmap[hr]=reg;
721 cur->dirty&=~(1<<hr);
722 cur->isconst&=~(1<<hr);
723 return;
724 }
725 }
726 }
727 for(hr=0;hr<HOST_REGS;hr++) {
728 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
729 if(cur->regmap[hr]==r) {
730 cur->regmap[hr]=reg;
731 cur->dirty&=~(1<<hr);
732 cur->isconst&=~(1<<hr);
733 return;
734 }
735 }
736 }
737 }
738 }
739 }
740 }
741 for(j=10;j>=0;j--)
742 {
743 for(r=1;r<=MAXREG;r++)
744 {
745 if(hsn[r]==j) {
746 for(hr=0;hr<HOST_REGS;hr++) {
747 if(cur->regmap[hr]==r+64) {
748 cur->regmap[hr]=reg;
749 cur->dirty&=~(1<<hr);
750 cur->isconst&=~(1<<hr);
751 return;
752 }
753 }
754 for(hr=0;hr<HOST_REGS;hr++) {
755 if(cur->regmap[hr]==r) {
756 cur->regmap[hr]=reg;
757 cur->dirty&=~(1<<hr);
758 cur->isconst&=~(1<<hr);
759 return;
760 }
761 }
762 }
763 }
764 }
c43b5311 765 SysPrintf("This shouldn't happen");exit(1);
57871462 766}
e2b5e7aa 767
57871462 768// Allocate a specific ARM register.
e2b5e7aa 769static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 770{
771 int n;
f776eb14 772 int dirty=0;
9f51b4b9 773
57871462 774 // see if it's already allocated (and dealloc it)
775 for(n=0;n<HOST_REGS;n++)
776 {
f776eb14 777 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
778 dirty=(cur->dirty>>n)&1;
779 cur->regmap[n]=-1;
780 }
57871462 781 }
9f51b4b9 782
57871462 783 cur->regmap[hr]=reg;
784 cur->dirty&=~(1<<hr);
f776eb14 785 cur->dirty|=dirty<<hr;
57871462 786 cur->isconst&=~(1<<hr);
787}
788
789// Alloc cycle count into dedicated register
e2b5e7aa 790static void alloc_cc(struct regstat *cur,int i)
57871462 791{
792 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
793}
794
795/* Special alloc */
796
797
798/* Assembler */
799
e2b5e7aa 800static unused char regname[16][4] = {
57871462 801 "r0",
802 "r1",
803 "r2",
804 "r3",
805 "r4",
806 "r5",
807 "r6",
808 "r7",
809 "r8",
810 "r9",
811 "r10",
812 "fp",
813 "r12",
814 "sp",
815 "lr",
816 "pc"};
817
e2b5e7aa 818static void output_w32(u_int word)
57871462 819{
820 *((u_int *)out)=word;
821 out+=4;
822}
e2b5e7aa 823
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 831
// Build the operand fields of an immediate-form instruction word:
// rn in bits 16-19, rd in bits 12-15, an 8-bit immediate in bits 0-7 and,
// from bit 7 upward, ((32-shift)&30) as the rotation for the requested
// (even) left shift of that immediate.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotation=(32-shift)&30;
  u_int fields=imm;
  fields|=rotation<<7;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 840
// Try to express imm as an 8-bit value combined with an even rotation.
// On success stores the 12-bit operand encoding in *encoded and returns 1;
// otherwise returns 0 and leaves *encoded set to 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // Rotate imm right two bits at a time until it fits in 8 bits
  for(rot=32;rot>0;rot-=2) {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
e2b5e7aa 856
857static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 858{
859 u_int ret=genimm(imm,encoded);
860 assert(ret);
581335b0 861 (void)ret;
cfbd3c6e 862}
e2b5e7aa 863
864static u_int genjmp(u_int addr)
57871462 865{
866 int offset=addr-(int)out-8;
e80343e2 867 if(offset<-33554432||offset>=33554432) {
868 if (addr>2) {
c43b5311 869 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 870 exit(1);
871 }
872 return 0;
873 }
57871462 874 return ((u_int)offset>>2)&0xffffff;
875}
876
e2b5e7aa 877static void emit_mov(int rs,int rt)
57871462 878{
879 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
880 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
881}
882
e2b5e7aa 883static void emit_movs(int rs,int rt)
57871462 884{
885 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
886 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
887}
888
e2b5e7aa 889static void emit_add(int rs1,int rs2,int rt)
57871462 890{
891 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
892 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
893}
894
e2b5e7aa 895static void emit_adds(int rs1,int rs2,int rt)
57871462 896{
897 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
898 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
899}
900
e2b5e7aa 901static void emit_adcs(int rs1,int rs2,int rt)
57871462 902{
903 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
904 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
905}
906
e2b5e7aa 907static void emit_sbc(int rs1,int rs2,int rt)
57871462 908{
909 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
910 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
911}
912
e2b5e7aa 913static void emit_sbcs(int rs1,int rs2,int rt)
57871462 914{
915 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
916 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
917}
918
e2b5e7aa 919static void emit_neg(int rs, int rt)
57871462 920{
921 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
922 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
923}
924
e2b5e7aa 925static void emit_negs(int rs, int rt)
57871462 926{
927 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
928 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
929}
930
e2b5e7aa 931static void emit_sub(int rs1,int rs2,int rt)
57871462 932{
933 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
934 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
935}
936
e2b5e7aa 937static void emit_subs(int rs1,int rs2,int rt)
57871462 938{
939 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
940 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
941}
942
e2b5e7aa 943static void emit_zeroreg(int rt)
57871462 944{
945 assem_debug("mov %s,#0\n",regname[rt]);
946 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
947}
948
e2b5e7aa 949static void emit_loadlp(u_int imm,u_int rt)
790ee18e 950{
951 add_literal((int)out,imm);
952 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
953 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
954}
e2b5e7aa 955
956static void emit_movw(u_int imm,u_int rt)
790ee18e 957{
958 assert(imm<65536);
959 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
960 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
961}
e2b5e7aa 962
963static void emit_movt(u_int imm,u_int rt)
790ee18e 964{
965 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
966 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
967}
e2b5e7aa 968
969static void emit_movimm(u_int imm,u_int rt)
790ee18e 970{
971 u_int armval;
972 if(genimm(imm,&armval)) {
973 assem_debug("mov %s,#%d\n",regname[rt],imm);
974 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
975 }else if(genimm(~imm,&armval)) {
976 assem_debug("mvn %s,#%d\n",regname[rt],imm);
977 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
978 }else if(imm<65536) {
665f33e1 979 #ifndef HAVE_ARMV7
790ee18e 980 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
981 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
982 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
983 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
984 #else
985 emit_movw(imm,rt);
986 #endif
987 }else{
665f33e1 988 #ifndef HAVE_ARMV7
790ee18e 989 emit_loadlp(imm,rt);
990 #else
991 emit_movw(imm&0x0000FFFF,rt);
992 emit_movt(imm&0xFFFF0000,rt);
993 #endif
994 }
995}
e2b5e7aa 996
997static void emit_pcreladdr(u_int rt)
790ee18e 998{
999 assem_debug("add %s,pc,#?\n",regname[rt]);
1000 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1001}
1002
e2b5e7aa 1003static void emit_loadreg(int r, int hr)
57871462 1004{
3d624f89 1005 if(r&64) {
c43b5311 1006 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1007 assert(0);
1008 return;
3d624f89 1009 }
57871462 1010 if((r&63)==0)
1011 emit_zeroreg(hr);
1012 else {
3d624f89 1013 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1014 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1015 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1016 if(r==CCREG) addr=(int)&cycle_count;
1017 if(r==CSREG) addr=(int)&Status;
1018 if(r==FSREG) addr=(int)&FCR31;
1019 if(r==INVCP) addr=(int)&invc_ptr;
1020 u_int offset = addr-(u_int)&dynarec_local;
1021 assert(offset<4096);
1022 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1023 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1024 }
1025}
e2b5e7aa 1026
1027static void emit_storereg(int r, int hr)
57871462 1028{
3d624f89 1029 if(r&64) {
c43b5311 1030 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
3d624f89 1034 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1035 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1036 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1037 if(r==CCREG) addr=(int)&cycle_count;
1038 if(r==FSREG) addr=(int)&FCR31;
1039 u_int offset = addr-(u_int)&dynarec_local;
1040 assert(offset<4096);
1041 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1042 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1043}
1044
e2b5e7aa 1045static void emit_test(int rs, int rt)
57871462 1046{
1047 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1048 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1049}
1050
e2b5e7aa 1051static void emit_testimm(int rs,int imm)
57871462 1052{
1053 u_int armval;
5a05d80c 1054 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1055 genimm_checked(imm,&armval);
57871462 1056 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1057}
1058
e2b5e7aa 1059static void emit_testeqimm(int rs,int imm)
b9b61529 1060{
1061 u_int armval;
1062 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1063 genimm_checked(imm,&armval);
b9b61529 1064 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1065}
1066
e2b5e7aa 1067static void emit_not(int rs,int rt)
57871462 1068{
1069 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1070 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1071}
1072
e2b5e7aa 1073static void emit_mvnmi(int rs,int rt)
b9b61529 1074{
1075 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1076 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1077}
1078
e2b5e7aa 1079static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1080{
1081 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1082 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1083}
1084
e2b5e7aa 1085static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1086{
1087 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1088 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1089}
e2b5e7aa 1090
1091static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1092{
1093 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1094 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1095}
1096
e2b5e7aa 1097static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1098{
1099 assert(rs<16);
1100 assert(rt<16);
1101 assert(imm<32);
1102 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1103 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1104}
1105
e2b5e7aa 1106static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1107{
1108 assert(rs<16);
1109 assert(rt<16);
1110 assert(imm<32);
1111 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1112 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1113}
1114
e2b5e7aa 1115static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1116{
1117 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1118 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1119}
1120
e2b5e7aa 1121static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1122{
1123 assert(rs<16);
1124 assert(rt<16);
1125 if(imm!=0) {
57871462 1126 u_int armval;
1127 if(genimm(imm,&armval)) {
1128 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1129 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1130 }else if(genimm(-imm,&armval)) {
8a0a8423 1131 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1132 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1133 #ifdef HAVE_ARMV7
1134 }else if(rt!=rs&&(u_int)imm<65536) {
1135 emit_movw(imm&0x0000ffff,rt);
1136 emit_add(rs,rt,rt);
1137 }else if(rt!=rs&&(u_int)-imm<65536) {
1138 emit_movw(-imm&0x0000ffff,rt);
1139 emit_sub(rs,rt,rt);
1140 #endif
1141 }else if((u_int)-imm<65536) {
57871462 1142 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1143 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1144 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1145 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1146 }else {
1147 do {
1148 int shift = (ffs(imm) - 1) & ~1;
1149 int imm8 = imm & (0xff << shift);
1150 genimm_checked(imm8,&armval);
1151 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1152 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1153 rs = rt;
1154 imm &= ~imm8;
1155 }
1156 while (imm != 0);
57871462 1157 }
1158 }
1159 else if(rs!=rt) emit_mov(rs,rt);
1160}
1161
e2b5e7aa 1162static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1163{
1164 assert(imm>-65536&&imm<65536);
1165 u_int armval;
1166 if(genimm(imm,&armval)) {
1167 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1168 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1169 }else if(genimm(-imm,&armval)) {
1170 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1171 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1172 }else if(imm<0) {
1173 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1174 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1175 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1176 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1177 }else{
1178 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1179 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1180 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1181 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1182 }
1183}
e2b5e7aa 1184
1185static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1186{
1187 emit_addimm(rt,imm,rt);
1188}
1189
e2b5e7aa 1190static void emit_addnop(u_int r)
57871462 1191{
1192 assert(r<16);
1193 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1194 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1195}
1196
e2b5e7aa 1197static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1198{
1199 u_int armval;
cfbd3c6e 1200 genimm_checked(imm,&armval);
57871462 1201 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1202 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1203}
1edfcc68 1204
e2b5e7aa 1205static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1206{
1207 assert(0);
1208 u_int armval;
cfbd3c6e 1209 genimm_checked(imm,&armval);
57871462 1210 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1211 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1212}
1213
e2b5e7aa 1214static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1215{
1216 // TODO: if(genimm(imm,&armval)) ...
1217 // else
1218 emit_movimm(imm,HOST_TEMPREG);
1219 emit_adds(HOST_TEMPREG,rsl,rtl);
1220 emit_adcimm(rsh,0,rth);
1221}
1222
e2b5e7aa 1223static void emit_andimm(int rs,int imm,int rt)
57871462 1224{
1225 u_int armval;
790ee18e 1226 if(imm==0) {
1227 emit_zeroreg(rt);
1228 }else if(genimm(imm,&armval)) {
57871462 1229 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1230 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1231 }else if(genimm(~imm,&armval)) {
1232 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1234 }else if(imm==65535) {
332a4533 1235 #ifndef HAVE_ARMV6
57871462 1236 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1237 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1238 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1239 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1240 #else
1241 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1242 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1243 #endif
1244 }else{
1245 assert(imm>0&&imm<65535);
665f33e1 1246 #ifndef HAVE_ARMV7
57871462 1247 assem_debug("mov r14,#%d\n",imm&0xFF00);
1248 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1249 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1250 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1251 #else
1252 emit_movw(imm,HOST_TEMPREG);
1253 #endif
1254 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1255 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1256 }
1257}
1258
e2b5e7aa 1259static void emit_orimm(int rs,int imm,int rt)
57871462 1260{
1261 u_int armval;
790ee18e 1262 if(imm==0) {
1263 if(rs!=rt) emit_mov(rs,rt);
1264 }else if(genimm(imm,&armval)) {
57871462 1265 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1266 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1267 }else{
1268 assert(imm>0&&imm<65536);
1269 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1270 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1271 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1272 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1273 }
1274}
1275
e2b5e7aa 1276static void emit_xorimm(int rs,int imm,int rt)
57871462 1277{
57871462 1278 u_int armval;
790ee18e 1279 if(imm==0) {
1280 if(rs!=rt) emit_mov(rs,rt);
1281 }else if(genimm(imm,&armval)) {
57871462 1282 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1283 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1284 }else{
514ed0d9 1285 assert(imm>0&&imm<65536);
57871462 1286 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1287 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1288 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1289 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1290 }
1291}
1292
e2b5e7aa 1293static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1294{
1295 assert(imm>0);
1296 assert(imm<32);
1297 //if(imm==1) ...
1298 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1299 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1300}
1301
e2b5e7aa 1302static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1303{
1304 assert(imm>0);
1305 assert(imm<32);
1306 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1308}
1309
e2b5e7aa 1310static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1311{
1312 assert(imm>0);
1313 assert(imm<32);
1314 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1315 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1316}
1317
e2b5e7aa 1318static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1319{
1320 assert(imm>0);
1321 assert(imm<32);
1322 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1323 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1324}
1325
e2b5e7aa 1326static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1327{
1328 assert(imm>0);
1329 assert(imm<32);
1330 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1331 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1332}
1333
e2b5e7aa 1334static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1339 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1340}
1341
e2b5e7aa 1342static void emit_shldimm(int rs,int rs2,u_int imm,int rt)
57871462 1343{
1344 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1345 assert(imm>0);
1346 assert(imm<32);
1347 //if(imm==1) ...
1348 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1349 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1350 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1351 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1352}
1353
e2b5e7aa 1354static void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
57871462 1355{
1356 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1357 assert(imm>0);
1358 assert(imm<32);
1359 //if(imm==1) ...
1360 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1362 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1363 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1364}
1365
// Sign-extend the low 16 bits of rs into rt: a single "sxth" when
// HAVE_ARMV6 is defined, otherwise a shift left by 16 followed by an
// arithmetic shift right by 16.
static void emit_signextend16(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
#endif
}
1376
// Sign-extend the low 8 bits of rs into rt: a single "sxtb" when
// HAVE_ARMV6 is defined, otherwise a shift left by 24 followed by an
// arithmetic shift right by 24.
static void emit_signextend8(int rs, int rt)
{
#ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
#else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
#endif
}
1387
e2b5e7aa 1388static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1389{
1390 assert(rs<16);
1391 assert(rt<16);
1392 assert(shift<16);
1393 //if(imm==1) ...
1394 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1395 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1396}
e2b5e7aa 1397
1398static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1399{
1400 assert(rs<16);
1401 assert(rt<16);
1402 assert(shift<16);
1403 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1404 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1405}
e2b5e7aa 1406
1407static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1408{
1409 assert(rs<16);
1410 assert(rt<16);
1411 assert(shift<16);
1412 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1413 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1414}
57871462 1415
e2b5e7aa 1416static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1417{
1418 assert(rs<16);
1419 assert(rt<16);
1420 assert(shift<16);
1421 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1422 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1423}
e2b5e7aa 1424
1425static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1426{
1427 assert(rs<16);
1428 assert(rt<16);
1429 assert(shift<16);
1430 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1431 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1432}
1433
e2b5e7aa 1434static void emit_cmpimm(int rs,int imm)
57871462 1435{
1436 u_int armval;
1437 if(genimm(imm,&armval)) {
5a05d80c 1438 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1439 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1440 }else if(genimm(-imm,&armval)) {
5a05d80c 1441 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1442 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1443 }else if(imm>0) {
1444 assert(imm<65536);
57871462 1445 emit_movimm(imm,HOST_TEMPREG);
57871462 1446 assem_debug("cmp %s,r14\n",regname[rs]);
1447 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1448 }else{
1449 assert(imm>-65536);
57871462 1450 emit_movimm(-imm,HOST_TEMPREG);
57871462 1451 assem_debug("cmn %s,r14\n",regname[rs]);
1452 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1453 }
1454}
1455
e2b5e7aa 1456static void emit_cmovne_imm(int imm,int rt)
57871462 1457{
1458 assem_debug("movne %s,#%d\n",regname[rt],imm);
1459 u_int armval;
cfbd3c6e 1460 genimm_checked(imm,&armval);
57871462 1461 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1462}
e2b5e7aa 1463
1464static void emit_cmovl_imm(int imm,int rt)
57871462 1465{
1466 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1467 u_int armval;
cfbd3c6e 1468 genimm_checked(imm,&armval);
57871462 1469 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1470}
e2b5e7aa 1471
1472static void emit_cmovb_imm(int imm,int rt)
57871462 1473{
1474 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1475 u_int armval;
cfbd3c6e 1476 genimm_checked(imm,&armval);
57871462 1477 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1478}
e2b5e7aa 1479
1480static void emit_cmovs_imm(int imm,int rt)
57871462 1481{
1482 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1483 u_int armval;
cfbd3c6e 1484 genimm_checked(imm,&armval);
57871462 1485 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1486}
e2b5e7aa 1487
1488static void emit_cmove_reg(int rs,int rt)
57871462 1489{
1490 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1491 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1492}
e2b5e7aa 1493
1494static void emit_cmovne_reg(int rs,int rt)
57871462 1495{
1496 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1497 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1498}
e2b5e7aa 1499
1500static void emit_cmovl_reg(int rs,int rt)
57871462 1501{
1502 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1503 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1504}
e2b5e7aa 1505
1506static void emit_cmovs_reg(int rs,int rt)
57871462 1507{
1508 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1509 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1510}
1511
// Set rt to 1 if rs < imm ("lt" condition after the compare), else 0.
// When rt aliases rs the zeroing is deferred until after the compare so
// the source value is still intact when it is read.
static void emit_slti32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1519
// Set rt to 1 if the compare of rs with imm leaves the "cc" condition true,
// else 0. When rt aliases rs the zeroing is deferred until after the compare
// so the source value is still intact when it is read.
static void emit_sltiu32(int rs, int imm, int rt)
{
  const int aliased = (rs == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1527
// 64-bit "set if less than immediate" on the register pair rsh:rsl with a
// 32-bit result in rt. The low-word result from emit_slti32() is corrected
// using the high word. rsh must not be the same register as rt.
static void emit_slti64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_slti32(rsl, imm, rt);

  if (imm < 0) {
    // Negative constant: the high word is compared against -1.
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
  } else {
    // Non-negative constant: the high word is tested against itself.
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
    emit_cmovs_imm(1, rt);
  }
}
e2b5e7aa 1545
// 64-bit variant of emit_sltiu32() on the register pair rsh:rsl with a
// 32-bit result in rt. The low-word result is corrected using the high word.
// rsh must not be the same register as rt.
static void emit_sltiu64_32(int rsh, int rsl, int imm, int rt)
{
  assert(rsh != rt);
  emit_sltiu32(rsl, imm, rt);

  if (imm < 0) {
    // Negative constant: the high word is compared against -1.
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
  } else {
    // Non-negative constant: a non-zero high word forces the result to 0.
    emit_test(rsh, rsh);
    emit_cmovne_imm(0, rt);
  }
}
1561
e2b5e7aa 1562static void emit_cmp(int rs,int rt)
57871462 1563{
1564 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1565 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1566}
e2b5e7aa 1567
// Set rt to 1, then overwrite it with 0 if the compare of rs against 1
// leaves the "lt" condition true (i.e. rs < 1).
static void emit_set_gz32(int rs, int rt)
{
  emit_cmpimm(rs, 1);   // compare first: rs may alias rt
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1575
// Set flags from rs (copying it into rt with emit_movs() when the registers
// differ, or testing it in place otherwise), then move 1 into rt under the
// "ne" condition.
static void emit_set_nz32(int rs, int rt)
{
  if (rs == rt)
    emit_test(rs, rs);
  else
    emit_movs(rs, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1583
// 64-bit variant of emit_set_gz32() on the register pair rsh:rsl: the
// low-word result in rt is then overridden from a test of the high word
// (1 under "ne", then 0 under "mi").
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1592
// OR the two halves rsh and rsl into rt with flags set, then move 1 into rt
// under the "ne" condition.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1599
// Set rt to 1 if the compare of rs1 with rs2 leaves the "lt" condition true,
// else 0. If rt aliases either source, zeroing is deferred until after the
// compare so both operands are still intact when they are read.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1608
// Set rt to 1 if the compare of rs1 with rs2 leaves the "cc" condition true,
// else 0. If rt aliases either source, zeroing is deferred until after the
// compare so both operands are still intact when they are read.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased = (rs1 == rt || rs2 == rt);

  if (!aliased)
    emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  if (aliased)
    emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1617
1618static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1619{
1620 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1621 assert(u1!=rt);
1622 assert(u2!=rt);
1623 emit_cmp(l1,l2);
1624 emit_movimm(0,rt);
1625 emit_sbcs(u1,u2,HOST_TEMPREG);
1626 emit_cmovl_imm(1,rt);
1627}
e2b5e7aa 1628
1629static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1630{
1631 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1632 assert(u1!=rt);
1633 assert(u2!=rt);
1634 emit_cmp(l1,l2);
1635 emit_movimm(0,rt);
1636 emit_sbcs(u1,u2,HOST_TEMPREG);
1637 emit_cmovb_imm(1,rt);
1638}
1639
e2b5e7aa 1640static void emit_call(int a)
57871462 1641{
1642 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1643 u_int offset=genjmp(a);
1644 output_w32(0xeb000000|offset);
1645}
e2b5e7aa 1646
1647static void emit_jmp(int a)
57871462 1648{
1649 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1650 u_int offset=genjmp(a);
1651 output_w32(0xea000000|offset);
1652}
e2b5e7aa 1653
1654static void emit_jne(int a)
57871462 1655{
1656 assem_debug("bne %x\n",a);
1657 u_int offset=genjmp(a);
1658 output_w32(0x1a000000|offset);
1659}
e2b5e7aa 1660
1661static void emit_jeq(int a)
57871462 1662{
1663 assem_debug("beq %x\n",a);
1664 u_int offset=genjmp(a);
1665 output_w32(0x0a000000|offset);
1666}
e2b5e7aa 1667
1668static void emit_js(int a)
57871462 1669{
1670 assem_debug("bmi %x\n",a);
1671 u_int offset=genjmp(a);
1672 output_w32(0x4a000000|offset);
1673}
e2b5e7aa 1674
1675static void emit_jns(int a)
57871462 1676{
1677 assem_debug("bpl %x\n",a);
1678 u_int offset=genjmp(a);
1679 output_w32(0x5a000000|offset);
1680}
e2b5e7aa 1681
1682static void emit_jl(int a)
57871462 1683{
1684 assem_debug("blt %x\n",a);
1685 u_int offset=genjmp(a);
1686 output_w32(0xba000000|offset);
1687}
e2b5e7aa 1688
1689static void emit_jge(int a)
57871462 1690{
1691 assem_debug("bge %x\n",a);
1692 u_int offset=genjmp(a);
1693 output_w32(0xaa000000|offset);
1694}
e2b5e7aa 1695
1696static void emit_jno(int a)
57871462 1697{
1698 assem_debug("bvc %x\n",a);
1699 u_int offset=genjmp(a);
1700 output_w32(0x7a000000|offset);
1701}
e2b5e7aa 1702
1703static void emit_jc(int a)
57871462 1704{
1705 assem_debug("bcs %x\n",a);
1706 u_int offset=genjmp(a);
1707 output_w32(0x2a000000|offset);
1708}
e2b5e7aa 1709
1710static void emit_jcc(int a)
57871462 1711{
1712 assem_debug("bcc %x\n",a);
1713 u_int offset=genjmp(a);
1714 output_w32(0x3a000000|offset);
1715}
1716
e2b5e7aa 1717static void emit_callreg(u_int r)
57871462 1718{
c6c3b1b3 1719 assert(r<15);
1720 assem_debug("blx %s\n",regname[r]);
1721 output_w32(0xe12fff30|r);
57871462 1722}
e2b5e7aa 1723
1724static void emit_jmpreg(u_int r)
57871462 1725{
1726 assem_debug("mov pc,%s\n",regname[r]);
1727 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1728}
1729
e2b5e7aa 1730static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1731{
1732 assert(offset>-4096&&offset<4096);
1733 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1734 if(offset>=0) {
1735 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1736 }else{
1737 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1738 }
1739}
e2b5e7aa 1740
1741static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1742{
1743 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1744 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1745}
e2b5e7aa 1746
1747static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1748{
1749 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1750 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1751}
e2b5e7aa 1752
1753static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1754{
1755 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1756 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1757}
e2b5e7aa 1758
1759static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1760{
1761 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1762 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1763}
e2b5e7aa 1764
1765static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1766{
1767 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1768 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1769}
e2b5e7aa 1770
1771static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1772{
1773 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1774 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1775}
e2b5e7aa 1776
// Word load with an optional map register. With map < 0 this is a plain
// offset load from rs; otherwise addr must be 0 and the load is indexed by
// map scaled by 4.
static void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr == 0);
  emit_readword_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1785
// Load two consecutive words into rh (skipped when rh < 0) and rl.
// With map < 0 the words come from rs+addr and rs+addr+4. With a map
// register, map is incremented by 1 between the two loads (map is modified),
// and rh must not be rs.
static void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high = (rh >= 0);

  if (map < 0) {
    if (want_high)
      emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr + 4, rs, rl);
    return;
  }

  assert(rh != rs);
  if (want_high)
    emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map, 1, map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
e2b5e7aa 1798
1799static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1800{
1801 assert(offset>-256&&offset<256);
1802 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1803 if(offset>=0) {
1804 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1805 }else{
1806 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1807 }
1808}
e2b5e7aa 1809
1810static void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
57871462 1811{
1812 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1813 else {
1814 if(addr==0) {
1815 emit_shlimm(map,2,map);
1816 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1817 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1818 }else{
1819 assert(addr>-256&&addr<256);
1820 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1821 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1822 emit_movsbl_indexed(addr, rt, rt);
1823 }
1824 }
1825}
e2b5e7aa 1826
1827static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1828{
1829 assert(offset>-256&&offset<256);
1830 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1831 if(offset>=0) {
1832 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1833 }else{
1834 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1835 }
1836}
e2b5e7aa 1837
1838static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1839{
1840 assert(offset>-4096&&offset<4096);
1841 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1842 if(offset>=0) {
1843 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1844 }else{
1845 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1846 }
1847}
e2b5e7aa 1848
1849static void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
57871462 1850{
1851 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1852 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1853}
e2b5e7aa 1854
// Unsigned byte load with an optional map register. With map < 0 this is a
// plain offset load from rs. Otherwise the load is indexed by map scaled by 4;
// a non-zero addr is first added to rs, with the sum placed in rt.
static void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if (map < 0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }

  emit_movzbl_dualindexedx4(rs, map, rt);
}
e2b5e7aa 1867
1868static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1869{
1870 assert(offset>-256&&offset<256);
1871 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1872 if(offset>=0) {
1873 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1874 }else{
1875 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1876 }
1877}
e2b5e7aa 1878
054175e9 1879static void emit_ldrd(int offset, int rs, int rt)
1880{
1881 assert(offset>-256&&offset<256);
1882 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1883 if(offset>=0) {
1884 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1885 }else{
1886 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1887 }
1888}
e2b5e7aa 1889
1890static void emit_readword(int addr, int rt)
57871462 1891{
1892 u_int offset = addr-(u_int)&dynarec_local;
1893 assert(offset<4096);
1894 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1895 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1896}
e2b5e7aa 1897
1898static unused void emit_movsbl(int addr, int rt)
57871462 1899{
1900 u_int offset = addr-(u_int)&dynarec_local;
1901 assert(offset<256);
1902 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1903 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1904}
e2b5e7aa 1905
1906static unused void emit_movswl(int addr, int rt)
57871462 1907{
1908 u_int offset = addr-(u_int)&dynarec_local;
1909 assert(offset<256);
1910 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1911 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1912}
e2b5e7aa 1913
1914static unused void emit_movzbl(int addr, int rt)
57871462 1915{
1916 u_int offset = addr-(u_int)&dynarec_local;
1917 assert(offset<4096);
1918 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1919 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1920}
e2b5e7aa 1921
1922static unused void emit_movzwl(int addr, int rt)
57871462 1923{
1924 u_int offset = addr-(u_int)&dynarec_local;
1925 assert(offset<256);
1926 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1927 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1928}
57871462 1929
e2b5e7aa 1930static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1931{
1932 assert(offset>-4096&&offset<4096);
1933 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1934 if(offset>=0) {
1935 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1936 }else{
1937 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1938 }
1939}
e2b5e7aa 1940
1941static void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
57871462 1942{
1943 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1944 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
1945}
e2b5e7aa 1946
// Word store with an optional map register. With map < 0 this is a plain
// offset store to rs; otherwise addr must be 0 and the store is indexed by
// map scaled by 4. The temp parameter is not used by this function.
static void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr == 0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
e2b5e7aa 1955
// Store the register pair rh/rl as two consecutive words.
//  - map < 0: plain offset stores at addr (skipped when rh < 0) and addr+4.
//  - map >= 0 (rh must be valid): when temp is distinct from rs, temp = map+1
//    is used as the map for the second word; when temp == rs, rs itself is
//    advanced by 4 between the stores (rs is modified).
static void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    if (rh >= 0)
      emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr + 4, rs);
    return;
  }

  assert(rh >= 0);
  if (temp != rs) {
    emit_addimm(map, 1, temp);
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
    emit_addimm(rs, 4, rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
e2b5e7aa 1972
1973static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1974{
1975 assert(offset>-256&&offset<256);
1976 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1977 if(offset>=0) {
1978 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1979 }else{
1980 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1981 }
1982}
e2b5e7aa 1983
1984static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1985{
1986 assert(offset>-4096&&offset<4096);
1987 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1988 if(offset>=0) {
1989 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1990 }else{
1991 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1992 }
1993}
e2b5e7aa 1994
1995static void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
57871462 1996{
1997 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1998 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1999}
e2b5e7aa 2000
// Byte store with an optional map register. With map < 0 this is a plain
// offset store to rs. Otherwise the store is indexed by map scaled by 4;
// a non-zero addr is first added to rs, with the sum placed in temp.
static void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if (map < 0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }

  if (addr != 0) {
    emit_addimm(rs, addr, temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }

  emit_writebyte_dualindexedx4(rt, rs, map);
}
e2b5e7aa 2013
2014static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2015{
2016 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2017 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2018}
e2b5e7aa 2019
2020static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2021{
2022 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2023 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2024}
e2b5e7aa 2025
2026static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 2027{
2028 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2029 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2030}
e2b5e7aa 2031
2032static void emit_writeword(int rt, int addr)
57871462 2033{
2034 u_int offset = addr-(u_int)&dynarec_local;
2035 assert(offset<4096);
2036 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2037 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2038}
e2b5e7aa 2039
2040static unused void emit_writehword(int rt, int addr)
57871462 2041{
2042 u_int offset = addr-(u_int)&dynarec_local;
2043 assert(offset<256);
2044 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2045 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2046}
e2b5e7aa 2047
2048static unused void emit_writebyte(int rt, int addr)
57871462 2049{
2050 u_int offset = addr-(u_int)&dynarec_local;
2051 assert(offset<4096);
74426039 2052 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2053 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2054}
57871462 2055
e2b5e7aa 2056static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2057{
2058 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2059 assert(rs1<16);
2060 assert(rs2<16);
2061 assert(hi<16);
2062 assert(lo<16);
2063 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2064}
e2b5e7aa 2065
2066static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 2067{
2068 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2069 assert(rs1<16);
2070 assert(rs2<16);
2071 assert(hi<16);
2072 assert(lo<16);
2073 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2074}
2075
e2b5e7aa 2076static void emit_clz(int rs,int rt)
57871462 2077{
2078 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2079 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2080}
2081
e2b5e7aa 2082static void emit_subcs(int rs1,int rs2,int rt)
57871462 2083{
2084 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2085 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2086}
2087
e2b5e7aa 2088static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 2089{
2090 assert(imm>0);
2091 assert(imm<32);
2092 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2093 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2094}
2095
e2b5e7aa 2096static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 2097{
2098 assert(imm>0);
2099 assert(imm<32);
2100 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2101 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2102}
2103
e2b5e7aa 2104static void emit_negmi(int rs, int rt)
57871462 2105{
2106 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2107 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2108}
2109
e2b5e7aa 2110static void emit_negsmi(int rs, int rt)
57871462 2111{
2112 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2113 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2114}
2115
e2b5e7aa 2116static void emit_orreq(u_int rs1,u_int rs2,u_int rt)
57871462 2117{
2118 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2119 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2120}
2121
e2b5e7aa 2122static void emit_orrne(u_int rs1,u_int rs2,u_int rt)
57871462 2123{
2124 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2125 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2126}
2127
e2b5e7aa 2128static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2129{
2130 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2131 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2132}
2133
e2b5e7aa 2134static void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2135{
2136 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2137 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2138}
2139
e2b5e7aa 2140static void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2141{
2142 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2143 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2144}
2145
e2b5e7aa 2146static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2147{
2148 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2149 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2150}
2151
e2b5e7aa 2152static void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2153{
2154 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2155 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2156}
2157
e2b5e7aa 2158static void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2159{
2160 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2161 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2162}
2163
e2b5e7aa 2164static void emit_teq(int rs, int rt)
57871462 2165{
2166 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2167 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2168}
2169
e2b5e7aa 2170static void emit_rsbimm(int rs, int imm, int rt)
57871462 2171{
2172 u_int armval;
cfbd3c6e 2173 genimm_checked(imm,&armval);
57871462 2174 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2175 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2176}
2177
2178// Load 2 immediates optimizing for small code size
e2b5e7aa 2179static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2180{
2181 emit_movimm(imm1,rt1);
2182 u_int armval;
2183 if(genimm(imm2-imm1,&armval)) {
2184 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2185 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2186 }else if(genimm(imm1-imm2,&armval)) {
2187 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2188 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2189 }
2190 else emit_movimm(imm2,rt2);
2191}
2192
2193// Conditionally select one of two immediates, optimizing for small code size
2194// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2195static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2196{
2197 u_int armval;
2198 if(genimm(imm2-imm1,&armval)) {
2199 emit_movimm(imm1,rt);
2200 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2201 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2202 }else if(genimm(imm1-imm2,&armval)) {
2203 emit_movimm(imm1,rt);
2204 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2205 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2206 }
2207 else {
665f33e1 2208 #ifndef HAVE_ARMV7
57871462 2209 emit_movimm(imm1,rt);
2210 add_literal((int)out,imm2);
2211 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2212 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2213 #else
2214 emit_movw(imm1&0x0000FFFF,rt);
2215 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2216 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2217 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2218 }
2219 emit_movt(imm1&0xFFFF0000,rt);
2220 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2221 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2222 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2223 }
2224 #endif
2225 }
2226}
2227
57871462 2228// special case for checking invalid_code
e2b5e7aa 2229static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2230{
2231 assert(imm<128&&imm>=0);
2232 assert(r>=0&&r<16);
2233 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2234 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2235 emit_cmpimm(HOST_TEMPREG,imm);
2236}
2237
e2b5e7aa 2238static void emit_callne(int a)
0bbd1454 2239{
2240 assem_debug("blne %x\n",a);
2241 u_int offset=genjmp(a);
2242 output_w32(0x1b000000|offset);
2243}
2244
57871462 2245// Used to preload hash table entries
e2b5e7aa 2246static unused void emit_prefetchreg(int r)
57871462 2247{
2248 assem_debug("pld %s\n",regname[r]);
2249 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2250}
2251
2252// Special case for mini_ht
e2b5e7aa 2253static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2254{
2255 assert(offset<4096);
2256 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2257 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2258}
2259
e2b5e7aa 2260static unused void emit_bicne_imm(int rs,int imm,int rt)
57871462 2261{
2262 u_int armval;
cfbd3c6e 2263 genimm_checked(imm,&armval);
57871462 2264 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2265 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2266}
2267
e2b5e7aa 2268static unused void emit_biccs_imm(int rs,int imm,int rt)
57871462 2269{
2270 u_int armval;
cfbd3c6e 2271 genimm_checked(imm,&armval);
57871462 2272 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2273 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2274}
2275
e2b5e7aa 2276static unused void emit_bicvc_imm(int rs,int imm,int rt)
57871462 2277{
2278 u_int armval;
cfbd3c6e 2279 genimm_checked(imm,&armval);
57871462 2280 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2281 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2282}
2283
e2b5e7aa 2284static unused void emit_bichi_imm(int rs,int imm,int rt)
57871462 2285{
2286 u_int armval;
cfbd3c6e 2287 genimm_checked(imm,&armval);
57871462 2288 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2289 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2290}
2291
e2b5e7aa 2292static unused void emit_orrvs_imm(int rs,int imm,int rt)
57871462 2293{
2294 u_int armval;
cfbd3c6e 2295 genimm_checked(imm,&armval);
57871462 2296 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2297 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2298}
2299
e2b5e7aa 2300static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2301{
2302 u_int armval;
cfbd3c6e 2303 genimm_checked(imm,&armval);
b9b61529 2304 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2305 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2306}
2307
e2b5e7aa 2308static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2309{
2310 u_int armval;
cfbd3c6e 2311 genimm_checked(imm,&armval);
b9b61529 2312 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2313 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2314}
2315
e2b5e7aa 2316static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2317{
2318 u_int armval;
2319 genimm_checked(imm,&armval);
2320 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2321 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2322}
2323
e2b5e7aa 2324static void emit_jno_unlikely(int a)
57871462 2325{
2326 //emit_jno(a);
2327 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2328 output_w32(0x72800000|rd_rn_rm(15,15,0));
2329}
2330
054175e9 2331static void save_regs_all(u_int reglist)
57871462 2332{
054175e9 2333 int i;
57871462 2334 if(!reglist) return;
2335 assem_debug("stmia fp,{");
054175e9 2336 for(i=0;i<16;i++)
2337 if(reglist&(1<<i))
2338 assem_debug("r%d,",i);
57871462 2339 assem_debug("}\n");
2340 output_w32(0xe88b0000|reglist);
2341}
e2b5e7aa 2342
054175e9 2343static void restore_regs_all(u_int reglist)
57871462 2344{
054175e9 2345 int i;
57871462 2346 if(!reglist) return;
2347 assem_debug("ldmia fp,{");
054175e9 2348 for(i=0;i<16;i++)
2349 if(reglist&(1<<i))
2350 assem_debug("r%d,",i);
57871462 2351 assem_debug("}\n");
2352 output_w32(0xe89b0000|reglist);
2353}
e2b5e7aa 2354
054175e9 2355// Save registers before function call
2356static void save_regs(u_int reglist)
2357{
4d646738 2358 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2359 save_regs_all(reglist);
2360}
e2b5e7aa 2361
054175e9 2362// Restore registers after function call
2363static void restore_regs(u_int reglist)
2364{
4d646738 2365 reglist&=CALLER_SAVE_REGS;
054175e9 2366 restore_regs_all(reglist);
2367}
57871462 2368
57871462 2369/* Stubs/epilogue */
2370
e2b5e7aa 2371static void literal_pool(int n)
57871462 2372{
2373 if(!literalcount) return;
2374 if(n) {
2375 if((int)out-literals[0][0]<4096-n) return;
2376 }
2377 u_int *ptr;
2378 int i;
2379 for(i=0;i<literalcount;i++)
2380 {
77750690 2381 u_int l_addr=(u_int)out;
2382 int j;
2383 for(j=0;j<i;j++) {
2384 if(literals[j][1]==literals[i][1]) {
2385 //printf("dup %08x\n",literals[i][1]);
2386 l_addr=literals[j][0];
2387 break;
2388 }
2389 }
57871462 2390 ptr=(u_int *)literals[i][0];
77750690 2391 u_int offset=l_addr-(u_int)ptr-8;
57871462 2392 assert(offset<4096);
2393 assert(!(offset&3));
2394 *ptr|=offset;
77750690 2395 if(l_addr==(u_int)out) {
2396 literals[i][0]=l_addr; // remember for dupes
2397 output_w32(literals[i][1]);
2398 }
57871462 2399 }
2400 literalcount=0;
2401}
2402
e2b5e7aa 2403static void literal_pool_jumpover(int n)
57871462 2404{
2405 if(!literalcount) return;
2406 if(n) {
2407 if((int)out-literals[0][0]<4096-n) return;
2408 }
2409 int jaddr=(int)out;
2410 emit_jmp(0);
2411 literal_pool(0);
2412 set_jump_target(jaddr,(int)out);
2413}
2414
e2b5e7aa 2415static void emit_extjump2(u_int addr, int target, int linker)
57871462 2416{
2417 u_char *ptr=(u_char *)addr;
2418 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2419 (void)ptr;
2420
57871462 2421 emit_loadlp(target,0);
2422 emit_loadlp(addr,1);
24385cae 2423 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2424 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2425//DEBUG >
2426#ifdef DEBUG_CYCLE_COUNT
2427 emit_readword((int)&last_count,ECX);
2428 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2429 emit_readword((int)&next_interupt,ECX);
2430 emit_writeword(HOST_CCREG,(int)&Count);
2431 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2432 emit_writeword(ECX,(int)&last_count);
2433#endif
2434//DEBUG <
2435 emit_jmp(linker);
2436}
2437
e2b5e7aa 2438static void emit_extjump(int addr, int target)
57871462 2439{
2440 emit_extjump2(addr, target, (int)dyna_linker);
2441}
e2b5e7aa 2442
2443static void emit_extjump_ds(int addr, int target)
57871462 2444{
2445 emit_extjump2(addr, target, (int)dyna_linker_ds);
2446}
2447
13e35c04 2448// put rt_val into rt, potentially making use of rs with value rs_val
2449static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2450{
8575a877 2451 u_int armval;
2452 int diff;
2453 if(genimm(rt_val,&armval)) {
2454 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2455 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2456 return;
2457 }
2458 if(genimm(~rt_val,&armval)) {
2459 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2460 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2461 return;
2462 }
2463 diff=rt_val-rs_val;
2464 if(genimm(diff,&armval)) {
2465 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2466 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2467 return;
2468 }else if(genimm(-diff,&armval)) {
2469 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2470 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2471 return;
2472 }
2473 emit_movimm(rt_val,rt);
2474}
2475
// Return 1 if emit_movimm_from can do its job cheaply for this pair, i.e.
// the values are equal, or v2-v1 or v1-v2 reduces to less than 0x100 once
// its trailing zero bit-pairs are shifted out.  Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int xs;
  int diff;

  if (v1 == v2)
    return 1;

  diff = v2 - v1;

  xs = diff;
  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  if (xs < 0x100)
    return 1;

  xs = -diff;
  while (xs != 0 && (xs & 3) == 0)
    xs >>= 2;
  return xs < 0x100;
}
cbbab9cd 2491
// Move host registers a0 and a1 into the first two argument registers
// (r0 and r1).  A negative register number means "nothing to pass" for that
// slot.  Trashes r2 (used as scratch when the two must be swapped).
static void pass_args(int a0, int a1)
{
  if (a0 == 1 && a1 == 0) {
    // must swap
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
    return;
  }

  if (a0 != 0 && a1 == 0) {
    // a1 currently lives in r0: move it out before r0 is overwritten.
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
    return;
  }

  if (a0 >= 0 && a0 != 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2508
b1be1eee 2509static void mov_loadtype_adj(int type,int rs,int rt)
2510{
2511 switch(type) {
2512 case LOADB_STUB: emit_signextend8(rs,rt); break;
2513 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2514 case LOADH_STUB: emit_signextend16(rs,rt); break;
2515 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2516 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2517 default: assert(0);
2518 }
2519}
2520
b1be1eee 2521#include "pcsxmem.h"
2522#include "pcsxmem_inline.c"
b1be1eee 2523
e2b5e7aa 2524static void do_readstub(int n)
57871462 2525{
2526 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2527 literal_pool(256);
2528 set_jump_target(stubs[n][1],(int)out);
2529 int type=stubs[n][0];
2530 int i=stubs[n][3];
2531 int rs=stubs[n][4];
2532 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2533 u_int reglist=stubs[n][7];
2534 signed char *i_regmap=i_regs->regmap;
581335b0 2535 int rt;
b9b61529 2536 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2537 rt=get_reg(i_regmap,FTEMP);
2538 }else{
57871462 2539 rt=get_reg(i_regmap,rt1[i]);
2540 }
2541 assert(rs>=0);
c6c3b1b3 2542 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2543 reglist|=(1<<rs);
2544 for(r=0;r<=12;r++) {
2545 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2546 temp=r; break;
2547 }
2548 }
db829eeb 2549 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2550 reglist&=~(1<<rt);
2551 if(temp==-1) {
2552 save_regs(reglist);
2553 regs_saved=1;
2554 temp=(rs==0)?2:0;
2555 }
2556 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2557 temp2=1;
2558 emit_readword((int)&mem_rtab,temp);
2559 emit_shrimm(rs,12,temp2);
2560 emit_readword_dualindexedx4(temp,temp2,temp2);
2561 emit_lsls_imm(temp2,1,temp2);
2562 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2563 switch(type) {
2564 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2565 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2566 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2567 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2568 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2569 }
2570 }
2571 if(regs_saved) {
2572 restore_jump=(int)out;
2573 emit_jcc(0); // jump to reg restore
2574 }
2575 else
2576 emit_jcc(stubs[n][2]); // return address
2577
2578 if(!regs_saved)
2579 save_regs(reglist);
2580 int handler=0;
2581 if(type==LOADB_STUB||type==LOADBU_STUB)
2582 handler=(int)jump_handler_read8;
2583 if(type==LOADH_STUB||type==LOADHU_STUB)
2584 handler=(int)jump_handler_read16;
2585 if(type==LOADW_STUB)
2586 handler=(int)jump_handler_read32;
2587 assert(handler!=0);
b96d3df7 2588 pass_args(rs,temp2);
c6c3b1b3 2589 int cc=get_reg(i_regmap,CCREG);
2590 if(cc<0)
2591 emit_loadreg(CCREG,2);
2573466a 2592 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2593 emit_call(handler);
2594 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2595 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2596 }
2597 if(restore_jump)
2598 set_jump_target(restore_jump,(int)out);
2599 restore_regs(reglist);
2600 emit_jmp(stubs[n][2]); // return address
57871462 2601}
2602
c6c3b1b3 2603// return memhandler, or get directly accessable address and return 0
e2b5e7aa 2604static u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
c6c3b1b3 2605{
2606 u_int l1,l2=0;
2607 l1=((u_int *)table)[addr>>12];
2608 if((l1&(1<<31))==0) {
2609 u_int v=l1<<1;
2610 *addr_host=v+addr;
2611 return 0;
2612 }
2613 else {
2614 l1<<=1;
2615 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2616 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2617 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2618 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2619 else
2620 l2=((u_int *)l1)[(addr&0xfff)/4];
2621 if((l2&(1<<31))==0) {
2622 u_int v=l2<<1;
2623 *addr_host=v+(addr&0xfff);
2624 return 0;
2625 }
2626 return l2<<1;
2627 }
2628}
c6c3b1b3 2629
e2b5e7aa 2630static void inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2631{
2632 int rs=get_reg(regmap,target);
57871462 2633 int rt=get_reg(regmap,target);
535d208a 2634 if(rs<0) rs=get_reg(regmap,-1);
57871462 2635 assert(rs>=0);
b1be1eee 2636 u_int handler,host_addr=0,is_dynamic,far_call=0;
2637 int cc=get_reg(regmap,CCREG);
2638 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2639 return;
c6c3b1b3 2640 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
2641 if (handler==0) {
db829eeb 2642 if(rt<0||rt1[i]==0)
c6c3b1b3 2643 return;
13e35c04 2644 if(addr!=host_addr)
2645 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 2646 switch(type) {
2647 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2648 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2649 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2650 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2651 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2652 default: assert(0);
2653 }
2654 return;
2655 }
b1be1eee 2656 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2657 if(is_dynamic) {
2658 if(type==LOADB_STUB||type==LOADBU_STUB)
2659 handler=(int)jump_handler_read8;
2660 if(type==LOADH_STUB||type==LOADHU_STUB)
2661 handler=(int)jump_handler_read16;
2662 if(type==LOADW_STUB)
2663 handler=(int)jump_handler_read32;
2664 }
c6c3b1b3 2665
2666 // call a memhandler
db829eeb 2667 if(rt>=0&&rt1[i]!=0)
c6c3b1b3 2668 reglist&=~(1<<rt);
2669 save_regs(reglist);
2670 if(target==0)
2671 emit_movimm(addr,0);
2672 else if(rs!=0)
2673 emit_mov(rs,0);
c6c3b1b3 2674 int offset=(int)handler-(int)out-8;
2675 if(offset<-33554432||offset>=33554432) {
2676 // unreachable memhandler, a plugin func perhaps
b1be1eee 2677 emit_movimm(handler,12);
2678 far_call=1;
2679 }
2680 if(cc<0)
2681 emit_loadreg(CCREG,2);
2682 if(is_dynamic) {
2683 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2684 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 2685 }
b1be1eee 2686 else {
2687 emit_readword((int)&last_count,3);
2688 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2689 emit_add(2,3,2);
2690 emit_writeword(2,(int)&Count);
2691 }
2692
2693 if(far_call)
2694 emit_callreg(12);
c6c3b1b3 2695 else
2696 emit_call(handler);
b1be1eee 2697
db829eeb 2698 if(rt>=0&&rt1[i]!=0) {
c6c3b1b3 2699 switch(type) {
2700 case LOADB_STUB: emit_signextend8(0,rt); break;
2701 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2702 case LOADH_STUB: emit_signextend16(0,rt); break;
2703 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2704 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2705 default: assert(0);
2706 }
2707 }
2708 restore_regs(reglist);
57871462 2709}
2710
e2b5e7aa 2711static void do_writestub(int n)
57871462 2712{
2713 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
2714 literal_pool(256);
2715 set_jump_target(stubs[n][1],(int)out);
2716 int type=stubs[n][0];
2717 int i=stubs[n][3];
2718 int rs=stubs[n][4];
2719 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2720 u_int reglist=stubs[n][7];
2721 signed char *i_regmap=i_regs->regmap;
581335b0 2722 int rt,r;
b9b61529 2723 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 2724 rt=get_reg(i_regmap,r=FTEMP);
2725 }else{
57871462 2726 rt=get_reg(i_regmap,r=rs2[i]);
2727 }
2728 assert(rs>=0);
2729 assert(rt>=0);
b96d3df7 2730 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
2731 int reglist2=reglist|(1<<rs)|(1<<rt);
2732 for(rtmp=0;rtmp<=12;rtmp++) {
2733 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2734 temp=rtmp; break;
2735 }
2736 }
2737 if(temp==-1) {
2738 save_regs(reglist);
2739 regs_saved=1;
2740 for(rtmp=0;rtmp<=3;rtmp++)
2741 if(rtmp!=rs&&rtmp!=rt)
2742 {temp=rtmp;break;}
2743 }
2744 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2745 temp2=3;
2746 emit_readword((int)&mem_wtab,temp);
2747 emit_shrimm(rs,12,temp2);
2748 emit_readword_dualindexedx4(temp,temp2,temp2);
2749 emit_lsls_imm(temp2,1,temp2);
2750 switch(type) {
2751 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2752 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2753 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2754 default: assert(0);
2755 }
2756 if(regs_saved) {
2757 restore_jump=(int)out;
2758 emit_jcc(0); // jump to reg restore
2759 }
2760 else
2761 emit_jcc(stubs[n][2]); // return address (invcode check)
2762
2763 if(!regs_saved)
2764 save_regs(reglist);
2765 int handler=0;
2766 switch(type) {
2767 case STOREB_STUB: handler=(int)jump_handler_write8; break;
2768 case STOREH_STUB: handler=(int)jump_handler_write16; break;
2769 case STOREW_STUB: handler=(int)jump_handler_write32; break;
2770 }
2771 assert(handler!=0);
2772 pass_args(rs,rt);
2773 if(temp2!=3)
2774 emit_mov(temp2,3);
2775 int cc=get_reg(i_regmap,CCREG);
2776 if(cc<0)
2777 emit_loadreg(CCREG,2);
2573466a 2778 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2779 // returns new cycle_count
2780 emit_call(handler);
2573466a 2781 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2782 if(cc<0)
2783 emit_storereg(CCREG,2);
2784 if(restore_jump)
2785 set_jump_target(restore_jump,(int)out);
2786 restore_regs(reglist);
2787 ra=stubs[n][2];
b96d3df7 2788 emit_jmp(ra);
57871462 2789}
2790
e2b5e7aa 2791static void inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2792{
2793 int rs=get_reg(regmap,-1);
57871462 2794 int rt=get_reg(regmap,target);
2795 assert(rs>=0);
2796 assert(rt>=0);
b96d3df7 2797 u_int handler,host_addr=0;
b96d3df7 2798 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
2799 if (handler==0) {
13e35c04 2800 if(addr!=host_addr)
2801 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2802 switch(type) {
2803 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2804 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2805 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2806 default: assert(0);
2807 }
2808 return;
2809 }
2810
2811 // call a memhandler
2812 save_regs(reglist);
13e35c04 2813 pass_args(rs,rt);
b96d3df7 2814 int cc=get_reg(regmap,CCREG);
2815 if(cc<0)
2816 emit_loadreg(CCREG,2);
2573466a 2817 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 2818 emit_movimm(handler,3);
2819 // returns new cycle_count
2820 emit_call((int)jump_handler_write_h);
2573466a 2821 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2822 if(cc<0)
2823 emit_storereg(CCREG,2);
2824 restore_regs(reglist);
57871462 2825}
2826
e2b5e7aa 2827static void do_unalignedwritestub(int n)
57871462 2828{
b7918751 2829 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
2830 literal_pool(256);
57871462 2831 set_jump_target(stubs[n][1],(int)out);
b7918751 2832
2833 int i=stubs[n][3];
2834 struct regstat *i_regs=(struct regstat *)stubs[n][4];
2835 int addr=stubs[n][5];
2836 u_int reglist=stubs[n][7];
2837 signed char *i_regmap=i_regs->regmap;
2838 int temp2=get_reg(i_regmap,FTEMP);
2839 int rt;
b7918751 2840 rt=get_reg(i_regmap,rs2[i]);
2841 assert(rt>=0);
2842 assert(addr>=0);
2843 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
2844 reglist|=(1<<addr);
2845 reglist&=~(1<<temp2);
2846
b96d3df7 2847#if 1
2848 // don't bother with it and call write handler
2849 save_regs(reglist);
2850 pass_args(addr,rt);
2851 int cc=get_reg(i_regmap,CCREG);
2852 if(cc<0)
2853 emit_loadreg(CCREG,2);
2573466a 2854 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 2855 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 2856 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 2857 if(cc<0)
2858 emit_storereg(CCREG,2);
2859 restore_regs(reglist);
2860 emit_jmp(stubs[n][2]); // return address
2861#else
b7918751 2862 emit_andimm(addr,0xfffffffc,temp2);
2863 emit_writeword(temp2,(int)&address);
2864
2865 save_regs(reglist);
b7918751 2866 emit_shrimm(addr,16,1);
2867 int cc=get_reg(i_regmap,CCREG);
2868 if(cc<0) {
2869 emit_loadreg(CCREG,2);
2870 }
2871 emit_movimm((u_int)readmem,0);
2872 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
b7918751 2873 emit_call((int)&indirect_jump_indexed);
2874 restore_regs(reglist);
2875
2876 emit_readword((int)&readmem_dword,temp2);
2877 int temp=addr; //hmh
2878 emit_shlimm(addr,3,temp);
2879 emit_andimm(temp,24,temp);
2880#ifdef BIG_ENDIAN_MIPS
2881 if (opcode[i]==0x2e) // SWR
2882#else
2883 if (opcode[i]==0x2a) // SWL
2884#endif
2885 emit_xorimm(temp,24,temp);
2886 emit_movimm(-1,HOST_TEMPREG);
55439448 2887 if (opcode[i]==0x2a) { // SWL
b7918751 2888 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2889 emit_orrshr(rt,temp,temp2);
2890 }else{
2891 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2892 emit_orrshl(rt,temp,temp2);
2893 }
2894 emit_readword((int)&address,addr);
2895 emit_writeword(temp2,(int)&word);
2896 //save_regs(reglist); // don't need to, no state changes
2897 emit_shrimm(addr,16,1);
2898 emit_movimm((u_int)writemem,0);
2899 //emit_call((int)&indirect_jump_indexed);
2900 emit_mov(15,14);
2901 emit_readword_dualindexedx4(0,1,15);
2902 emit_readword((int)&Count,HOST_TEMPREG);
2903 emit_readword((int)&next_interupt,2);
2904 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2905 emit_writeword(2,(int)&last_count);
2906 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2907 if(cc<0) {
2908 emit_storereg(CCREG,HOST_TEMPREG);
2909 }
2910 restore_regs(reglist);
57871462 2911 emit_jmp(stubs[n][2]); // return address
b96d3df7 2912#endif
57871462 2913}
2914
e2b5e7aa 2915static void do_invstub(int n)
57871462 2916{
2917 literal_pool(20);
2918 u_int reglist=stubs[n][3];
2919 set_jump_target(stubs[n][1],(int)out);
2920 save_regs(reglist);
2921 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
2922 emit_call((int)&invalidate_addr);
2923 restore_regs(reglist);
2924 emit_jmp(stubs[n][2]); // return address
2925}
2926
2927int do_dirty_stub(int i)
2928{
2929 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2930 u_int addr=(u_int)source;
57871462 2931 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2932 #ifndef HAVE_ARMV7
ac545b3a 2933 emit_loadlp(addr,1);
57871462 2934 emit_loadlp((int)copy,2);
2935 emit_loadlp(slen*4,3);
2936 #else
ac545b3a 2937 emit_movw(addr&0x0000FFFF,1);
57871462 2938 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2939 emit_movt(addr&0xFFFF0000,1);
57871462 2940 emit_movt(((u_int)copy)&0xFFFF0000,2);
2941 emit_movw(slen*4,3);
2942 #endif
2943 emit_movimm(start+i*4,0);
2944 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
2945 int entry=(int)out;
2946 load_regs_entry(i);
2947 if(entry==(int)out) entry=instr_addr[i];
2948 emit_jmp(instr_addr[i]);
2949 return entry;
2950}
2951
e2b5e7aa 2952static void do_dirty_stub_ds()
57871462 2953{
2954 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2955 #ifndef HAVE_ARMV7
57871462 2956 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2957 emit_loadlp((int)copy,2);
2958 emit_loadlp(slen*4,3);
2959 #else
2960 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2961 emit_movw(((u_int)copy)&0x0000FFFF,2);
2962 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2963 emit_movt(((u_int)copy)&0xFFFF0000,2);
2964 emit_movw(slen*4,3);
2965 #endif
2966 emit_movimm(start+1,0);
2967 emit_call((int)&verify_code_ds);
2968}
2969
e2b5e7aa 2970static void do_cop1stub(int n)
57871462 2971{
2972 literal_pool(256);
2973 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
2974 set_jump_target(stubs[n][1],(int)out);
2975 int i=stubs[n][3];
3d624f89 2976// int rs=stubs[n][4];
57871462 2977 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2978 int ds=stubs[n][6];
2979 if(!ds) {
2980 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2981 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2982 }
2983 //else {printf("fp exception in delay slot\n");}
2984 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2985 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2986 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2987 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 2988 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
2989}
2990
57871462 2991/* Special assem */
2992
e2b5e7aa 2993static void shift_assemble_arm(int i,struct regstat *i_regs)
57871462 2994{
2995 if(rt1[i]) {
2996 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2997 {
2998 signed char s,t,shift;
2999 t=get_reg(i_regs->regmap,rt1[i]);
3000 s=get_reg(i_regs->regmap,rs1[i]);
3001 shift=get_reg(i_regs->regmap,rs2[i]);
3002 if(t>=0){
3003 if(rs1[i]==0)
3004 {
3005 emit_zeroreg(t);
3006 }
3007 else if(rs2[i]==0)
3008 {
3009 assert(s>=0);
3010 if(s!=t) emit_mov(s,t);
3011 }
3012 else
3013 {
3014 emit_andimm(shift,31,HOST_TEMPREG);
3015 if(opcode2[i]==4) // SLLV
3016 {
3017 emit_shl(s,HOST_TEMPREG,t);
3018 }
3019 if(opcode2[i]==6) // SRLV
3020 {
3021 emit_shr(s,HOST_TEMPREG,t);
3022 }
3023 if(opcode2[i]==7) // SRAV
3024 {
3025 emit_sar(s,HOST_TEMPREG,t);
3026 }
3027 }
3028 }
3029 } else { // DSLLV/DSRLV/DSRAV
3030 signed char sh,sl,th,tl,shift;
3031 th=get_reg(i_regs->regmap,rt1[i]|64);
3032 tl=get_reg(i_regs->regmap,rt1[i]);
3033 sh=get_reg(i_regs->regmap,rs1[i]|64);
3034 sl=get_reg(i_regs->regmap,rs1[i]);
3035 shift=get_reg(i_regs->regmap,rs2[i]);
3036 if(tl>=0){
3037 if(rs1[i]==0)
3038 {
3039 emit_zeroreg(tl);
3040 if(th>=0) emit_zeroreg(th);
3041 }
3042 else if(rs2[i]==0)
3043 {
3044 assert(sl>=0);
3045 if(sl!=tl) emit_mov(sl,tl);
3046 if(th>=0&&sh!=th) emit_mov(sh,th);
3047 }
3048 else
3049 {
3050 // FIXME: What if shift==tl ?
3051 assert(shift!=tl);
3052 int temp=get_reg(i_regs->regmap,-1);
3053 int real_th=th;
3054 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3055 assert(sl>=0);
3056 assert(sh>=0);
3057 emit_andimm(shift,31,HOST_TEMPREG);
3058 if(opcode2[i]==0x14) // DSLLV
3059 {
3060 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3061 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3062 emit_orrshr(sl,HOST_TEMPREG,th);
3063 emit_andimm(shift,31,HOST_TEMPREG);
3064 emit_testimm(shift,32);
3065 emit_shl(sl,HOST_TEMPREG,tl);
3066 if(th>=0) emit_cmovne_reg(tl,th);
3067 emit_cmovne_imm(0,tl);
3068 }
3069 if(opcode2[i]==0x16) // DSRLV
3070 {
3071 assert(th>=0);
3072 emit_shr(sl,HOST_TEMPREG,tl);
3073 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3074 emit_orrshl(sh,HOST_TEMPREG,tl);
3075 emit_andimm(shift,31,HOST_TEMPREG);
3076 emit_testimm(shift,32);
3077 emit_shr(sh,HOST_TEMPREG,th);
3078 emit_cmovne_reg(th,tl);
3079 if(real_th>=0) emit_cmovne_imm(0,th);
3080 }
3081 if(opcode2[i]==0x17) // DSRAV
3082 {
3083 assert(th>=0);
3084 emit_shr(sl,HOST_TEMPREG,tl);
3085 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3086 if(real_th>=0) {
3087 assert(temp>=0);
3088 emit_sarimm(th,31,temp);
3089 }
3090 emit_orrshl(sh,HOST_TEMPREG,tl);
3091 emit_andimm(shift,31,HOST_TEMPREG);
3092 emit_testimm(shift,32);
3093 emit_sar(sh,HOST_TEMPREG,th);
3094 emit_cmovne_reg(th,tl);
3095 if(real_th>=0) emit_cmovne_reg(temp,th);
3096 }
3097 }
3098 }
3099 }
3100 }
3101}
ffb0b9e0 3102
ffb0b9e0 3103static void speculate_mov(int rs,int rt)
3104{
3105 if(rt!=0) {
3106 smrv_strong_next|=1<<rt;
3107 smrv[rt]=smrv[rs];
3108 }
3109}
3110
3111static void speculate_mov_weak(int rs,int rt)
3112{
3113 if(rt!=0) {
3114 smrv_weak_next|=1<<rt;
3115 smrv[rt]=smrv[rs];
3116 }
3117}
3118
3119static void speculate_register_values(int i)
3120{
3121 if(i==0) {
3122 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3123 // gp,sp are likely to stay the same throughout the block
3124 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3125 smrv_weak_next=~smrv_strong_next;
3126 //printf(" llr %08x\n", smrv[4]);
3127 }
3128 smrv_strong=smrv_strong_next;
3129 smrv_weak=smrv_weak_next;
3130 switch(itype[i]) {
3131 case ALU:
3132 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3133 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3134 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3135 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3136 else {
3137 smrv_strong_next&=~(1<<rt1[i]);
3138 smrv_weak_next&=~(1<<rt1[i]);
3139 }
3140 break;
3141 case SHIFTIMM:
3142 smrv_strong_next&=~(1<<rt1[i]);
3143 smrv_weak_next&=~(1<<rt1[i]);
3144 // fallthrough
3145 case IMM16:
3146 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3147 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3148 if(hr>=0) {
3149 if(get_final_value(hr,i,&value))
3150 smrv[rt1[i]]=value;
3151 else smrv[rt1[i]]=constmap[i][hr];
3152 smrv_strong_next|=1<<rt1[i];
3153 }
3154 }
3155 else {
3156 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3157 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3158 }
3159 break;
3160 case LOAD:
3161 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3162 // special case for BIOS
3163 smrv[rt1[i]]=0xa0000000;
3164 smrv_strong_next|=1<<rt1[i];
3165 break;
3166 }
3167 // fallthrough
3168 case SHIFT:
3169 case LOADLR:
3170 case MOV:
3171 smrv_strong_next&=~(1<<rt1[i]);
3172 smrv_weak_next&=~(1<<rt1[i]);
3173 break;
3174 case COP0:
3175 case COP2:
3176 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3177 smrv_strong_next&=~(1<<rt1[i]);
3178 smrv_weak_next&=~(1<<rt1[i]);
3179 }
3180 break;
3181 case C2LS:
3182 if (opcode[i]==0x32) { // LWC2
3183 smrv_strong_next&=~(1<<rt1[i]);
3184 smrv_weak_next&=~(1<<rt1[i]);
3185 }
3186 break;
3187 }
3188#if 0
3189 int r=4;
3190 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3191 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3192#endif
3193}
3194
/* Memory region classes produced by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, // everything not matched below; the default class
  MTYPE_8020 = 1, // RAM 80200000+ mirror
  MTYPE_0000 = 2, // RAM 0 mirror
  MTYPE_A000 = 3, // RAM A mirror
  MTYPE_1F80 = 4, // scratchpad
};
3202
3203static int get_ptr_mem_type(u_int a)
3204{
3205 if(a < 0x00200000) {
3206 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3207 // return wrong, must use memhandler for BIOS self-test to pass
3208 // 007 does similar stuff from a00 mirror, weird stuff
3209 return MTYPE_8000;
3210 return MTYPE_0000;
3211 }
3212 if(0x1f800000 <= a && a < 0x1f801000)
3213 return MTYPE_1F80;
3214 if(0x80200000 <= a && a < 0x80800000)
3215 return MTYPE_8020;
3216 if(0xa0000000 <= a && a < 0xa0200000)
3217 return MTYPE_A000;
3218 return MTYPE_8000;
3219}
ffb0b9e0 3220
3221static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3222{
581335b0 3223 int jaddr=0,type=0;
ffb0b9e0 3224 int mr=rs1[i];
3225 if(((smrv_strong|smrv_weak)>>mr)&1) {
3226 type=get_ptr_mem_type(smrv[mr]);
3227 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3228 }
3229 else {
3230 // use the mirror we are running on
3231 type=get_ptr_mem_type(start);
3232 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3233 }
3234
3235 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3236 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3237 addr=*addr_reg_override=HOST_TEMPREG;
3238 type=0;
3239 }
3240 else if(type==MTYPE_0000) { // RAM 0 mirror
3241 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3242 addr=*addr_reg_override=HOST_TEMPREG;
3243 type=0;
3244 }
3245 else if(type==MTYPE_A000) { // RAM A mirror
3246 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3247 addr=*addr_reg_override=HOST_TEMPREG;
3248 type=0;
3249 }
3250 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3251 if (psxH == (void *)0x1f800000) {
3252 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3253 emit_cmpimm(HOST_TEMPREG,0x1000);
3254 jaddr=(int)out;
3255 emit_jc(0);
3256 }
3257 else {
3258 // do usual RAM check, jump will go to the right handler
3259 type=0;
3260 }
ffb0b9e0 3261 }
ffb0b9e0 3262
3263 if(type==0)
3264 {
3265 emit_cmpimm(addr,RAM_SIZE);
3266 jaddr=(int)out;
3267 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3268 // Hint to branch predictor that the branch is unlikely to be taken
3269 if(rs1[i]>=28)
3270 emit_jno_unlikely(0);
3271 else
3272 #endif
3273 emit_jno(0);
a327ad27 3274 if(ram_offset!=0) {
3275 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3276 addr=*addr_reg_override=HOST_TEMPREG;
3277 }
ffb0b9e0 3278 }
3279
3280 return jaddr;
3281}
3282
57871462 3283#define shift_assemble shift_assemble_arm
3284
e2b5e7aa 3285static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3286{
3287 int s,th,tl,temp,temp2,addr,map=-1;
3288 int offset;
3289 int jaddr=0;
af4ee1fe 3290 int memtarget=0,c=0;
ffb0b9e0 3291 int fastload_reg_override=0;
57871462 3292 u_int hr,reglist=0;
3293 th=get_reg(i_regs->regmap,rt1[i]|64);
3294 tl=get_reg(i_regs->regmap,rt1[i]);
3295 s=get_reg(i_regs->regmap,rs1[i]);
3296 temp=get_reg(i_regs->regmap,-1);
3297 temp2=get_reg(i_regs->regmap,FTEMP);
3298 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3299 assert(addr<0);
3300 offset=imm[i];
3301 for(hr=0;hr<HOST_REGS;hr++) {
3302 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3303 }
3304 reglist|=1<<temp;
3305 if(offset||s<0||c) addr=temp2;
3306 else addr=s;
3307 if(s>=0) {
3308 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3309 if(c) {
3310 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3311 }
57871462 3312 }
1edfcc68 3313 if(!c) {
3314 #ifdef RAM_OFFSET
3315 map=get_reg(i_regs->regmap,ROREG);
3316 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
3317 #endif
3318 emit_shlimm(addr,3,temp);
3319 if (opcode[i]==0x22||opcode[i]==0x26) {
3320 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3321 }else{
3322 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3323 }
1edfcc68 3324 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3325 }
3326 else {
3327 if(ram_offset&&memtarget) {
3328 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3329 fastload_reg_override=HOST_TEMPREG;
57871462 3330 }
1edfcc68 3331 if (opcode[i]==0x22||opcode[i]==0x26) {
3332 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3333 }else{
1edfcc68 3334 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3335 }
535d208a 3336 }
3337 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3338 if(!c||memtarget) {
ffb0b9e0 3339 int a=temp2;
3340 if(fastload_reg_override) a=fastload_reg_override;
535d208a 3341 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 3342 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 3343 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3344 }
3345 else
3346 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3347 if(rt1[i]) {
3348 assert(tl>=0);
57871462 3349 emit_andimm(temp,24,temp);
2002a1db 3350#ifdef BIG_ENDIAN_MIPS
3351 if (opcode[i]==0x26) // LWR
3352#else
3353 if (opcode[i]==0x22) // LWL
3354#endif
3355 emit_xorimm(temp,24,temp);
57871462 3356 emit_movimm(-1,HOST_TEMPREG);
3357 if (opcode[i]==0x26) {
3358 emit_shr(temp2,temp,temp2);
3359 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3360 }else{
3361 emit_shl(temp2,temp,temp2);
3362 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3363 }
3364 emit_or(temp2,tl,tl);
57871462 3365 }
535d208a 3366 //emit_storereg(rt1[i],tl); // DEBUG
3367 }
3368 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 3369 // FIXME: little endian, fastload_reg_override
535d208a 3370 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
3371 if(!c||memtarget) {
3372 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
3373 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
3374 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
3375 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
3376 }
3377 else
3378 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
3379 if(rt1[i]) {
3380 assert(th>=0);
3381 assert(tl>=0);
57871462 3382 emit_testimm(temp,32);
3383 emit_andimm(temp,24,temp);
3384 if (opcode[i]==0x1A) { // LDL
3385 emit_rsbimm(temp,32,HOST_TEMPREG);
3386 emit_shl(temp2h,temp,temp2h);
3387 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
3388 emit_movimm(-1,HOST_TEMPREG);
3389 emit_shl(temp2,temp,temp2);
3390 emit_cmove_reg(temp2h,th);
3391 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
3392 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
3393 emit_orreq(temp2,tl,tl);
3394 emit_orrne(temp2,th,th);
3395 }
3396 if (opcode[i]==0x1B) { // LDR
3397 emit_xorimm(temp,24,temp);
3398 emit_rsbimm(temp,32,HOST_TEMPREG);
3399 emit_shr(temp2,temp,temp2);
3400 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
3401 emit_movimm(-1,HOST_TEMPREG);
3402 emit_shr(temp2h,temp,temp2h);
3403 emit_cmovne_reg(temp2,tl);
3404 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
3405 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
3406 emit_orrne(temp2h,th,th);
3407 emit_orreq(temp2h,tl,tl);
3408 }
3409 }
3410 }
3411}
3412#define loadlr_assemble loadlr_assemble_arm
3413
e2b5e7aa 3414static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3415{
3416 if(opcode2[i]==0) // MFC0
3417 {
3418 signed char t=get_reg(i_regs->regmap,rt1[i]);
3419 char copr=(source[i]>>11)&0x1f;
3420 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 3421 if(t>=0&&rt1[i]!=0) {
7139f3c8 3422 emit_readword((int)&reg_cop0+copr*4,t);
57871462 3423 }
3424 }
3425 else if(opcode2[i]==4) // MTC0
3426 {
3427 signed char s=get_reg(i_regs->regmap,rs1[i]);
3428 char copr=(source[i]>>11)&0x1f;
3429 assert(s>=0);
63cb0298 3430 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 3431 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 3432 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 3433 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3434 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3435 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 3436 emit_writeword(HOST_CCREG,(int)&Count);
3437 }
3438 // What a mess. The status register (12) can enable interrupts,
3439 // so needs a special case to handle a pending interrupt.
3440 // The interrupt must be taken immediately, because a subsequent
3441 // instruction might disable interrupts again.
7139f3c8 3442 if(copr==12||copr==13) {
fca1aef2 3443 if (is_delayslot) {
3444 // burn cycles to cause cc_interrupt, which will
3445 // reschedule next_interupt. Relies on CCREG from above.
3446 assem_debug("MTC0 DS %d\n", copr);
3447 emit_writeword(HOST_CCREG,(int)&last_count);
3448 emit_movimm(0,HOST_CCREG);
3449 emit_storereg(CCREG,HOST_CCREG);
caeefe31 3450 emit_loadreg(rs1[i],1);
fca1aef2 3451 emit_movimm(copr,0);
3452 emit_call((int)pcsx_mtc0_ds);
042c7287 3453 emit_loadreg(rs1[i],s);
fca1aef2 3454 return;
3455 }
63cb0298 3456 emit_movimm(start+i*4+4,HOST_TEMPREG);
3457 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
3458 emit_movimm(0,HOST_TEMPREG);
3459 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 3460 }
3461 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3462 //else
caeefe31 3463 if(s==HOST_CCREG)
3464 emit_loadreg(rs1[i],1);
3465 else if(s!=1)
63cb0298 3466 emit_mov(s,1);
fca1aef2 3467 emit_movimm(copr,0);
3468 emit_call((int)pcsx_mtc0);
7139f3c8 3469 if(copr==9||copr==11||copr==12||copr==13) {
57871462 3470 emit_readword((int)&Count,HOST_CCREG);
042c7287 3471 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 3472 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3473 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3474 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 3475 emit_storereg(CCREG,HOST_CCREG);
3476 }
7139f3c8 3477 if(copr==12||copr==13) {
57871462 3478 assert(!is_delayslot);
3479 emit_readword((int)&pending_exception,14);
042c7287 3480 emit_test(14,14);
3481 emit_jne((int)&do_interrupt);
57871462 3482 }
3483 emit_loadreg(rs1[i],s);
3484 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3485 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3486 cop1_usable=0;
3487 }
3488 else
3489 {
3490 assert(opcode2[i]==0x10);
576bbd8f 3491 if((source[i]&0x3f)==0x10) // RFE
3492 {
3493 emit_readword((int)&Status,0);
3494 emit_andimm(0,0x3c,1);
3495 emit_andimm(0,~0xf,0);
3496 emit_orrshr_imm(1,2,0);
3497 emit_writeword(0,(int)&Status);
3498 }
57871462 3499 }
3500}
3501
b9b61529 3502static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3503{
3504 switch (copr) {
3505 case 1:
3506 case 3:
3507 case 5:
3508 case 8:
3509 case 9:
3510 case 10:
3511 case 11:
3512 emit_readword((int)&reg_cop2d[copr],tl);
3513 emit_signextend16(tl,tl);
3514 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
3515 break;
3516 case 7:
3517 case 16:
3518 case 17:
3519 case 18:
3520 case 19:
3521 emit_readword((int)&reg_cop2d[copr],tl);
3522 emit_andimm(tl,0xffff,tl);
3523 emit_writeword(tl,(int)&reg_cop2d[copr]);
3524 break;
3525 case 15:
3526 emit_readword((int)&reg_cop2d[14],tl); // SXY2
3527 emit_writeword(tl,(int)&reg_cop2d[copr]);
3528 break;
3529 case 28:
b9b61529 3530 case 29:
3531 emit_readword((int)&reg_cop2d[9],temp);
3532 emit_testimm(temp,0x8000); // do we need this?
3533 emit_andimm(temp,0xf80,temp);
3534 emit_andne_imm(temp,0,temp);
f70d384d 3535 emit_shrimm(temp,7,tl);
b9b61529 3536 emit_readword((int)&reg_cop2d[10],temp);
3537 emit_testimm(temp,0x8000);
3538 emit_andimm(temp,0xf80,temp);
3539 emit_andne_imm(temp,0,temp);
f70d384d 3540 emit_orrshr_imm(temp,2,tl);
b9b61529 3541 emit_readword((int)&reg_cop2d[11],temp);
3542 emit_testimm(temp,0x8000);
3543 emit_andimm(temp,0xf80,temp);
3544 emit_andne_imm(temp,0,temp);
f70d384d 3545 emit_orrshl_imm(temp,3,tl);
b9b61529 3546 emit_writeword(tl,(int)&reg_cop2d[copr]);
3547 break;
3548 default:
3549 emit_readword((int)&reg_cop2d[copr],tl);
3550 break;
3551 }
3552}
3553
3554static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3555{
3556 switch (copr) {
3557 case 15:
3558 emit_readword((int)&reg_cop2d[13],temp); // SXY1
3559 emit_writeword(sl,(int)&reg_cop2d[copr]);
3560 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
3561 emit_readword((int)&reg_cop2d[14],temp); // SXY2
3562 emit_writeword(sl,(int)&reg_cop2d[14]);
3563 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
3564 break;
3565 case 28:
3566 emit_andimm(sl,0x001f,temp);
f70d384d 3567 emit_shlimm(temp,7,temp);
b9b61529 3568 emit_writeword(temp,(int)&reg_cop2d[9]);
3569 emit_andimm(sl,0x03e0,temp);
f70d384d 3570 emit_shlimm(temp,2,temp);
b9b61529 3571 emit_writeword(temp,(int)&reg_cop2d[10]);
3572 emit_andimm(sl,0x7c00,temp);
f70d384d 3573 emit_shrimm(temp,3,temp);
b9b61529 3574 emit_writeword(temp,(int)&reg_cop2d[11]);
3575 emit_writeword(sl,(int)&reg_cop2d[28]);
3576 break;
3577 case 30:
3578 emit_movs(sl,temp);
3579 emit_mvnmi(temp,temp);
665f33e1 3580#ifdef HAVE_ARMV5
b9b61529 3581 emit_clz(temp,temp);
665f33e1 3582#else
3583 emit_movs(temp,HOST_TEMPREG);
3584 emit_movimm(0,temp);
3585 emit_jeq((int)out+4*4);
3586 emit_addpl_imm(temp,1,temp);
3587 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3588 emit_jns((int)out-2*4);
3589#endif
b9b61529 3590 emit_writeword(sl,(int)&reg_cop2d[30]);
3591 emit_writeword(temp,(int)&reg_cop2d[31]);
3592 break;
b9b61529 3593 case 31:
3594 break;
3595 default:
3596 emit_writeword(sl,(int)&reg_cop2d[copr]);
3597 break;
3598 }
3599}
3600
e2b5e7aa 3601static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3602{
3603 u_int copr=(source[i]>>11)&0x1f;
3604 signed char temp=get_reg(i_regs->regmap,-1);
3605 if (opcode2[i]==0) { // MFC2
3606 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3607 if(tl>=0&&rt1[i]!=0)
b9b61529 3608 cop2_get_dreg(copr,tl,temp);
3609 }
3610 else if (opcode2[i]==4) { // MTC2
3611 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3612 cop2_put_dreg(copr,sl,temp);
3613 }
3614 else if (opcode2[i]==2) // CFC2
3615 {
3616 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3617 if(tl>=0&&rt1[i]!=0)
b9b61529 3618 emit_readword((int)&reg_cop2c[copr],tl);
3619 }
3620 else if (opcode2[i]==6) // CTC2
3621 {
3622 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3623 switch(copr) {
3624 case 4:
3625 case 12:
3626 case 20:
3627 case 26:
3628 case 27:
3629 case 29:
3630 case 30:
3631 emit_signextend16(sl,temp);
3632 break;
3633 case 31:
3634 //value = value & 0x7ffff000;
3635 //if (value & 0x7f87e000) value |= 0x80000000;
3636 emit_shrimm(sl,12,temp);
3637 emit_shlimm(temp,12,temp);
3638 emit_testimm(temp,0x7f000000);
3639 emit_testeqimm(temp,0x00870000);
3640 emit_testeqimm(temp,0x0000e000);
3641 emit_orrne_imm(temp,0x80000000,temp);
3642 break;
3643 default:
3644 temp=sl;
3645 break;
3646 }
3647 emit_writeword(temp,(int)&reg_cop2c[copr]);
3648 assert(sl>=0);
3649 }
3650}
3651
054175e9 3652static void c2op_prologue(u_int op,u_int reglist)
3653{
3654 save_regs_all(reglist);
82ed88eb 3655#ifdef PCNT
3656 emit_movimm(op,0);
3657 emit_call((int)pcnt_gte_start);
3658#endif
054175e9 3659 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3660}
3661
3662static void c2op_epilogue(u_int op,u_int reglist)
3663{
82ed88eb 3664#ifdef PCNT
3665 emit_movimm(op,0);
3666 emit_call((int)pcnt_gte_end);
3667#endif
054175e9 3668 restore_regs_all(reglist);
3669}
3670
6c0eefaf 3671static void c2op_call_MACtoIR(int lm,int need_flags)
3672{
3673 if(need_flags)
3674 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
3675 else
3676 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
3677}
3678
3679static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3680{
3681 emit_call((int)func);
3682 // func is C code and trashes r0
3683 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3684 if(need_flags||need_ir)
3685 c2op_call_MACtoIR(lm,need_flags);
3686 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
3687}
3688
054175e9 3689static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3690{
b9b61529 3691 u_int c2op=source[i]&0x3f;
6c0eefaf 3692 u_int hr,reglist_full=0,reglist;
054175e9 3693 int need_flags,need_ir;
b9b61529 3694 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3695 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3696 }
4d646738 3697 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3698
3699 if (gte_handlers[c2op]!=NULL) {
bedfea38 3700 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3701 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3702 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3703 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3704 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3705 need_flags=0;
6c0eefaf 3706 int shift = (source[i] >> 19) & 1;
3707 int lm = (source[i] >> 10) & 1;
054175e9 3708 switch(c2op) {
19776aef 3709#ifndef DRC_DBG
054175e9 3710 case GTE_MVMVA: {
82336ba3 3711#ifdef HAVE_ARMV5
054175e9 3712 int v = (source[i] >> 15) & 3;
3713 int cv = (source[i] >> 13) & 3;
3714 int mx = (source[i] >> 17) & 3;
4d646738 3715 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3716 c2op_prologue(c2op,reglist);
3717 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3718 if(v<3)
3719 emit_ldrd(v*8,0,4);
3720 else {
3721 emit_movzwl_indexed(9*4,0,4); // gteIR
3722 emit_movzwl_indexed(10*4,0,6);
3723 emit_movzwl_indexed(11*4,0,5);
3724 emit_orrshl_imm(6,16,4);
3725 }
3726 if(mx<3)
3727 emit_addimm(0,32*4+mx*8*4,6);
3728 else
3729 emit_readword((int)&zeromem_ptr,6);
3730 if(cv<3)
3731 emit_addimm(0,32*4+(cv*8+5)*4,7);
3732 else
3733 emit_readword((int)&zeromem_ptr,7);
3734#ifdef __ARM_NEON__
3735 emit_movimm(source[i],1); // opcode
3736 emit_call((int)gteMVMVA_part_neon);
3737 if(need_flags) {
3738 emit_movimm(lm,1);
3739 emit_call((int)gteMACtoIR_flags_neon);
3740 }
3741#else
3742 if(cv==3&&shift)
3743 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3744 else {
3745 emit_movimm(shift,1);
3746 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3747 }
6c0eefaf 3748 if(need_flags||need_ir)
3749 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3750#endif
3751#else /* if not HAVE_ARMV5 */
3752 c2op_prologue(c2op,reglist);
3753 emit_movimm(source[i],1); // opcode
3754 emit_writeword(1,(int)&psxRegs.code);
3755 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3756#endif
3757 break;
3758 }
6c0eefaf 3759 case GTE_OP:
3760 c2op_prologue(c2op,reglist);
3761 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
3762 if(need_flags||need_ir) {
3763 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3764 c2op_call_MACtoIR(lm,need_flags);
3765 }
3766 break;
3767 case GTE_DPCS:
3768 c2op_prologue(c2op,reglist);
3769 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3770 break;
3771 case GTE_INTPL:
3772 c2op_prologue(c2op,reglist);
3773 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3774 break;
3775 case GTE_SQR:
3776 c2op_prologue(c2op,reglist);
3777 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
3778 if(need_flags||need_ir) {
3779 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3780 c2op_call_MACtoIR(lm,need_flags);
3781 }
3782 break;
3783 case GTE_DCPL:
3784 c2op_prologue(c2op,reglist);
3785 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3786 break;
3787 case GTE_GPF:
3788 c2op_prologue(c2op,reglist);
3789 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3790 break;
3791 case GTE_GPL:
3792 c2op_prologue(c2op,reglist);
3793 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3794 break;
19776aef 3795#endif
054175e9 3796 default:
054175e9 3797 c2op_prologue(c2op,reglist);
19776aef 3798#ifdef DRC_DBG
3799 emit_movimm(source[i],1); // opcode
3800 emit_writeword(1,(int)&psxRegs.code);
3801#endif
054175e9 3802 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3803 break;
3804 }
3805 c2op_epilogue(c2op,reglist);
3806 }
b9b61529 3807}
3808
e2b5e7aa 3809static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3810{
3811 // XXX: should just just do the exception instead
3812 if(!cop1_usable) {
3813 int jaddr=(int)out;
3814 emit_jmp(0);
3815 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
3816 cop1_usable=1;
3817 }
3818}
3819
/* COP1 register transfers: no code is generated besides the cop1_unusable() check. */
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3824
/* Floating-point conversions: no code is generated besides the cop1_unusable() check. */
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3829#define fconv_assemble fconv_assemble_arm
3830
/* Floating-point compares: no code is generated besides the cop1_unusable() check. */
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3835
/* Floating-point arithmetic: no code is generated besides the cop1_unusable() check. */
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3840
e2b5e7aa 3841static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3842{
3843 // case 0x18: MULT
3844 // case 0x19: MULTU
3845 // case 0x1A: DIV
3846 // case 0x1B: DIVU
3847 // case 0x1C: DMULT
3848 // case 0x1D: DMULTU
3849 // case 0x1E: DDIV
3850 // case 0x1F: DDIVU
3851 if(rs1[i]&&rs2[i])
3852 {
3853 if((opcode2[i]&4)==0) // 32-bit
3854 {
3855 if(opcode2[i]==0x18) // MULT
3856 {
3857 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3858 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3859 signed char hi=get_reg(i_regs->regmap,HIREG);
3860 signed char lo=get_reg(i_regs->regmap,LOREG);
3861 assert(m1>=0);
3862 assert(m2>=0);
3863 assert(hi>=0);
3864 assert(lo>=0);
3865 emit_smull(m1,m2,hi,lo);
3866 }
3867 if(opcode2[i]==0x19) // MULTU
3868 {
3869 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3870 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3871 signed char hi=get_reg(i_regs->regmap,HIREG);
3872 signed char lo=get_reg(i_regs->regmap,LOREG);
3873 assert(m1>=0);
3874 assert(m2>=0);
3875 assert(hi>=0);
3876 assert(lo>=0);
3877 emit_umull(m1,m2,hi,lo);
3878 }
3879 if(opcode2[i]==0x1A) // DIV
3880 {
3881 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3882 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3883 assert(d1>=0);
3884 assert(d2>=0);
3885 signed char quotient=get_reg(i_regs->regmap,LOREG);
3886 signed char remainder=get_reg(i_regs->regmap,HIREG);
3887 assert(quotient>=0);
3888 assert(remainder>=0);
3889 emit_movs(d1,remainder);
44a80f6a 3890 emit_movimm(0xffffffff,quotient);
3891 emit_negmi(quotient,quotient); // .. quotient and ..
3892 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3893 emit_movs(d2,HOST_TEMPREG);
3894 emit_jeq((int)out+52); // Division by zero
82336ba3 3895 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 3896#ifdef HAVE_ARMV5
57871462 3897 emit_clz(HOST_TEMPREG,quotient);
3898 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3899#else
3900 emit_movimm(0,quotient);
3901 emit_addpl_imm(quotient,1,quotient);
3902 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3903 emit_jns((int)out-2*4);
3904#endif
57871462 3905 emit_orimm(quotient,1<<31,quotient);
3906 emit_shr(quotient,quotient,quotient);
3907 emit_cmp(remainder,HOST_TEMPREG);
3908 emit_subcs(remainder,HOST_TEMPREG,remainder);
3909 emit_adcs(quotient,quotient,quotient);
3910 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3911 emit_jcc((int)out-16); // -4
3912 emit_teq(d1,d2);
3913 emit_negmi(quotient,quotient);
3914 emit_test(d1,d1);
3915 emit_negmi(remainder,remainder);
3916 }
3917 if(opcode2[i]==0x1B) // DIVU
3918 {
3919 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3920 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3921 assert(d1>=0);
3922 assert(d2>=0);
3923 signed char quotient=get_reg(i_regs->regmap,LOREG);
3924 signed char remainder=get_reg(i_regs->regmap,HIREG);
3925 assert(quotient>=0);
3926 assert(remainder>=0);
44a80f6a 3927 emit_mov(d1,remainder);
3928 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3929 emit_test(d2,d2);
44a80f6a 3930 emit_jeq((int)out+40); // Division by zero
665f33e1 3931#ifdef HAVE_ARMV5
57871462 3932 emit_clz(d2,HOST_TEMPREG);
3933 emit_movimm(1<<31,quotient);
3934 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3935#else
3936 emit_movimm(0,HOST_TEMPREG);
82336ba3 3937 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3938 emit_lslpls_imm(d2,1,d2);
665f33e1 3939 emit_jns((int)out-2*4);
3940 emit_movimm(1<<31,quotient);
3941#endif
57871462 3942 emit_shr(quotient,HOST_TEMPREG,quotient);
3943 emit_cmp(remainder,d2);
3944 emit_subcs(remainder,d2,remainder);
3945 emit_adcs(quotient,quotient,quotient);
3946 emit_shrcc_imm(d2,1,d2);
3947 emit_jcc((int)out-16); // -4
3948 }
3949 }
3950 else // 64-bit
71e490c5 3951 assert(0);
57871462 3952 }
3953 else
3954 {
3955 // Multiply by zero is zero.
3956 // MIPS does not have a divide by zero exception.
3957 // The result is undefined, we return zero.
3958 signed char hr=get_reg(i_regs->regmap,HIREG);
3959 signed char lr=get_reg(i_regs->regmap,LOREG);
3960 if(hr>=0) emit_zeroreg(hr);
3961 if(lr>=0) emit_zeroreg(lr);
3962 }
3963}
3964#define multdiv_assemble multdiv_assemble_arm
3965
/*
 * Intentionally emits nothing.
 * Don't need this for ARM.  On x86, this puts the value 0xf8 into the
 * register.  On ARM the hash can be done with a single instruction
 * (see do_rhash).
 */
static void do_preload_rhash(int r) {
  (void)r;
}
3970
e2b5e7aa 3971static void do_preload_rhtbl(int ht) {
57871462 3972 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3973}
3974
/* Compute the mini hash table offset for the address in rs: rh = rs & 0xf8. */
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
3978
e2b5e7aa 3979static void do_miniht_load(int ht,int rh) {
57871462 3980 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3981 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3982}
3983
e2b5e7aa 3984static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3985 emit_cmp(rh,rs);
3986 emit_ldreq_indexed(ht,4,15);
3987 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3988 emit_mov(rs,7);
3989 emit_jmp(jump_vaddr_reg[7]);
3990 #else
3991 emit_jmp(jump_vaddr_reg[rs]);
3992 #endif
3993}
3994
e2b5e7aa 3995static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3996 #ifndef HAVE_ARMV7
57871462 3997 emit_movimm(return_address,rt); // PC into link register
3998 add_to_linker((int)out,return_address,1);
3999 emit_pcreladdr(temp);
4000 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4001 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4002 #else
4003 emit_movw(return_address&0x0000FFFF,rt);
4004 add_to_linker((int)out,return_address,1);
4005 emit_pcreladdr(temp);
4006 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
4007 emit_movt(return_address&0xFFFF0000,rt);
4008 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
4009 #endif
4010}
4011
e2b5e7aa 4012static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 4013{
4014 //if(dirty_pre==dirty) return;
581335b0 4015 int hr,reg;
57871462 4016 for(hr=0;hr<HOST_REGS;hr++) {
4017 if(hr!=EXCLUDE_REG) {
4018 reg=pre[hr];
4019 if(((~u)>>(reg&63))&1) {
f776eb14 4020 if(reg>0) {
57871462 4021 if(((dirty_pre&~dirty)>>hr)&1) {
4022 if(reg>0&&reg<34) {
4023 emit_storereg(reg,hr);
4024 if( ((is32_pre&~uu)>>reg)&1 ) {
4025 emit_sarimm(hr,31,HOST_TEMPREG);
4026 emit_storereg(reg|64,HOST_TEMPREG);
4027 }
4028 }
4029 else if(reg>=64) {
4030 emit_storereg(reg,hr);
4031 }
4032 }
4033 }
57871462 4034 }
4035 }
4036 }
4037}
4038
4039
4040/* using strd could possibly help but you'd have to allocate registers in pairs
e2b5e7aa 4041static void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
57871462 4042{
4043 int hr;
4044 int wrote=-1;
4045 for(hr=HOST_REGS-1;hr>=0;hr--) {
4046 if(hr!=EXCLUDE_REG) {
4047 if(pre[hr]!=entry[hr]) {
4048 if(pre[hr]>=0) {
4049 if((dirty>>hr)&1) {
4050 if(get_reg(entry,pre[hr])<0) {
4051 if(pre[hr]<64) {
4052 if(!((u>>pre[hr])&1)) {
4053 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
4054 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4055 emit_sarimm(hr,31,hr+1);
4056 emit_strdreg(pre[hr],hr);
4057 }
4058 else
4059 emit_storereg(pre[hr],hr);
4060 }else{
4061 emit_storereg(pre[hr],hr);
4062 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
4063 emit_sarimm(hr,31,hr);
4064 emit_storereg(pre[hr]|64,hr);
4065 }
4066 }
4067 }
4068 }else{
4069 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
4070 emit_storereg(pre[hr],hr);
4071 }
4072 }
4073 wrote=hr;
4074 }
4075 }
4076 }
4077 }
4078 }
4079 }
4080 for(hr=0;hr<HOST_REGS;hr++) {
4081 if(hr!=EXCLUDE_REG) {
4082 if(pre[hr]!=entry[hr]) {
4083 if(pre[hr]>=0) {
4084 int nr;
4085 if((nr=get_reg(entry,pre[hr]))>=0) {
4086 emit_mov(hr,nr);
4087 }
4088 }
4089 }
4090 }
4091 }
4092}
4093#define wb_invalidate wb_invalidate_arm
4094*/
4095
d148d265 4096static void mark_clear_cache(void *target)
4097{
4098 u_long offset = (char *)target - (char *)BASE_ADDR;
4099 u_int mask = 1u << ((offset >> 12) & 31);
4100 if (!(needs_clear_cache[offset >> 17] & mask)) {
4101 char *start = (char *)((u_long)target & ~4095ul);
4102 start_tcache_write(start, start + 4096);
4103 needs_clear_cache[offset >> 17] |= mask;
4104 }
4105}
4106
dd3a91a1 4107// Clearing the cache is rather slow on ARM Linux, so mark the areas
4108// that need to be cleared, and then only clear these areas once.
e2b5e7aa 4109static void do_clear_cache()
dd3a91a1 4110{
4111 int i,j;
4112 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
4113 {
4114 u_int bitmap=needs_clear_cache[i];
4115 if(bitmap) {
4116 u_int start,end;
9f51b4b9 4117 for(j=0;j<32;j++)
dd3a91a1 4118 {
4119 if(bitmap&(1<<j)) {
bdeade46 4120 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 4121 end=start+4095;
4122 j++;
4123 while(j<32) {
4124 if(bitmap&(1<<j)) {
4125 end+=4096;
4126 j++;
4127 }else{
d148d265 4128 end_tcache_write((void *)start,(void *)end);
dd3a91a1 4129 break;
4130 }
4131 }
4132 }
4133 }
4134 needs_clear_cache[i]=0;
4135 }
4136 }
4137}
4138
// CPU-architecture-specific initialization hook.
static void arch_init() {
  // Nothing to set up for this backend.
}
b9b61529 4142
4143// vim:shiftwidth=2:expandtab