drc: remove some leftover n64-only stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
6c0eefaf 22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
054175e9 26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
665f33e1 29#include "arm_features.h"
054175e9 30
1e212a25 31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
643aeae3 33u_char *translation_cache;
1e212a25 34#else
643aeae3 35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
bdeade46 36#endif
37
// Mask of caller-saved host registers (presumably one bit per ARM register
// number -- confirm against the users of this macro). The __MACH__ value
// has one extra bit (0x200) set compared to the default.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif
43
e2b5e7aa 44#define unused __attribute__((unused))
45
dd114d7d 46#ifdef DRC_DBG
47#pragma GCC diagnostic ignored "-Wunused-function"
48#pragma GCC diagnostic ignored "-Wunused-variable"
49#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
50#endif
51
57871462 52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
57871462 58extern void *dynarec_local;
57871462 59extern u_int mini_ht[32][2];
57871462 60
// Entry points defined outside this file. The jump_vaddr_rN variants are
// collected in the jump_vaddr_reg[] table; no r11 variant is declared.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();

void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();
76
b14b6a8f 77void * const jump_vaddr_reg[16] = {
78 jump_vaddr_r0,
79 jump_vaddr_r1,
80 jump_vaddr_r2,
81 jump_vaddr_r3,
82 jump_vaddr_r4,
83 jump_vaddr_r5,
84 jump_vaddr_r6,
85 jump_vaddr_r7,
86 jump_vaddr_r8,
87 jump_vaddr_r9,
88 jump_vaddr_r10,
57871462 89 0,
b14b6a8f 90 jump_vaddr_r12,
57871462 91 0,
92 0,
b14b6a8f 93 0
94};
57871462 95
// invalidate_addr_rN entry points defined outside this file; they are
// collected in invalidate_addr_reg[]. No r11 variant is declared.
void invalidate_addr_r0();
void invalidate_addr_r1();
void invalidate_addr_r2();
void invalidate_addr_r3();
void invalidate_addr_r4();
void invalidate_addr_r5();
void invalidate_addr_r6();
void invalidate_addr_r7();
void invalidate_addr_r8();
void invalidate_addr_r9();
void invalidate_addr_r10();
void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
d148d265 127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
dd3a91a1 128
57871462 129/* Linker */
130
// Patch the instruction at addr so that it refers to target_.
// The top byte of the instruction word selects the patch form:
//  - 0xe2: the low 12 bits become an immediate holding the pc-relative
//          distance (must be word aligned and below 1024)
//  - 0x72: generated by emit_jno_unlikely; uses an immediate when the
//          distance fits, otherwise the word is replaced by 0x7A000000
//          plus a 24-bit branch offset
//  - otherwise the word must be a branch (asserted) and only its 24-bit
//          offset field is rewritten
// Distances are measured from addr+8.
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = addr;
  const u_char top = ((u_char *)addr)[3];
  const u_int dest = (u_int)target_;
  const u_int disp = dest-(u_int)insn-8;

  if(top==0xe2) {
    assert(disp<1024);
    assert(((uintptr_t)addr&3)==0);
    assert((dest&3)==0);
    *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*ptr2);
    return;
  }

  if(top==0x72) {
    // generated by emit_jno_unlikely
    if(disp<1024) {
      assert(((uintptr_t)addr&3)==0);
      assert((dest&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    }
    else if(disp<4096&&!(disp&15)) {
      assert(((uintptr_t)addr&3)==0);
      assert((dest&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>4)|0xE00;
    }
    else {
      *insn=(0x7A000000)|((disp<<6)>>8);
    }
    return;
  }

  assert((top&0x0e)==0xa);
  *insn=(*insn&0xFF000000)|((disp<<6)>>8);
}
162
163// This optionally copies the instruction from the target of the branch into
164// the space before the branch. Works, but the difference in speed is
165// usually insignificant.
#if 0
// Disabled variant of set_jump_target(). When copy is set and the
// instruction at the branch target passes the checks below, that
// instruction is copied into the word before the branch and the branch
// is retargeted one instruction further.
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_char *ptr=(u_char *)addr;
  u_int *ptr2=(u_int *)ptr;
  assert(!copy||ptr2[-1]==0xe28dd000);
  if(ptr[3]==0xe2) {
    assert(!copy);
    assert((target-(u_int)ptr2-8)<4096);
    *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
    return;
  }
  assert((ptr[3]&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Never copy: ALU op without immediate/flags, loads, or anything with
  // bit 27 set.
  if((target_insn&0x0e100000)==0
     ||(target_insn&0x0c100000)==0x04100000
     ||(target_insn&0x08000000))
  {
    copy=0;
  }
  if(copy) {
    ptr2[-1]=target_insn;
    target+=4;
  }
  *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
}
#endif
57871462 197
198/* Literal pool */
e2b5e7aa 199static void add_literal(int addr,int val)
57871462 200{
15776b68 201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
9f51b4b9 204 literalcount++;
205}
57871462 206
d148d265 207// from a pointer to external jump stub (which was produced by emit_extjump2)
208// find where the jumping insn is
209static void *find_extjump_insn(void *stub)
57871462 210{
211 int *ptr=(int *)(stub+4);
d148d265 212 assert((*ptr&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
57871462 213 u_int offset=*ptr&0xfff;
d148d265 214 void **l_ptr=(void *)ptr+offset+8;
215 return *l_ptr;
57871462 216}
217
f968d35d 218// find where external branch is linked to using addr of its stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
643aeae3 222static void *get_pointer(void *stub)
57871462 223{
224 //printf("get_pointer(%x)\n",(int)stub);
d148d265 225 int *i_ptr=find_extjump_insn(stub);
57871462 226 assert((*i_ptr&0x0f000000)==0x0a000000);
643aeae3 227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
57871462 228}
229
230// Find the "clean" entry point from a "dirty" entry point
231// by skipping past the call to verify_code
// Given a "dirty" entry point, return the matching "clean" entry point.
// Skips 4 words (6 with HAVE_ARMV7), then expects a bl within the next
// two words (asserted). If the word after the bl is an unconditional
// branch, its target is returned; otherwise the address of that word.
//
// The branch offset is sign-extended explicitly: the previous
// "(*ptr<<8)>>6" left-shifted a negative signed int, which is undefined
// behaviour in C.
static void *get_clean_addr(void *addr)
{
  signed int *ptr = addr;
  #ifndef HAVE_ARMV7
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    int disp=*ptr&0x00FFFFFF;
    if(disp&0x00800000) disp-=0x01000000; // sign-extend imm24
    return (char *)ptr+disp*4+8; // follow jump
  }
  return ptr;
}
248
e2b5e7aa 249static int verify_dirty(u_int *ptr)
57871462 250{
665f33e1 251 #ifndef HAVE_ARMV7
16c8be17 252 u_int offset;
57871462 253 // get from literal pool
15776b68 254 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
57871462 267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
df4dc2b1 283static int isclean(void *addr)
57871462 284{
665f33e1 285 #ifndef HAVE_ARMV7
581335b0 286 u_int *ptr=((u_int *)addr)+4;
57871462 287 #else
581335b0 288 u_int *ptr=((u_int *)addr)+6;
57871462 289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
4a35de07 298// get source that block at addr was compiled from (host pointers)
643aeae3 299static void get_bounds(void *addr,u_int *start,u_int *end)
57871462 300{
643aeae3 301 u_int *ptr = addr;
665f33e1 302 #ifndef HAVE_ARMV7
16c8be17 303 u_int offset;
57871462 304 // get from literal pool
15776b68 305 assert((*ptr&0xFFFF0000)==0xe59f0000);
16c8be17 306 offset=*ptr&0xfff;
307 u_int source=*(u_int*)((void *)ptr+offset+8);
308 ptr++;
309 //assert((*ptr&0xFFFF0000)==0xe59f0000);
310 //offset=*ptr&0xfff;
311 //u_int copy=*(u_int*)((void *)ptr+offset+8);
312 ptr++;
313 assert((*ptr&0xFFFF0000)==0xe59f0000);
314 offset=*ptr&0xfff;
315 u_int len=*(u_int*)((void *)ptr+offset+8);
316 ptr++;
317 ptr++;
57871462 318 #else
319 // ARMv7 movw/movt
320 assert((*ptr&0xFFF00000)==0xe3000000);
321 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
322 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
323 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
324 ptr+=6;
325 #endif
326 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
327 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
57871462 328 *start=source;
329 *end=source+len;
330}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
e2b5e7aa 336static void alloc_reg(struct regstat *cur,int i,signed char reg)
57871462 337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
9f51b4b9 342
57871462 343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
9f51b4b9 345
57871462 346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
9f51b4b9 351
57871462 352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 354
57871462 355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 if(r<64&&((cur->u>>r)&1)) {
364 cur->regmap[preferred_reg]=reg;
365 cur->dirty&=~(1<<preferred_reg);
366 cur->isconst&=~(1<<preferred_reg);
367 return;
368 }
369 if(r>=64&&((cur->uu>>(r&63))&1)) {
370 cur->regmap[preferred_reg]=reg;
371 cur->dirty&=~(1<<preferred_reg);
372 cur->isconst&=~(1<<preferred_reg);
373 return;
374 }
9f51b4b9 375
57871462 376 // Clear any unneeded registers
377 // We try to keep the mapping consistent, if possible, because it
378 // makes branches easier (especially loops). So we try to allocate
379 // first (see above) before removing old mappings. If this is not
380 // possible then go ahead and clear out the registers that are no
381 // longer needed.
382 for(hr=0;hr<HOST_REGS;hr++)
383 {
384 r=cur->regmap[hr];
385 if(r>=0) {
386 if(r<64) {
387 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
388 }
389 else
390 {
391 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
392 }
393 }
394 }
395 // Try to allocate any available register, but prefer
396 // registers that have not been used recently.
397 if(i>0) {
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 // Try to allocate any available register
410 for(hr=0;hr<HOST_REGS;hr++) {
411 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
412 cur->regmap[hr]=reg;
413 cur->dirty&=~(1<<hr);
414 cur->isconst&=~(1<<hr);
415 return;
416 }
417 }
9f51b4b9 418
57871462 419 // Ok, now we have to evict someone
420 // Pick a register we hopefully won't need soon
421 u_char hsn[MAXREG+1];
422 memset(hsn,10,sizeof(hsn));
423 int j;
424 lsn(hsn,i,&preferred_reg);
425 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
426 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
427 if(i>0) {
428 // Don't evict the cycle count at entry points, otherwise the entry
429 // stub will have to write it.
430 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
431 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
432 for(j=10;j>=3;j--)
433 {
434 // Alloc preferred register if available
435 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
436 for(hr=0;hr<HOST_REGS;hr++) {
437 // Evict both parts of a 64-bit register
438 if((cur->regmap[hr]&63)==r) {
439 cur->regmap[hr]=-1;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 }
443 }
444 cur->regmap[preferred_reg]=reg;
445 return;
446 }
447 for(r=1;r<=MAXREG;r++)
448 {
449 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 for(hr=0;hr<HOST_REGS;hr++) {
461 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
462 if(cur->regmap[hr]==r) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 }
470 }
471 }
472 }
473 }
474 for(j=10;j>=0;j--)
475 {
476 for(r=1;r<=MAXREG;r++)
477 {
478 if(hsn[r]==j) {
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r+64) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 for(hr=0;hr<HOST_REGS;hr++) {
488 if(cur->regmap[hr]==r) {
489 cur->regmap[hr]=reg;
490 cur->dirty&=~(1<<hr);
491 cur->isconst&=~(1<<hr);
492 return;
493 }
494 }
495 }
496 }
497 }
c43b5311 498 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
57871462 499}
500
e2b5e7aa 501static void alloc_reg64(struct regstat *cur,int i,signed char reg)
57871462 502{
503 int preferred_reg = 8+(reg&1);
504 int r,hr;
9f51b4b9 505
57871462 506 // allocate the lower 32 bits
507 alloc_reg(cur,i,reg);
9f51b4b9 508
57871462 509 // Don't allocate unused registers
510 if((cur->uu>>reg)&1) return;
9f51b4b9 511
57871462 512 // see if the upper half is already allocated
513 for(hr=0;hr<HOST_REGS;hr++)
514 {
515 if(cur->regmap[hr]==reg+64) return;
516 }
9f51b4b9 517
57871462 518 // Keep the same mapping if the register was already allocated in a loop
519 preferred_reg = loop_reg(i,reg,preferred_reg);
9f51b4b9 520
57871462 521 // Try to allocate the preferred register
522 if(cur->regmap[preferred_reg]==-1) {
523 cur->regmap[preferred_reg]=reg|64;
524 cur->dirty&=~(1<<preferred_reg);
525 cur->isconst&=~(1<<preferred_reg);
526 return;
527 }
528 r=cur->regmap[preferred_reg];
529 if(r<64&&((cur->u>>r)&1)) {
530 cur->regmap[preferred_reg]=reg|64;
531 cur->dirty&=~(1<<preferred_reg);
532 cur->isconst&=~(1<<preferred_reg);
533 return;
534 }
535 if(r>=64&&((cur->uu>>(r&63))&1)) {
536 cur->regmap[preferred_reg]=reg|64;
537 cur->dirty&=~(1<<preferred_reg);
538 cur->isconst&=~(1<<preferred_reg);
539 return;
540 }
9f51b4b9 541
57871462 542 // Clear any unneeded registers
543 // We try to keep the mapping consistent, if possible, because it
544 // makes branches easier (especially loops). So we try to allocate
545 // first (see above) before removing old mappings. If this is not
546 // possible then go ahead and clear out the registers that are no
547 // longer needed.
548 for(hr=HOST_REGS-1;hr>=0;hr--)
549 {
550 r=cur->regmap[hr];
551 if(r>=0) {
552 if(r<64) {
553 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
558 }
559 }
560 }
561 // Try to allocate any available register, but prefer
562 // registers that have not been used recently.
563 if(i>0) {
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 // Try to allocate any available register
576 for(hr=0;hr<HOST_REGS;hr++) {
577 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
578 cur->regmap[hr]=reg|64;
579 cur->dirty&=~(1<<hr);
580 cur->isconst&=~(1<<hr);
581 return;
582 }
583 }
9f51b4b9 584
57871462 585 // Ok, now we have to evict someone
586 // Pick a register we hopefully won't need soon
587 u_char hsn[MAXREG+1];
588 memset(hsn,10,sizeof(hsn));
589 int j;
590 lsn(hsn,i,&preferred_reg);
591 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
592 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
593 if(i>0) {
594 // Don't evict the cycle count at entry points, otherwise the entry
595 // stub will have to write it.
596 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
597 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
598 for(j=10;j>=3;j--)
599 {
600 // Alloc preferred register if available
601 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
602 for(hr=0;hr<HOST_REGS;hr++) {
603 // Evict both parts of a 64-bit register
604 if((cur->regmap[hr]&63)==r) {
605 cur->regmap[hr]=-1;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 }
609 }
610 cur->regmap[preferred_reg]=reg|64;
611 return;
612 }
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 for(hr=0;hr<HOST_REGS;hr++) {
627 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
628 if(cur->regmap[hr]==r) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 }
636 }
637 }
638 }
639 }
640 for(j=10;j>=0;j--)
641 {
642 for(r=1;r<=MAXREG;r++)
643 {
644 if(hsn[r]==j) {
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r+64) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 for(hr=0;hr<HOST_REGS;hr++) {
654 if(cur->regmap[hr]==r) {
655 cur->regmap[hr]=reg|64;
656 cur->dirty&=~(1<<hr);
657 cur->isconst&=~(1<<hr);
658 return;
659 }
660 }
661 }
662 }
663 }
c43b5311 664 SysPrintf("This shouldn't happen");exit(1);
57871462 665}
666
667// Allocate a temporary register. This is done without regard to
668// dirty status or whether the register we request is on the unneeded list
669// Note: This will only allocate one register, even if called multiple times
e2b5e7aa 670static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
57871462 671{
672 int r,hr;
673 int preferred_reg = -1;
9f51b4b9 674
57871462 675 // see if it's already allocated
676 for(hr=0;hr<HOST_REGS;hr++)
677 {
678 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
679 }
9f51b4b9 680
57871462 681 // Try to allocate any available register
682 for(hr=HOST_REGS-1;hr>=0;hr--) {
683 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
684 cur->regmap[hr]=reg;
685 cur->dirty&=~(1<<hr);
686 cur->isconst&=~(1<<hr);
687 return;
688 }
689 }
9f51b4b9 690
57871462 691 // Find an unneeded register
692 for(hr=HOST_REGS-1;hr>=0;hr--)
693 {
694 r=cur->regmap[hr];
695 if(r>=0) {
696 if(r<64) {
697 if((cur->u>>r)&1) {
698 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
699 cur->regmap[hr]=reg;
700 cur->dirty&=~(1<<hr);
701 cur->isconst&=~(1<<hr);
702 return;
703 }
704 }
705 }
706 else
707 {
708 if((cur->uu>>(r&63))&1) {
709 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
710 cur->regmap[hr]=reg;
711 cur->dirty&=~(1<<hr);
712 cur->isconst&=~(1<<hr);
713 return;
714 }
715 }
716 }
717 }
718 }
9f51b4b9 719
57871462 720 // Ok, now we have to evict someone
721 // Pick a register we hopefully won't need soon
722 // TODO: we might want to follow unconditional jumps here
723 // TODO: get rid of dupe code and make this into a function
724 u_char hsn[MAXREG+1];
725 memset(hsn,10,sizeof(hsn));
726 int j;
727 lsn(hsn,i,&preferred_reg);
728 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
729 if(i>0) {
730 // Don't evict the cycle count at entry points, otherwise the entry
731 // stub will have to write it.
732 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
733 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
734 for(j=10;j>=3;j--)
735 {
736 for(r=1;r<=MAXREG;r++)
737 {
738 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
739 for(hr=0;hr<HOST_REGS;hr++) {
740 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 }
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
751 if(cur->regmap[hr]==r) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 }
759 }
760 }
761 }
762 }
763 for(j=10;j>=0;j--)
764 {
765 for(r=1;r<=MAXREG;r++)
766 {
767 if(hsn[r]==j) {
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r+64) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 for(hr=0;hr<HOST_REGS;hr++) {
777 if(cur->regmap[hr]==r) {
778 cur->regmap[hr]=reg;
779 cur->dirty&=~(1<<hr);
780 cur->isconst&=~(1<<hr);
781 return;
782 }
783 }
784 }
785 }
786 }
c43b5311 787 SysPrintf("This shouldn't happen");exit(1);
57871462 788}
e2b5e7aa 789
57871462 790// Allocate a specific ARM register.
e2b5e7aa 791static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
57871462 792{
793 int n;
f776eb14 794 int dirty=0;
9f51b4b9 795
57871462 796 // see if it's already allocated (and dealloc it)
797 for(n=0;n<HOST_REGS;n++)
798 {
f776eb14 799 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
800 dirty=(cur->dirty>>n)&1;
801 cur->regmap[n]=-1;
802 }
57871462 803 }
9f51b4b9 804
57871462 805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
f776eb14 807 cur->dirty|=dirty<<hr;
57871462 808 cur->isconst&=~(1<<hr);
809}
810
811// Alloc cycle count into dedicated register
e2b5e7aa 812static void alloc_cc(struct regstat *cur,int i)
57871462 813{
814 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
815}
816
817/* Special alloc */
818
819
820/* Assembler */
821
e2b5e7aa 822static unused char regname[16][4] = {
57871462 823 "r0",
824 "r1",
825 "r2",
826 "r3",
827 "r4",
828 "r5",
829 "r6",
830 "r7",
831 "r8",
832 "r9",
833 "r10",
834 "fp",
835 "r12",
836 "sp",
837 "lr",
838 "pc"};
839
e2b5e7aa 840static void output_w32(u_int word)
57871462 841{
842 *((u_int *)out)=word;
843 out+=4;
844}
e2b5e7aa 845
// Pack three register numbers into instruction fields:
// rn at bit 16, rd at bit 12, rm at bit 0. Each must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
e2b5e7aa 853
// Pack rd (bit 12), rn (bit 16), an 8-bit immediate and a rotation field.
// shift must be even; the rotation field is ((32-shift)&30) placed at bit 7.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rot=(32-shift)&30;
  return (rn<<16)|(rd<<12)|(rot<<7)|imm;
}
e2b5e7aa 862
// Try to express imm as an 8-bit value plus an even rotation.
// On success returns 1 and stores (rotation field << 7) | value in
// *encoded; on failure returns 0 and leaves *encoded at 0.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30); // rotate right by two bits
  }
  return 0;
}
e2b5e7aa 878
879static void genimm_checked(u_int imm,u_int *encoded)
cfbd3c6e 880{
881 u_int ret=genimm(imm,encoded);
882 assert(ret);
581335b0 883 (void)ret;
cfbd3c6e 884}
e2b5e7aa 885
886static u_int genjmp(u_int addr)
57871462 887{
888 int offset=addr-(int)out-8;
e80343e2 889 if(offset<-33554432||offset>=33554432) {
890 if (addr>2) {
c43b5311 891 SysPrintf("genjmp: out of range: %08x\n", offset);
e80343e2 892 exit(1);
893 }
894 return 0;
895 }
57871462 896 return ((u_int)offset>>2)&0xffffff;
897}
898
e2b5e7aa 899static void emit_mov(int rs,int rt)
57871462 900{
901 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
903}
904
e2b5e7aa 905static void emit_movs(int rs,int rt)
57871462 906{
907 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
909}
910
e2b5e7aa 911static void emit_add(int rs1,int rs2,int rt)
57871462 912{
913 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
915}
916
e2b5e7aa 917static void emit_adds(int rs1,int rs2,int rt)
57871462 918{
919 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
921}
922
e2b5e7aa 923static void emit_adcs(int rs1,int rs2,int rt)
57871462 924{
925 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
926 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
927}
928
e2b5e7aa 929static void emit_sbc(int rs1,int rs2,int rt)
57871462 930{
931 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
932 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
933}
934
e2b5e7aa 935static void emit_sbcs(int rs1,int rs2,int rt)
57871462 936{
937 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
939}
940
e2b5e7aa 941static void emit_neg(int rs, int rt)
57871462 942{
943 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
944 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
945}
946
e2b5e7aa 947static void emit_negs(int rs, int rt)
57871462 948{
949 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
950 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
951}
952
e2b5e7aa 953static void emit_sub(int rs1,int rs2,int rt)
57871462 954{
955 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
956 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
957}
958
e2b5e7aa 959static void emit_subs(int rs1,int rs2,int rt)
57871462 960{
961 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
962 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
963}
964
e2b5e7aa 965static void emit_zeroreg(int rt)
57871462 966{
967 assem_debug("mov %s,#0\n",regname[rt]);
968 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
969}
970
e2b5e7aa 971static void emit_loadlp(u_int imm,u_int rt)
790ee18e 972{
973 add_literal((int)out,imm);
974 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
975 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
976}
e2b5e7aa 977
978static void emit_movw(u_int imm,u_int rt)
790ee18e 979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
e2b5e7aa 984
985static void emit_movt(u_int imm,u_int rt)
790ee18e 986{
987 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
988 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
989}
e2b5e7aa 990
991static void emit_movimm(u_int imm,u_int rt)
790ee18e 992{
993 u_int armval;
994 if(genimm(imm,&armval)) {
995 assem_debug("mov %s,#%d\n",regname[rt],imm);
996 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
997 }else if(genimm(~imm,&armval)) {
998 assem_debug("mvn %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(imm<65536) {
665f33e1 1001 #ifndef HAVE_ARMV7
790ee18e 1002 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1003 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1004 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1005 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1006 #else
1007 emit_movw(imm,rt);
1008 #endif
1009 }else{
665f33e1 1010 #ifndef HAVE_ARMV7
790ee18e 1011 emit_loadlp(imm,rt);
1012 #else
1013 emit_movw(imm&0x0000FFFF,rt);
1014 emit_movt(imm&0xFFFF0000,rt);
1015 #endif
1016 }
1017}
e2b5e7aa 1018
1019static void emit_pcreladdr(u_int rt)
790ee18e 1020{
1021 assem_debug("add %s,pc,#?\n",regname[rt]);
1022 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1023}
1024
e2b5e7aa 1025static void emit_loadreg(int r, int hr)
57871462 1026{
3d624f89 1027 if(r&64) {
c43b5311 1028 SysPrintf("64bit load in 32bit mode!\n");
7f2607ea 1029 assert(0);
1030 return;
3d624f89 1031 }
57871462 1032 if((r&63)==0)
1033 emit_zeroreg(hr);
1034 else {
3d624f89 1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==CSREG) addr=(int)&Status;
1040 if(r==FSREG) addr=(int)&FCR31;
1041 if(r==INVCP) addr=(int)&invc_ptr;
1042 u_int offset = addr-(u_int)&dynarec_local;
1043 assert(offset<4096);
1044 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1045 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1046 }
1047}
e2b5e7aa 1048
1049static void emit_storereg(int r, int hr)
57871462 1050{
3d624f89 1051 if(r&64) {
c43b5311 1052 SysPrintf("64bit store in 32bit mode!\n");
7f2607ea 1053 assert(0);
1054 return;
3d624f89 1055 }
3d624f89 1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
e2b5e7aa 1067static void emit_test(int rs, int rt)
57871462 1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
e2b5e7aa 1073static void emit_testimm(int rs,int imm)
57871462 1074{
1075 u_int armval;
5a05d80c 1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1077 genimm_checked(imm,&armval);
57871462 1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
e2b5e7aa 1081static void emit_testeqimm(int rs,int imm)
b9b61529 1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1085 genimm_checked(imm,&armval);
b9b61529 1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
e2b5e7aa 1089static void emit_not(int rs,int rt)
57871462 1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
e2b5e7aa 1095static void emit_mvnmi(int rs,int rt)
b9b61529 1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
e2b5e7aa 1101static void emit_and(u_int rs1,u_int rs2,u_int rt)
57871462 1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
e2b5e7aa 1107static void emit_or(u_int rs1,u_int rs2,u_int rt)
57871462 1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
e2b5e7aa 1112
1113static void emit_or_and_set_flags(int rs1,int rs2,int rt)
57871462 1114{
1115 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1116 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1117}
1118
e2b5e7aa 1119static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
f70d384d 1120{
1121 assert(rs<16);
1122 assert(rt<16);
1123 assert(imm<32);
1124 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1125 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1126}
1127
e2b5e7aa 1128static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
576bbd8f 1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
e2b5e7aa 1137static void emit_xor(u_int rs1,u_int rs2,u_int rt)
57871462 1138{
1139 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1140 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1141}
1142
e2b5e7aa 1143static void emit_addimm(u_int rs,int imm,u_int rt)
57871462 1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 if(imm!=0) {
57871462 1148 u_int armval;
1149 if(genimm(imm,&armval)) {
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1151 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1152 }else if(genimm(-imm,&armval)) {
8a0a8423 1153 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1154 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
397614d0 1155 #ifdef HAVE_ARMV7
1156 }else if(rt!=rs&&(u_int)imm<65536) {
1157 emit_movw(imm&0x0000ffff,rt);
1158 emit_add(rs,rt,rt);
1159 }else if(rt!=rs&&(u_int)-imm<65536) {
1160 emit_movw(-imm&0x0000ffff,rt);
1161 emit_sub(rs,rt,rt);
1162 #endif
1163 }else if((u_int)-imm<65536) {
57871462 1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
397614d0 1168 }else {
1169 do {
1170 int shift = (ffs(imm) - 1) & ~1;
1171 int imm8 = imm & (0xff << shift);
1172 genimm_checked(imm8,&armval);
1173 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1174 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1175 rs = rt;
1176 imm &= ~imm8;
1177 }
1178 while (imm != 0);
57871462 1179 }
1180 }
1181 else if(rs!=rt) emit_mov(rs,rt);
1182}
1183
e2b5e7aa 1184static void emit_addimm_and_set_flags(int imm,int rt)
57871462 1185{
1186 assert(imm>-65536&&imm<65536);
1187 u_int armval;
1188 if(genimm(imm,&armval)) {
1189 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(genimm(-imm,&armval)) {
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(imm<0) {
1195 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1196 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1197 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1198 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1199 }else{
1200 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1201 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1202 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1203 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
e2b5e7aa 1206
1207static void emit_addimm_no_flags(u_int imm,u_int rt)
57871462 1208{
1209 emit_addimm(rt,imm,rt);
1210}
1211
e2b5e7aa 1212static void emit_addnop(u_int r)
57871462 1213{
1214 assert(r<16);
1215 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1216 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1217}
1218
e2b5e7aa 1219static void emit_adcimm(u_int rs,int imm,u_int rt)
57871462 1220{
1221 u_int armval;
cfbd3c6e 1222 genimm_checked(imm,&armval);
57871462 1223 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1edfcc68 1226
e2b5e7aa 1227static void emit_rscimm(int rs,int imm,u_int rt)
57871462 1228{
1229 assert(0);
1230 u_int armval;
cfbd3c6e 1231 genimm_checked(imm,&armval);
57871462 1232 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1234}
1235
e2b5e7aa 1236static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
57871462 1237{
1238 // TODO: if(genimm(imm,&armval)) ...
1239 // else
1240 emit_movimm(imm,HOST_TEMPREG);
1241 emit_adds(HOST_TEMPREG,rsl,rtl);
1242 emit_adcimm(rsh,0,rth);
1243}
1244
e2b5e7aa 1245static void emit_andimm(int rs,int imm,int rt)
57871462 1246{
1247 u_int armval;
790ee18e 1248 if(imm==0) {
1249 emit_zeroreg(rt);
1250 }else if(genimm(imm,&armval)) {
57871462 1251 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1252 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1253 }else if(genimm(~imm,&armval)) {
1254 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(imm==65535) {
332a4533 1257 #ifndef HAVE_ARMV6
57871462 1258 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1260 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1262 #else
1263 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1265 #endif
1266 }else{
1267 assert(imm>0&&imm<65535);
665f33e1 1268 #ifndef HAVE_ARMV7
57871462 1269 assem_debug("mov r14,#%d\n",imm&0xFF00);
1270 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1271 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1272 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1273 #else
1274 emit_movw(imm,HOST_TEMPREG);
1275 #endif
1276 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1277 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1278 }
1279}
1280
e2b5e7aa 1281static void emit_orimm(int rs,int imm,int rt)
57871462 1282{
1283 u_int armval;
790ee18e 1284 if(imm==0) {
1285 if(rs!=rt) emit_mov(rs,rt);
1286 }else if(genimm(imm,&armval)) {
57871462 1287 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1289 }else{
1290 assert(imm>0&&imm<65536);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1294 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1295 }
1296}
1297
e2b5e7aa 1298static void emit_xorimm(int rs,int imm,int rt)
57871462 1299{
57871462 1300 u_int armval;
790ee18e 1301 if(imm==0) {
1302 if(rs!=rt) emit_mov(rs,rt);
1303 }else if(genimm(imm,&armval)) {
57871462 1304 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1305 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1306 }else{
514ed0d9 1307 assert(imm>0&&imm<65536);
57871462 1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1311 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1312 }
1313}
1314
e2b5e7aa 1315static void emit_shlimm(int rs,u_int imm,int rt)
57871462 1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 //if(imm==1) ...
1320 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
e2b5e7aa 1324static void emit_lsls_imm(int rs,int imm,int rt)
c6c3b1b3 1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
e2b5e7aa 1332static unused void emit_lslpls_imm(int rs,int imm,int rt)
665f33e1 1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1338}
1339
e2b5e7aa 1340static void emit_shrimm(int rs,u_int imm,int rt)
57871462 1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1346}
1347
e2b5e7aa 1348static void emit_sarimm(int rs,u_int imm,int rt)
57871462 1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1354}
1355
e2b5e7aa 1356static void emit_rorimm(int rs,u_int imm,int rt)
57871462 1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1362}
1363
// Sign-extends the low 16 bits of rs into rt: a single "sxth" when
// HAVE_ARMV6 is defined, otherwise a shift left / arithmetic shift right pair.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6bf0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 16, rt);
  emit_sarimm(rt, 16, rt);
  #endif
}
1374
// Sign-extends the low 8 bits of rs into rt: a single "sxtb" when
// HAVE_ARMV6 is defined, otherwise a shift left / arithmetic shift right pair.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n", regname[rt], regname[rs]);
  output_w32(0xe6af0070 | rd_rn_rm(rt, 0, rs));
  #else
  emit_shlimm(rs, 24, rt);
  emit_sarimm(rt, 24, rt);
  #endif
}
1385
e2b5e7aa 1386static void emit_shl(u_int rs,u_int shift,u_int rt)
57871462 1387{
1388 assert(rs<16);
1389 assert(rt<16);
1390 assert(shift<16);
1391 //if(imm==1) ...
1392 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1393 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1394}
e2b5e7aa 1395
1396static void emit_shr(u_int rs,u_int shift,u_int rt)
57871462 1397{
1398 assert(rs<16);
1399 assert(rt<16);
1400 assert(shift<16);
1401 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1402 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1403}
e2b5e7aa 1404
1405static void emit_sar(u_int rs,u_int shift,u_int rt)
57871462 1406{
1407 assert(rs<16);
1408 assert(rt<16);
1409 assert(shift<16);
1410 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1411 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1412}
57871462 1413
e2b5e7aa 1414static void emit_orrshl(u_int rs,u_int shift,u_int rt)
57871462 1415{
1416 assert(rs<16);
1417 assert(rt<16);
1418 assert(shift<16);
1419 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1420 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1421}
e2b5e7aa 1422
1423static void emit_orrshr(u_int rs,u_int shift,u_int rt)
57871462 1424{
1425 assert(rs<16);
1426 assert(rt<16);
1427 assert(shift<16);
1428 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1429 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1430}
1431
e2b5e7aa 1432static void emit_cmpimm(int rs,int imm)
57871462 1433{
1434 u_int armval;
1435 if(genimm(imm,&armval)) {
5a05d80c 1436 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1437 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1438 }else if(genimm(-imm,&armval)) {
5a05d80c 1439 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1440 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1441 }else if(imm>0) {
1442 assert(imm<65536);
57871462 1443 emit_movimm(imm,HOST_TEMPREG);
57871462 1444 assem_debug("cmp %s,r14\n",regname[rs]);
1445 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1446 }else{
1447 assert(imm>-65536);
57871462 1448 emit_movimm(-imm,HOST_TEMPREG);
57871462 1449 assem_debug("cmn %s,r14\n",regname[rs]);
1450 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1451 }
1452}
1453
e2b5e7aa 1454static void emit_cmovne_imm(int imm,int rt)
57871462 1455{
1456 assem_debug("movne %s,#%d\n",regname[rt],imm);
1457 u_int armval;
cfbd3c6e 1458 genimm_checked(imm,&armval);
57871462 1459 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1460}
e2b5e7aa 1461
1462static void emit_cmovl_imm(int imm,int rt)
57871462 1463{
1464 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1465 u_int armval;
cfbd3c6e 1466 genimm_checked(imm,&armval);
57871462 1467 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1468}
e2b5e7aa 1469
1470static void emit_cmovb_imm(int imm,int rt)
57871462 1471{
1472 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1473 u_int armval;
cfbd3c6e 1474 genimm_checked(imm,&armval);
57871462 1475 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1476}
e2b5e7aa 1477
1478static void emit_cmovs_imm(int imm,int rt)
57871462 1479{
1480 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1481 u_int armval;
cfbd3c6e 1482 genimm_checked(imm,&armval);
57871462 1483 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1484}
e2b5e7aa 1485
e2b5e7aa 1486static void emit_cmovne_reg(int rs,int rt)
57871462 1487{
1488 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1489 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1490}
e2b5e7aa 1491
1492static void emit_cmovl_reg(int rs,int rt)
57871462 1493{
1494 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1495 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1496}
e2b5e7aa 1497
1498static void emit_cmovs_reg(int rs,int rt)
57871462 1499{
1500 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1501 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1502}
1503
// Sets rt to 0, compares rs with imm, then emits "movlt rt,#1".
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  // When rt is a distinct register it is cleared before the compare.
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  // When rt is also the operand it can only be cleared after the compare.
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1511
// Sets rt to 0, compares rs with imm, then emits "movcc rt,#1".
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased = (rs == rt);
  // When rt is a distinct register it is cleared before the compare.
  if (!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs, imm);
  // When rt is also the operand it can only be cleared after the compare.
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1519
// 64-bit "set if less than immediate" on the pair rsh:rsl, result in rt.
// Starts from the 32-bit result for the low word, then corrects it from
// the high word; rt must differ from rsh.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(0, rt);
    emit_cmovl_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
  emit_cmovs_imm(1, rt);
}
e2b5e7aa 1537
// Unsigned 64-bit "set if less than immediate" on the pair rsh:rsl,
// result in rt. Starts from the 32-bit result for the low word, then
// corrects it from the high word; rt must differ from rsh.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl, imm, rt);
  if (imm < 0) {
    emit_cmpimm(rsh, -1);
    emit_cmovne_imm(1, rt);
    return;
  }
  emit_test(rsh, rsh);
  emit_cmovne_imm(0, rt);
}
1553
e2b5e7aa 1554static void emit_cmp(int rs,int rt)
57871462 1555{
1556 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1557 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1558}
e2b5e7aa 1559
// Compares rs with 1, loads 1 into rt, then emits "movlt rt,#0".
static void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs, 1);
  emit_movimm(1, rt);
  emit_cmovl_imm(0, rt);
}
e2b5e7aa 1567
// Sets flags from rs (copying it to rt when the registers differ),
// then emits "movne rt,#1".
static void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if (rs == rt) {
    emit_test(rs, rs);
  } else {
    emit_movs(rs, rt);
  }
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1575
// 64-bit variant of emit_set_gz32 for the pair rsh:rsl: computes the
// low-word result in rt, then corrects it from rsh with movne / movmi.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl, rt);
  emit_test(rsh, rsh);
  emit_cmovne_imm(1, rt);
  emit_cmovs_imm(0, rt);
}
e2b5e7aa 1584
// ORs rsh and rsl into rt with flags set, then emits "movne rt,#1".
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh, rsl, rt);
  emit_cmovne_imm(1, rt);
}
e2b5e7aa 1591
// Sets rt to 0, compares rs1 with rs2, then emits "movlt rt,#1".
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  // When rt is a distinct register it is cleared before the compare.
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  // When rt is one of the operands it can only be cleared after the compare.
  if (aliased) emit_movimm(0, rt);
  emit_cmovl_imm(1, rt);
}
e2b5e7aa 1600
// Sets rt to 0, compares rs1 with rs2, then emits "movcc rt,#1".
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1 == rt || rs2 == rt);
  // When rt is a distinct register it is cleared before the compare.
  if (!aliased) emit_zeroreg(rt);
  emit_cmp(rs1, rs2);
  // When rt is one of the operands it can only be cleared after the compare.
  if (aliased) emit_movimm(0, rt);
  emit_cmovb_imm(1, rt);
}
e2b5e7aa 1609
1610static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1611{
1612 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1613 assert(u1!=rt);
1614 assert(u2!=rt);
1615 emit_cmp(l1,l2);
1616 emit_movimm(0,rt);
1617 emit_sbcs(u1,u2,HOST_TEMPREG);
1618 emit_cmovl_imm(1,rt);
1619}
e2b5e7aa 1620
1621static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
57871462 1622{
1623 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1624 assert(u1!=rt);
1625 assert(u2!=rt);
1626 emit_cmp(l1,l2);
1627 emit_movimm(0,rt);
1628 emit_sbcs(u1,u2,HOST_TEMPREG);
1629 emit_cmovb_imm(1,rt);
1630}
1631
#ifdef DRC_DBG
// Address-to-name table used to annotate branch targets in debug output.
extern void gen_interupt();
extern void do_insn_cmp();
#define FUNCNAME(f) { (intptr_t)f, " " #f }
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Returns the table entry's name (with its leading space) for address a,
// or "" when a is not in the table.
static const char *func_name(intptr_t a)
{
  const unsigned int count = sizeof(function_names)/sizeof(function_names[0]);
  unsigned int idx;
  for (idx = 0; idx < count; idx++) {
    if (function_names[idx].addr == a)
      return function_names[idx].name;
  }
  return "";
}
#else
#define func_name(x) ""
#endif
1672
643aeae3 1673static void emit_call(const void *a_)
57871462 1674{
643aeae3 1675 int a = (int)a_;
dd114d7d 1676 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1677 u_int offset=genjmp(a);
1678 output_w32(0xeb000000|offset);
1679}
e2b5e7aa 1680
b14b6a8f 1681static void emit_jmp(const void *a_)
57871462 1682{
b14b6a8f 1683 int a = (int)a_;
dd114d7d 1684 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
57871462 1685 u_int offset=genjmp(a);
1686 output_w32(0xea000000|offset);
1687}
e2b5e7aa 1688
643aeae3 1689static void emit_jne(const void *a_)
57871462 1690{
643aeae3 1691 int a = (int)a_;
57871462 1692 assem_debug("bne %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0x1a000000|offset);
1695}
e2b5e7aa 1696
1697static void emit_jeq(int a)
57871462 1698{
1699 assem_debug("beq %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0x0a000000|offset);
1702}
e2b5e7aa 1703
1704static void emit_js(int a)
57871462 1705{
1706 assem_debug("bmi %x\n",a);
1707 u_int offset=genjmp(a);
1708 output_w32(0x4a000000|offset);
1709}
e2b5e7aa 1710
1711static void emit_jns(int a)
57871462 1712{
1713 assem_debug("bpl %x\n",a);
1714 u_int offset=genjmp(a);
1715 output_w32(0x5a000000|offset);
1716}
e2b5e7aa 1717
1718static void emit_jl(int a)
57871462 1719{
1720 assem_debug("blt %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0xba000000|offset);
1723}
e2b5e7aa 1724
1725static void emit_jge(int a)
57871462 1726{
1727 assem_debug("bge %x\n",a);
1728 u_int offset=genjmp(a);
1729 output_w32(0xaa000000|offset);
1730}
e2b5e7aa 1731
1732static void emit_jno(int a)
57871462 1733{
1734 assem_debug("bvc %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0x7a000000|offset);
1737}
e2b5e7aa 1738
1739static void emit_jc(int a)
57871462 1740{
1741 assem_debug("bcs %x\n",a);
1742 u_int offset=genjmp(a);
1743 output_w32(0x2a000000|offset);
1744}
e2b5e7aa 1745
b14b6a8f 1746static void emit_jcc(void *a_)
57871462 1747{
b14b6a8f 1748 int a = (int)a_;
57871462 1749 assem_debug("bcc %x\n",a);
1750 u_int offset=genjmp(a);
1751 output_w32(0x3a000000|offset);
1752}
1753
e2b5e7aa 1754static void emit_callreg(u_int r)
57871462 1755{
c6c3b1b3 1756 assert(r<15);
1757 assem_debug("blx %s\n",regname[r]);
1758 output_w32(0xe12fff30|r);
57871462 1759}
e2b5e7aa 1760
1761static void emit_jmpreg(u_int r)
57871462 1762{
1763 assem_debug("mov pc,%s\n",regname[r]);
1764 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1765}
1766
e2b5e7aa 1767static void emit_readword_indexed(int offset, int rs, int rt)
57871462 1768{
1769 assert(offset>-4096&&offset<4096);
1770 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1771 if(offset>=0) {
1772 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1773 }else{
1774 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1775 }
1776}
e2b5e7aa 1777
1778static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
57871462 1779{
1780 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1781 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1782}
e2b5e7aa 1783
1784static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1785{
1786 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1787 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1788}
e2b5e7aa 1789
1790static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1791{
1792 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1793 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1794}
e2b5e7aa 1795
1796static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1797{
1798 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1799 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1800}
e2b5e7aa 1801
1802static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1803{
1804 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1805 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1806}
e2b5e7aa 1807
1808static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
c6c3b1b3 1809{
1810 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1812}
e2b5e7aa 1813
e2b5e7aa 1814static void emit_movsbl_indexed(int offset, int rs, int rt)
57871462 1815{
1816 assert(offset>-256&&offset<256);
1817 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1818 if(offset>=0) {
1819 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1820 }else{
1821 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1822 }
1823}
e2b5e7aa 1824
e2b5e7aa 1825static void emit_movswl_indexed(int offset, int rs, int rt)
57871462 1826{
1827 assert(offset>-256&&offset<256);
1828 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1829 if(offset>=0) {
1830 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1831 }else{
1832 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1833 }
1834}
e2b5e7aa 1835
1836static void emit_movzbl_indexed(int offset, int rs, int rt)
57871462 1837{
1838 assert(offset>-4096&&offset<4096);
1839 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1840 if(offset>=0) {
1841 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1842 }else{
1843 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1844 }
1845}
e2b5e7aa 1846
e2b5e7aa 1847static void emit_movzwl_indexed(int offset, int rs, int rt)
57871462 1848{
1849 assert(offset>-256&&offset<256);
1850 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1853 }else{
1854 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1855 }
1856}
e2b5e7aa 1857
054175e9 1858static void emit_ldrd(int offset, int rs, int rt)
1859{
1860 assert(offset>-256&&offset<256);
1861 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1862 if(offset>=0) {
1863 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1864 }else{
1865 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1866 }
1867}
e2b5e7aa 1868
643aeae3 1869static void emit_readword(void *addr, int rt)
57871462 1870{
643aeae3 1871 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1872 assert(offset<4096);
1873 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1874 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1875}
e2b5e7aa 1876
e2b5e7aa 1877static void emit_writeword_indexed(int rt, int offset, int rs)
57871462 1878{
1879 assert(offset>-4096&&offset<4096);
1880 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1881 if(offset>=0) {
1882 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1883 }else{
1884 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1885 }
1886}
e2b5e7aa 1887
e2b5e7aa 1888static void emit_writehword_indexed(int rt, int offset, int rs)
57871462 1889{
1890 assert(offset>-256&&offset<256);
1891 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1892 if(offset>=0) {
1893 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1894 }else{
1895 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1896 }
1897}
e2b5e7aa 1898
1899static void emit_writebyte_indexed(int rt, int offset, int rs)
57871462 1900{
1901 assert(offset>-4096&&offset<4096);
1902 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1903 if(offset>=0) {
1904 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1905 }else{
1906 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1907 }
1908}
e2b5e7aa 1909
e2b5e7aa 1910static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1911{
1912 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1914}
e2b5e7aa 1915
1916static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1917{
1918 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1919 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1920}
e2b5e7aa 1921
1922static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
b96d3df7 1923{
1924 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1926}
e2b5e7aa 1927
643aeae3 1928static void emit_writeword(int rt, void *addr)
57871462 1929{
643aeae3 1930 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
57871462 1931 assert(offset<4096);
1932 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1933 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1934}
e2b5e7aa 1935
e2b5e7aa 1936static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1937{
1938 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1939 assert(rs1<16);
1940 assert(rs2<16);
1941 assert(hi<16);
1942 assert(lo<16);
1943 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1944}
e2b5e7aa 1945
1946static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
57871462 1947{
1948 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1949 assert(rs1<16);
1950 assert(rs2<16);
1951 assert(hi<16);
1952 assert(lo<16);
1953 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1954}
1955
e2b5e7aa 1956static void emit_clz(int rs,int rt)
57871462 1957{
1958 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1959 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1960}
1961
e2b5e7aa 1962static void emit_subcs(int rs1,int rs2,int rt)
57871462 1963{
1964 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1965 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1966}
1967
e2b5e7aa 1968static void emit_shrcc_imm(int rs,u_int imm,int rt)
57871462 1969{
1970 assert(imm>0);
1971 assert(imm<32);
1972 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1973 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1974}
1975
e2b5e7aa 1976static void emit_shrne_imm(int rs,u_int imm,int rt)
b1be1eee 1977{
1978 assert(imm>0);
1979 assert(imm<32);
1980 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1981 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1982}
1983
e2b5e7aa 1984static void emit_negmi(int rs, int rt)
57871462 1985{
1986 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1987 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1988}
1989
e2b5e7aa 1990static void emit_negsmi(int rs, int rt)
57871462 1991{
1992 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1993 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1994}
1995
e2b5e7aa 1996static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 1997{
1998 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1999 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2000}
2001
e2b5e7aa 2002static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
57871462 2003{
2004 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2005 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2006}
2007
e2b5e7aa 2008static void emit_teq(int rs, int rt)
57871462 2009{
2010 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2011 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2012}
2013
e2b5e7aa 2014static void emit_rsbimm(int rs, int imm, int rt)
57871462 2015{
2016 u_int armval;
cfbd3c6e 2017 genimm_checked(imm,&armval);
57871462 2018 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2019 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2020}
2021
2022// Load 2 immediates optimizing for small code size
e2b5e7aa 2023static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
57871462 2024{
2025 emit_movimm(imm1,rt1);
2026 u_int armval;
2027 if(genimm(imm2-imm1,&armval)) {
2028 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2029 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2030 }else if(genimm(imm1-imm2,&armval)) {
2031 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2032 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2033 }
2034 else emit_movimm(imm2,rt2);
2035}
2036
2037// Conditionally select one of two immediates, optimizing for small code size
2038// This will only be called if HAVE_CMOV_IMM is defined
e2b5e7aa 2039static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
57871462 2040{
2041 u_int armval;
2042 if(genimm(imm2-imm1,&armval)) {
2043 emit_movimm(imm1,rt);
2044 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2045 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2046 }else if(genimm(imm1-imm2,&armval)) {
2047 emit_movimm(imm1,rt);
2048 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2049 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2050 }
2051 else {
665f33e1 2052 #ifndef HAVE_ARMV7
57871462 2053 emit_movimm(imm1,rt);
2054 add_literal((int)out,imm2);
2055 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2056 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2057 #else
2058 emit_movw(imm1&0x0000FFFF,rt);
2059 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2060 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2061 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2062 }
2063 emit_movt(imm1&0xFFFF0000,rt);
2064 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2065 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2066 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2067 }
2068 #endif
2069 }
2070}
2071
57871462 2072// special case for checking invalid_code
e2b5e7aa 2073static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
57871462 2074{
2075 assert(imm<128&&imm>=0);
2076 assert(r>=0&&r<16);
2077 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2078 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2079 emit_cmpimm(HOST_TEMPREG,imm);
2080}
2081
e2b5e7aa 2082static void emit_callne(int a)
0bbd1454 2083{
2084 assem_debug("blne %x\n",a);
2085 u_int offset=genjmp(a);
2086 output_w32(0x1b000000|offset);
2087}
2088
57871462 2089// Used to preload hash table entries
e2b5e7aa 2090static unused void emit_prefetchreg(int r)
57871462 2091{
2092 assem_debug("pld %s\n",regname[r]);
2093 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2094}
2095
2096// Special case for mini_ht
e2b5e7aa 2097static void emit_ldreq_indexed(int rs, u_int offset, int rt)
57871462 2098{
2099 assert(offset<4096);
2100 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2101 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2102}
2103
e2b5e7aa 2104static void emit_orrne_imm(int rs,int imm,int rt)
b9b61529 2105{
2106 u_int armval;
cfbd3c6e 2107 genimm_checked(imm,&armval);
b9b61529 2108 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2109 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2110}
2111
e2b5e7aa 2112static void emit_andne_imm(int rs,int imm,int rt)
b9b61529 2113{
2114 u_int armval;
cfbd3c6e 2115 genimm_checked(imm,&armval);
b9b61529 2116 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2117 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2118}
2119
e2b5e7aa 2120static unused void emit_addpl_imm(int rs,int imm,int rt)
665f33e1 2121{
2122 u_int armval;
2123 genimm_checked(imm,&armval);
2124 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2125 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2126}
2127
e2b5e7aa 2128static void emit_jno_unlikely(int a)
57871462 2129{
2130 //emit_jno(a);
2131 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2132 output_w32(0x72800000|rd_rn_rm(15,15,0));
2133}
2134
054175e9 2135static void save_regs_all(u_int reglist)
57871462 2136{
054175e9 2137 int i;
57871462 2138 if(!reglist) return;
2139 assem_debug("stmia fp,{");
054175e9 2140 for(i=0;i<16;i++)
2141 if(reglist&(1<<i))
2142 assem_debug("r%d,",i);
57871462 2143 assem_debug("}\n");
2144 output_w32(0xe88b0000|reglist);
2145}
e2b5e7aa 2146
054175e9 2147static void restore_regs_all(u_int reglist)
57871462 2148{
054175e9 2149 int i;
57871462 2150 if(!reglist) return;
2151 assem_debug("ldmia fp,{");
054175e9 2152 for(i=0;i<16;i++)
2153 if(reglist&(1<<i))
2154 assem_debug("r%d,",i);
57871462 2155 assem_debug("}\n");
2156 output_w32(0xe89b0000|reglist);
2157}
e2b5e7aa 2158
054175e9 2159// Save registers before function call
2160static void save_regs(u_int reglist)
2161{
4d646738 2162 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
054175e9 2163 save_regs_all(reglist);
2164}
e2b5e7aa 2165
054175e9 2166// Restore registers after function call
2167static void restore_regs(u_int reglist)
2168{
4d646738 2169 reglist&=CALLER_SAVE_REGS;
054175e9 2170 restore_regs_all(reglist);
2171}
57871462 2172
57871462 2173/* Stubs/epilogue */
2174
e2b5e7aa 2175static void literal_pool(int n)
57871462 2176{
2177 if(!literalcount) return;
2178 if(n) {
2179 if((int)out-literals[0][0]<4096-n) return;
2180 }
2181 u_int *ptr;
2182 int i;
2183 for(i=0;i<literalcount;i++)
2184 {
77750690 2185 u_int l_addr=(u_int)out;
2186 int j;
2187 for(j=0;j<i;j++) {
2188 if(literals[j][1]==literals[i][1]) {
2189 //printf("dup %08x\n",literals[i][1]);
2190 l_addr=literals[j][0];
2191 break;
2192 }
2193 }
57871462 2194 ptr=(u_int *)literals[i][0];
77750690 2195 u_int offset=l_addr-(u_int)ptr-8;
57871462 2196 assert(offset<4096);
2197 assert(!(offset&3));
2198 *ptr|=offset;
77750690 2199 if(l_addr==(u_int)out) {
2200 literals[i][0]=l_addr; // remember for dupes
2201 output_w32(literals[i][1]);
2202 }
57871462 2203 }
2204 literalcount=0;
2205}
2206
e2b5e7aa 2207static void literal_pool_jumpover(int n)
57871462 2208{
2209 if(!literalcount) return;
2210 if(n) {
2211 if((int)out-literals[0][0]<4096-n) return;
2212 }
df4dc2b1 2213 void *jaddr = out;
57871462 2214 emit_jmp(0);
2215 literal_pool(0);
df4dc2b1 2216 set_jump_target(jaddr, out);
57871462 2217}
2218
643aeae3 2219static void emit_extjump2(u_char *addr, int target, void *linker)
57871462 2220{
2221 u_char *ptr=(u_char *)addr;
2222 assert((ptr[3]&0x0e)==0xa);
e2b5e7aa 2223 (void)ptr;
2224
57871462 2225 emit_loadlp(target,0);
643aeae3 2226 emit_loadlp((u_int)addr,1);
2227 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
57871462 2228 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2229//DEBUG >
2230#ifdef DEBUG_CYCLE_COUNT
643aeae3 2231 emit_readword(&last_count,ECX);
57871462 2232 emit_add(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 2233 emit_readword(&next_interupt,ECX);
2234 emit_writeword(HOST_CCREG,&Count);
57871462 2235 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
643aeae3 2236 emit_writeword(ECX,&last_count);
57871462 2237#endif
2238//DEBUG <
2239 emit_jmp(linker);
2240}
2241
643aeae3 2242static void emit_extjump(void *addr, int target)
57871462 2243{
b14b6a8f 2244 emit_extjump2(addr, target, dyna_linker);
57871462 2245}
e2b5e7aa 2246
643aeae3 2247static void emit_extjump_ds(void *addr, int target)
57871462 2248{
b14b6a8f 2249 emit_extjump2(addr, target, dyna_linker_ds);
57871462 2250}
2251
13e35c04 2252// put rt_val into rt, potentially making use of rs with value rs_val
2253static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2254{
8575a877 2255 u_int armval;
2256 int diff;
2257 if(genimm(rt_val,&armval)) {
2258 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2259 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2260 return;
2261 }
2262 if(genimm(~rt_val,&armval)) {
2263 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2264 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2265 return;
2266 }
2267 diff=rt_val-rs_val;
2268 if(genimm(diff,&armval)) {
2269 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2270 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2271 return;
2272 }else if(genimm(-diff,&armval)) {
2273 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2274 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2275 return;
2276 }
2277 emit_movimm(rt_val,rt);
2278}
2279
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. if v2 can be
// derived from v1 with one add or sub: the difference (or its negation),
// after stripping trailing pairs of zero bits, must fit in 8 bits.
static int is_similar_value(u_int v1,u_int v2)
{
  if (v1 == v2)
    return 1;

  const int diff = v2 - v1;

  u_int bits = diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  // Negated only after the positive form failed.
  bits = -diff;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  return 0;
}
cbbab9cd 2295
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to move" for that argument.
// Trashes r2 (used as scratch when the two registers must be swapped).
static void pass_args(int a0, int a1)
{
  if (a0 == 1 && a1 == 0) {
    // must swap
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
    return;
  }

  if (a0 != 0 && a1 == 0) {
    // a1 lives in r0: move it out before r0 is overwritten
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
    return;
  }

  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2312
b14b6a8f 2313static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
b1be1eee 2314{
2315 switch(type) {
2316 case LOADB_STUB: emit_signextend8(rs,rt); break;
2317 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2318 case LOADH_STUB: emit_signextend16(rs,rt); break;
2319 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2320 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2321 default: assert(0);
2322 }
2323}
2324
b1be1eee 2325#include "pcsxmem.h"
2326#include "pcsxmem_inline.c"
b1be1eee 2327
// Emit the out-of-line slow path for the load described by stubs[n].
//
// The generated code looks the address up in mem_rtab. If the table entry
// has its top bit clear, the load is done directly and control returns to
// stubs[n].retaddr. Otherwise the matching jump_handler_read* routine is
// called and its result (in r0) is moved to the target register.
static void do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a; // instruction index within the block
  int rs=stubs[n].b; // host register holding the address
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e; // host registers that must be preserved
  signed char *i_regmap=i_regs->regmap;
  int rt;
  // Destination host register: FTEMP for C1LS/C2LS/LOADLR, else the mapping of rt1[i]
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  reglist|=(1<<rs);
  // Look for a free temporary among r0-r9 and r12 (mask 0x13ff)
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination is about to be overwritten, so it need not be saved
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // No free register: save first, then pick r0 (or r2 if rs is r0)
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second temporary when it is free or already saved
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab[rs>>12], then shifted left by one with flags updated
  emit_readword(&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // Conditional ("cc") load using temp2 and rs as the address pair
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
      default: assert(0);
    }
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address

  // From here on: the handler call path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=jump_handler_read16;
  if(type==LOADW_STUB)
    handler=jump_handler_read32;
  assert(handler);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  emit_call(handler);
  // Handler result is taken from r0 and extended per load type
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
}
2408
c6c3b1b3 2409// return memhandler, or get directly accessable address and return 0
643aeae3 2410static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
c6c3b1b3 2411{
2412 u_int l1,l2=0;
2413 l1=((u_int *)table)[addr>>12];
2414 if((l1&(1<<31))==0) {
2415 u_int v=l1<<1;
2416 *addr_host=v+addr;
643aeae3 2417 return NULL;
c6c3b1b3 2418 }
2419 else {
2420 l1<<=1;
2421 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2422 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2423 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2424 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2425 else
2426 l2=((u_int *)l1)[(addr&0xfff)/4];
2427 if((l2&(1<<31))==0) {
2428 u_int v=l2<<1;
2429 *addr_host=v+(addr&0xfff);
643aeae3 2430 return NULL;
c6c3b1b3 2431 }
643aeae3 2432 return (void *)(l2<<1);
c6c3b1b3 2433 }
2434}
c6c3b1b3 2435
// Emit inline code for a load from the address `addr`, which is known at
// translation time.
//
// Depending on what pcsx_direct_read() and the mem_rtab lookup report, this
// emits either nothing further, a direct load from host memory, or a call to
// a read handler followed by extension of the result into the target
// register.
static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rt=get_reg(regmap,target);
  // If the target has no host register, use the mapping of -1 for the address
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
  u_int host_addr=0,is_dynamic,far_call=0;
  void *handler;
  int cc=get_reg(regmap,CCREG);
  // Nothing more to emit when pcsx_direct_read() returns non-zero
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
  if (handler == NULL) {
    // Directly accessible memory: nothing to emit if the result is unused
    if(rt<0||rt1[i]==0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // A handler reported as dynamic is replaced by the generic
  // jump_handler_read* routine, which also receives the table entry in r1
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=jump_handler_read16;
    if(type==LOADW_STUB)
      handler=jump_handler_read32;
  }

  // call a memhandler
  // The destination register is overwritten anyway, so it is not saved
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  int offset=(u_char *)handler-out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm((u_int)handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = table entry shifted left by one, r2 = adjusted cycle count
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Store last_count + adjusted cycle count to Count before the call
    emit_readword(&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // Result comes back in r0; extend it into rt per load type
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
}
2517
// Emit the out-of-line slow path for the store described by stubs[n].
//
// The generated code looks the address up in mem_wtab. If the table entry
// has its top bit clear, the store is done directly and control returns to
// stubs[n].retaddr. Otherwise the matching jump_handler_write* routine is
// called and the cycle count it returns in r0 is written back.
static void do_writestub(int n)
{
  assem_debug("do_writestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);
  enum stub_type type=stubs[n].type;
  int i=stubs[n].a; // instruction index within the block
  int rs=stubs[n].b; // host register holding the address
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  u_int reglist=stubs[n].e; // host registers that must be preserved
  signed char *i_regmap=i_regs->regmap;
  int rt,r; // r is assigned below but not read afterwards
  // Host register with the value to store: FTEMP for C1LS/C2LS, else rs2[i]
  if(itype[i]==C1LS||itype[i]==C2LS) {
    rt=get_reg(i_regmap,r=FTEMP);
  }else{
    rt=get_reg(i_regmap,r=rs2[i]);
  }
  assert(rs>=0);
  assert(rt>=0);
  int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
  void *restore_jump = NULL;
  int reglist2=reglist|(1<<rs)|(1<<rt);
  // Look for a free temporary among r0-r9 and r12 (mask 0x13ff)
  for(rtmp=0;rtmp<=12;rtmp++) {
    if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
      temp=rtmp; break;
    }
  }
  if(temp==-1) {
    // No free register: save first, then take one of r0-r3 that is
    // neither rs nor rt
    save_regs(reglist);
    regs_saved=1;
    for(rtmp=0;rtmp<=3;rtmp++)
      if(rtmp!=rs&&rtmp!=rt)
        {temp=rtmp;break;}
  }
  // Prefer r3 as the second temporary when it is free or already saved
  if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
    temp2=3;
  // temp2 = mem_wtab[rs>>12], then shifted left by one with flags updated
  emit_readword(&mem_wtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  // Conditional ("cc") store using temp2 and rs as the address pair
  switch(type) {
    case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
    case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
    case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
    default: assert(0);
  }
  if(regs_saved) {
    restore_jump=out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n].retaddr); // return address (invcode check)

  // From here on: the handler call path
  if(!regs_saved)
    save_regs(reglist);
  void *handler=NULL;
  switch(type) {
    case STOREB_STUB: handler=jump_handler_write8; break;
    case STOREH_STUB: handler=jump_handler_write16; break;
    case STOREW_STUB: handler=jump_handler_write32; break;
    default: assert(0);
  }
  assert(handler);
  // r0 = address, r1 = value, r2 = adjusted cycle count, r3 = shifted table entry
  pass_args(rs,rt);
  if(temp2!=3)
    emit_mov(temp2,3);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  // returns new cycle_count
  emit_call(handler);
  // Undo the adjustment on the returned count and write it back
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  if(restore_jump)
    set_jump_target(restore_jump, out);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr);
}
2598
b14b6a8f 2599static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
57871462 2600{
2601 int rs=get_reg(regmap,-1);
57871462 2602 int rt=get_reg(regmap,target);
2603 assert(rs>=0);
2604 assert(rt>=0);
643aeae3 2605 u_int host_addr=0;
2606 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2607 if (handler == NULL) {
13e35c04 2608 if(addr!=host_addr)
2609 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 2610 switch(type) {
2611 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2612 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2613 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2614 default: assert(0);
2615 }
2616 return;
2617 }
2618
2619 // call a memhandler
2620 save_regs(reglist);
13e35c04 2621 pass_args(rs,rt);
b96d3df7 2622 int cc=get_reg(regmap,CCREG);
2623 if(cc<0)
2624 emit_loadreg(CCREG,2);
2573466a 2625 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
643aeae3 2626 emit_movimm((u_int)handler,3);
b96d3df7 2627 // returns new cycle_count
643aeae3 2628 emit_call(jump_handler_write_h);
2573466a 2629 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 2630 if(cc<0)
2631 emit_storereg(CCREG,2);
2632 restore_regs(reglist);
57871462 2633}
2634
// Emit the out-of-line path for the unaligned store (SWL/SWR) described by
// stubs[n]. The active (#if 1) variant calls jump_handle_swl or
// jump_handle_swr with r0 = address, r1 = value and r2 = adjusted cycle
// count, then writes back the cycle count returned in r0. The #else variant
// is compiled out.
static void do_unalignedwritestub(int n)
{
  assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
  literal_pool(256);
  set_jump_target(stubs[n].addr, out);

  int i=stubs[n].a; // instruction index within the block
  struct regstat *i_regs=(struct regstat *)stubs[n].c;
  int addr=stubs[n].b; // host register holding the address
  u_int reglist=stubs[n].e; // host registers that must be preserved
  signed char *i_regmap=i_regs->regmap;
  int temp2=get_reg(i_regmap,FTEMP);
  int rt;
  rt=get_reg(i_regmap,rs2[i]);
  assert(rt>=0);
  assert(addr>=0);
  assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
  reglist|=(1<<addr);
  // NOTE(review): temp2 is not checked for being negative before this shift;
  // this assumes FTEMP always has a host register here - confirm.
  reglist&=~(1<<temp2);

#if 1
  // don't bother with it and call write handler
  save_regs(reglist);
  pass_args(addr,rt);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
  emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
  // Undo the adjustment on the returned count and write it back
  emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
  if(cc<0)
    emit_storereg(CCREG,2);
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
#else
  // Compiled-out variant: read-modify-write of the containing aligned word
  emit_andimm(addr,0xfffffffc,temp2);
  emit_writeword(temp2,&address);

  save_regs(reglist);
  emit_shrimm(addr,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm((u_int)readmem,0);
  emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
  emit_call((int)&indirect_jump_indexed);
  restore_regs(reglist);

  emit_readword(&readmem_dword,temp2);
  int temp=addr; //hmh
  emit_shlimm(addr,3,temp);
  emit_andimm(temp,24,temp);
#ifdef BIG_ENDIAN_MIPS
  if (opcode[i]==0x2e) // SWR
#else
  if (opcode[i]==0x2a) // SWL
#endif
    emit_xorimm(temp,24,temp);
  emit_movimm(-1,HOST_TEMPREG);
  if (opcode[i]==0x2a) { // SWL
    emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshr(rt,temp,temp2);
  }else{
    emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
    emit_orrshl(rt,temp,temp2);
  }
  emit_readword(&address,addr);
  emit_writeword(temp2,&word);
  //save_regs(reglist); // don't need to, no state changes
  emit_shrimm(addr,16,1);
  emit_movimm((u_int)writemem,0);
  //emit_call((int)&indirect_jump_indexed);
  emit_mov(15,14);
  emit_readword_dualindexedx4(0,1,15);
  emit_readword(&Count,HOST_TEMPREG);
  emit_readword(&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
  emit_writeword(2,&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
  restore_regs(reglist);
  emit_jmp(stubs[n].retaddr); // return address
#endif
}
2722
e2b5e7aa 2723static void do_invstub(int n)
57871462 2724{
2725 literal_pool(20);
b14b6a8f 2726 u_int reglist=stubs[n].a;
2727 set_jump_target(stubs[n].addr, out);
57871462 2728 save_regs(reglist);
b14b6a8f 2729 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
643aeae3 2730 emit_call(&invalidate_addr);
57871462 2731 restore_regs(reglist);
b14b6a8f 2732 emit_jmp(stubs[n].retaddr); // return address
57871462 2733}
2734
df4dc2b1 2735void *do_dirty_stub(int i)
57871462 2736{
2737 assem_debug("do_dirty_stub %x\n",start+i*4);
71e490c5 2738 u_int addr=(u_int)source;
57871462 2739 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2740 #ifndef HAVE_ARMV7
ac545b3a 2741 emit_loadlp(addr,1);
57871462 2742 emit_loadlp((int)copy,2);
2743 emit_loadlp(slen*4,3);
2744 #else
ac545b3a 2745 emit_movw(addr&0x0000FFFF,1);
57871462 2746 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 2747 emit_movt(addr&0xFFFF0000,1);
57871462 2748 emit_movt(((u_int)copy)&0xFFFF0000,2);
2749 emit_movw(slen*4,3);
2750 #endif
2751 emit_movimm(start+i*4,0);
643aeae3 2752 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
df4dc2b1 2753 void *entry = out;
57871462 2754 load_regs_entry(i);
df4dc2b1 2755 if (entry == out)
2756 entry = instr_addr[i];
57871462 2757 emit_jmp(instr_addr[i]);
2758 return entry;
2759}
2760
e2b5e7aa 2761static void do_dirty_stub_ds()
57871462 2762{
2763 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 2764 #ifndef HAVE_ARMV7
57871462 2765 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2766 emit_loadlp((int)copy,2);
2767 emit_loadlp(slen*4,3);
2768 #else
2769 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2770 emit_movw(((u_int)copy)&0x0000FFFF,2);
2771 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2772 emit_movt(((u_int)copy)&0xFFFF0000,2);
2773 emit_movw(slen*4,3);
2774 #endif
2775 emit_movimm(start+1,0);
643aeae3 2776 emit_call(&verify_code_ds);
57871462 2777}
2778
e2b5e7aa 2779static void do_cop1stub(int n)
57871462 2780{
2781 literal_pool(256);
b14b6a8f 2782 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2783 set_jump_target(stubs[n].addr, out);
2784 int i=stubs[n].a;
2785// int rs=stubs[n].b;
2786 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2787 int ds=stubs[n].d;
57871462 2788 if(!ds) {
2789 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2790 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2791 }
2792 //else {printf("fp exception in delay slot\n");}
2793 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2794 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2795 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 2796 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
b14b6a8f 2797 emit_jmp(ds?fp_exception_ds:fp_exception);
57871462 2798}
2799
57871462 2800/* Special assem */
2801
// Assemble the variable-shift instruction at index i (shift amount taken
// from a register). No code is emitted when the destination register number
// rt1[i] is 0 or when the destination has no host register.
// opcode2 <= 0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); everything else
// is handled as the 64-bit forms (DSLLV/DSRLV/DSRAV), which work on a
// low/high register pair.
static void shift_assemble_arm(int i,struct regstat *i_regs)
{
  if(rt1[i]) {
    if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
    {
      signed char s,t,shift;
      t=get_reg(i_regs->regmap,rt1[i]);
      s=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(t>=0){
        if(rs1[i]==0)
        {
          // Source is register 0: the result is zero
          emit_zeroreg(t);
        }
        else if(rs2[i]==0)
        {
          // Shift amount comes from register 0: plain copy
          assert(s>=0);
          if(s!=t) emit_mov(s,t);
        }
        else
        {
          // Only the low 5 bits of the shift register are used
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==4) // SLLV
          {
            emit_shl(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==6) // SRLV
          {
            emit_shr(s,HOST_TEMPREG,t);
          }
          if(opcode2[i]==7) // SRAV
          {
            emit_sar(s,HOST_TEMPREG,t);
          }
        }
      }
    } else { // DSLLV/DSRLV/DSRAV
      // th/sh are looked up with |64 (presumably the upper halves, going by
      // the names), tl/sl without it
      signed char sh,sl,th,tl,shift;
      th=get_reg(i_regs->regmap,rt1[i]|64);
      tl=get_reg(i_regs->regmap,rt1[i]);
      sh=get_reg(i_regs->regmap,rs1[i]|64);
      sl=get_reg(i_regs->regmap,rs1[i]);
      shift=get_reg(i_regs->regmap,rs2[i]);
      if(tl>=0){
        if(rs1[i]==0)
        {
          emit_zeroreg(tl);
          if(th>=0) emit_zeroreg(th);
        }
        else if(rs2[i]==0)
        {
          assert(sl>=0);
          if(sl!=tl) emit_mov(sl,tl);
          if(th>=0&&sh!=th) emit_mov(sh,th);
        }
        else
        {
          // FIXME: What if shift==tl ?
          assert(shift!=tl);
          int temp=get_reg(i_regs->regmap,-1);
          int real_th=th; // remembers whether the upper half is really mapped
          if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
          assert(sl>=0);
          assert(sh>=0);
          emit_andimm(shift,31,HOST_TEMPREG);
          if(opcode2[i]==0x14) // DSLLV
          {
            if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            // NOTE(review): unlike the surrounding lines, this call is not
            // guarded by th>=0, although th may still be negative here
            emit_orrshr(sl,HOST_TEMPREG,th);
            emit_andimm(shift,31,HOST_TEMPREG);
            // Bit 5 of the shift amount selects the "shift by 32 or more" fixup
            emit_testimm(shift,32);
            emit_shl(sl,HOST_TEMPREG,tl);
            if(th>=0) emit_cmovne_reg(tl,th);
            emit_cmovne_imm(0,tl);
          }
          if(opcode2[i]==0x16) // DSRLV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_shr(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_imm(0,th);
          }
          if(opcode2[i]==0x17) // DSRAV
          {
            assert(th>=0);
            emit_shr(sl,HOST_TEMPREG,tl);
            emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
            if(real_th>=0) {
              assert(temp>=0);
              emit_sarimm(th,31,temp);
            }
            emit_orrshl(sh,HOST_TEMPREG,tl);
            emit_andimm(shift,31,HOST_TEMPREG);
            emit_testimm(shift,32);
            emit_sar(sh,HOST_TEMPREG,th);
            emit_cmovne_reg(th,tl);
            if(real_th>=0) emit_cmovne_reg(temp,th);
          }
        }
      }
    }
  }
}
ffb0b9e0 2911
ffb0b9e0 2912static void speculate_mov(int rs,int rt)
2913{
2914 if(rt!=0) {
2915 smrv_strong_next|=1<<rt;
2916 smrv[rt]=smrv[rs];
2917 }
2918}
2919
2920static void speculate_mov_weak(int rs,int rt)
2921{
2922 if(rt!=0) {
2923 smrv_weak_next|=1<<rt;
2924 smrv[rt]=smrv[rs];
2925 }
2926}
2927
// Update the speculated register values (smrv[]) and their strong/weak
// confidence masks for the instruction at index i.
//
// At i==0 the values are seeded from the current psxRegs.GPR contents. The
// masks computed for the previous instruction (*_next) become current, and
// the *_next masks are then updated for the register written by this
// instruction, depending on its type.
static void speculate_register_values(int i)
{
  if(i==0) {
    memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
    // gp,sp are likely to stay the same throughout the block
    // (bits 28, 29 and 30 are marked strong; everything else weak)
    smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
    smrv_weak_next=~smrv_strong_next;
    //printf(" llr %08x\n", smrv[4]);
  }
  smrv_strong=smrv_strong_next;
  smrv_weak=smrv_weak_next;
  switch(itype[i]) {
    case ALU:
      // Inherit from a source operand, preferring strong over weak and
      // rs1 over rs2; with no known source the destination becomes unknown
      if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
      else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
      else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
      else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
      else {
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
    case SHIFTIMM:
      smrv_strong_next&=~(1<<rt1[i]);
      smrv_weak_next&=~(1<<rt1[i]);
      // fallthrough
    case IMM16:
      if(rt1[i]&&is_const(&regs[i],rt1[i])) {
        // Destination is a known constant: record it as a strong value
        int value,hr=get_reg(regs[i].regmap,rt1[i]);
        if(hr>=0) {
          if(get_final_value(hr,i,&value))
               smrv[rt1[i]]=value;
          else smrv[rt1[i]]=constmap[i][hr];
          smrv_strong_next|=1<<rt1[i];
        }
      }
      else {
        if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
        else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
      }
      break;
    case LOAD:
      if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
        // special case for BIOS
        smrv[rt1[i]]=0xa0000000;
        smrv_strong_next|=1<<rt1[i];
        break;
      }
      // fallthrough
    case SHIFT:
    case LOADLR:
    case MOV:
      // Result not tracked: forget the destination
      smrv_strong_next&=~(1<<rt1[i]);
      smrv_weak_next&=~(1<<rt1[i]);
      break;
    case COP0:
    case COP2:
      if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
    case C2LS:
      if (opcode[i]==0x32) { // LWC2
        smrv_strong_next&=~(1<<rt1[i]);
        smrv_weak_next&=~(1<<rt1[i]);
      }
      break;
  }
#if 0
  int r=4;
  printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
    ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
#endif
}
3003
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default class: anything not matched below
  MTYPE_8020 = 1, // 0x80200000 .. 0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000 .. 0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000 .. 0x1f800fff
};
3011
3012static int get_ptr_mem_type(u_int a)
3013{
3014 if(a < 0x00200000) {
3015 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3016 // return wrong, must use memhandler for BIOS self-test to pass
3017 // 007 does similar stuff from a00 mirror, weird stuff
3018 return MTYPE_8000;
3019 return MTYPE_0000;
3020 }
3021 if(0x1f800000 <= a && a < 0x1f801000)
3022 return MTYPE_1F80;
3023 if(0x80200000 <= a && a < 0x80800000)
3024 return MTYPE_8020;
3025 if(0xa0000000 <= a && a < 0xa0200000)
3026 return MTYPE_A000;
3027 return MTYPE_8000;
3028}
ffb0b9e0 3029
b14b6a8f 3030static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
ffb0b9e0 3031{
b14b6a8f 3032 void *jaddr = NULL;
3033 int type=0;
ffb0b9e0 3034 int mr=rs1[i];
3035 if(((smrv_strong|smrv_weak)>>mr)&1) {
3036 type=get_ptr_mem_type(smrv[mr]);
3037 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3038 }
3039 else {
3040 // use the mirror we are running on
3041 type=get_ptr_mem_type(start);
3042 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3043 }
3044
3045 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3046 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3047 addr=*addr_reg_override=HOST_TEMPREG;
3048 type=0;
3049 }
3050 else if(type==MTYPE_0000) { // RAM 0 mirror
3051 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3052 addr=*addr_reg_override=HOST_TEMPREG;
3053 type=0;
3054 }
3055 else if(type==MTYPE_A000) { // RAM A mirror
3056 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3057 addr=*addr_reg_override=HOST_TEMPREG;
3058 type=0;
3059 }
3060 else if(type==MTYPE_1F80) { // scratchpad
6d760c92 3061 if (psxH == (void *)0x1f800000) {
3062 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3063 emit_cmpimm(HOST_TEMPREG,0x1000);
b14b6a8f 3064 jaddr=out;
6d760c92 3065 emit_jc(0);
3066 }
3067 else {
3068 // do usual RAM check, jump will go to the right handler
3069 type=0;
3070 }
ffb0b9e0 3071 }
ffb0b9e0 3072
3073 if(type==0)
3074 {
3075 emit_cmpimm(addr,RAM_SIZE);
b14b6a8f 3076 jaddr=out;
ffb0b9e0 3077 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3078 // Hint to branch predictor that the branch is unlikely to be taken
3079 if(rs1[i]>=28)
3080 emit_jno_unlikely(0);
3081 else
3082 #endif
3083 emit_jno(0);
a327ad27 3084 if(ram_offset!=0) {
3085 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3086 addr=*addr_reg_override=HOST_TEMPREG;
3087 }
ffb0b9e0 3088 }
3089
3090 return jaddr;
3091}
3092
57871462 3093#define shift_assemble shift_assemble_arm
3094
// Assemble an unaligned word load for instruction i: opcode 0x22 (LWL) or
// 0x26 (LWR). i_regs supplies the host register mapping and the const
// state for this instruction. The 64-bit forms (opcode 0x1A/0x1B, LDL/LDR)
// are not supported and hit assert(0).
e2b5e7aa 3095static void loadlr_assemble_arm(int i,struct regstat *i_regs)
57871462 3096{
9c45ca93 3097 int s,tl,temp,temp2,addr;
57871462 3098 int offset;
b14b6a8f 3099 void *jaddr=0;
af4ee1fe 3100 int memtarget=0,c=0;
ffb0b9e0 3101 int fastload_reg_override=0;
57871462 3102 u_int hr,reglist=0;
57871462 3103 tl=get_reg(i_regs->regmap,rt1[i]);
3104 s=get_reg(i_regs->regmap,rs1[i]);
3105 temp=get_reg(i_regs->regmap,-1);
3106 temp2=get_reg(i_regs->regmap,FTEMP);
3107 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3108 assert(addr<0);
3109 offset=imm[i];
// Build a bitmask of every host register that currently maps a guest
// register (plus temp); it is passed to the read stubs below.
3110 for(hr=0;hr<HOST_REGS;hr++) {
3111 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3112 }
3113 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only computed below),
// so this choice effectively depends on offset and s alone.
3114 if(offset||s<0||c) addr=temp2;
3115 else addr=s;
3116 if(s>=0) {
3117 c=(i_regs->wasconst>>s)&1;
af4ee1fe 3118 if(c) {
3119 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
af4ee1fe 3120 }
57871462 3121 }
// Address not known at translation time: temp receives the address shifted
// left by 3 (later masked down to the byte shift amount), temp2 the aligned
// address, then the fast-path range check is emitted.
1edfcc68 3122 if(!c) {
1edfcc68 3123 emit_shlimm(addr,3,temp);
3124 if (opcode[i]==0x22||opcode[i]==0x26) {
3125 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3126 }else{
3127 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
535d208a 3128 }
1edfcc68 3129 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3130 }
// Constant address: the shift amount is computed here and loaded as an
// immediate; no range check is emitted.
3131 else {
3132 if(ram_offset&&memtarget) {
3133 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3134 fastload_reg_override=HOST_TEMPREG;
57871462 3135 }
1edfcc68 3136 if (opcode[i]==0x22||opcode[i]==0x26) {
3137 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
535d208a 3138 }else{
1edfcc68 3139 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
535d208a 3140 }
535d208a 3141 }
535d208a 3142 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
// Direct read when the address is dynamic or a constant inside RAM;
// otherwise go through the inline read stub with the aligned constant.
3143 if(!c||memtarget) {
ffb0b9e0 3144 int a=temp2;
3145 if(fastload_reg_override) a=fastload_reg_override;
9c45ca93 3146 emit_readword_indexed(0,a,temp2);
b14b6a8f 3147 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
535d208a 3148 }
3149 else
3150 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
// Merge the loaded word (temp2) into the old value of rt (tl): shift the
// loaded data by temp, clear bits of tl using the all-ones mask in
// HOST_TEMPREG shifted by temp, then OR the two together. Skipped when the
// destination is r0.
// NOTE(review): exact operand semantics of emit_bic_lsl/emit_bic_lsr are
// defined outside this chunk - confirm before changing.
3151 if(rt1[i]) {
3152 assert(tl>=0);
57871462 3153 emit_andimm(temp,24,temp);
2002a1db 3154#ifdef BIG_ENDIAN_MIPS
3155 if (opcode[i]==0x26) // LWR
3156#else
3157 if (opcode[i]==0x22) // LWL
3158#endif
3159 emit_xorimm(temp,24,temp);
57871462 3160 emit_movimm(-1,HOST_TEMPREG);
3161 if (opcode[i]==0x26) {
3162 emit_shr(temp2,temp,temp2);
3163 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3164 }else{
3165 emit_shl(temp2,temp,temp2);
3166 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3167 }
3168 emit_or(temp2,tl,tl);
57871462 3169 }
535d208a 3170 //emit_storereg(rt1[i],tl); // DEBUG
3171 }
3172 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
9c45ca93 3173 assert(0);
57871462 3174 }
3175}
3176#define loadlr_assemble loadlr_assemble_arm
3177
// Assemble a coprocessor-0 instruction for instruction i:
//   opcode2 == 0    : MFC0 - read reg_cop0[copr] into the host reg of rt
//   opcode2 == 4    : MTC0 - call pcsx_mtc0 / pcsx_mtc0_ds with the value
//   opcode2 == 0x10 : only function field 0x10 (RFE) emits code
// copr is taken from bits 11..15 of the instruction word.
e2b5e7aa 3178static void cop0_assemble(int i,struct regstat *i_regs)
57871462 3179{
3180 if(opcode2[i]==0) // MFC0
3181 {
3182 signed char t=get_reg(i_regs->regmap,rt1[i]);
643aeae3 3183 u_int copr=(source[i]>>11)&0x1f;
57871462 3184 //assert(t>=0); // Why does this happen? OOT is weird
// Nothing is emitted when rt is r0 or has no host register.
f1b3b369 3185 if(t>=0&&rt1[i]!=0) {
643aeae3 3186 emit_readword(&reg_cop0[copr],t);
57871462 3187 }
3188 }
3189 else if(opcode2[i]==4) // MTC0
3190 {
3191 signed char s=get_reg(i_regs->regmap,rs1[i]);
3192 char copr=(source[i]>>11)&0x1f;
3193 assert(s>=0);
63cb0298 3194 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
// For registers 9, 11, 12 and 13 the Count variable is brought up to date
// first: Count = last_count + cycle counter + CLOCK_ADJUST(ccadj[i]).
7139f3c8 3195 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 3196 emit_readword(&last_count,HOST_TEMPREG);
57871462 3197 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 3198 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 3199 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
643aeae3 3200 emit_writeword(HOST_CCREG,&Count);
57871462 3201 }
3202 // What a mess. The status register (12) can enable interrupts,
3203 // so needs a special case to handle a pending interrupt.
3204 // The interrupt must be taken immediately, because a subsequent
3205 // instruction might disable interrupts again.
7139f3c8 3206 if(copr==12||copr==13) {
fca1aef2 3207 if (is_delayslot) {
3208 // burn cycles to cause cc_interrupt, which will
3209 // reschedule next_interupt. Relies on CCREG from above.
3210 assem_debug("MTC0 DS %d\n", copr);
643aeae3 3211 emit_writeword(HOST_CCREG,&last_count);
fca1aef2 3212 emit_movimm(0,HOST_CCREG);
3213 emit_storereg(CCREG,HOST_CCREG);
// Host reg 1 = value being written, host reg 0 = cop0 register number.
caeefe31 3214 emit_loadreg(rs1[i],1);
fca1aef2 3215 emit_movimm(copr,0);
643aeae3 3216 emit_call(pcsx_mtc0_ds);
042c7287 3217 emit_loadreg(rs1[i],s);
fca1aef2 3218 return;
3219 }
// Not in a delay slot: record the address of the next instruction in
// pcaddr and clear pending_exception before the call.
63cb0298 3220 emit_movimm(start+i*4+4,HOST_TEMPREG);
643aeae3 3221 emit_writeword(HOST_TEMPREG,&pcaddr);
63cb0298 3222 emit_movimm(0,HOST_TEMPREG);
643aeae3 3223 emit_writeword(HOST_TEMPREG,&pending_exception);
57871462 3224 }
3225 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3226 //else
// Put the value into host reg 1. If it lived in HOST_CCREG it is reloaded
// from the guest register file, since HOST_CCREG may have been overwritten
// above.
caeefe31 3227 if(s==HOST_CCREG)
3228 emit_loadreg(rs1[i],1);
3229 else if(s!=1)
63cb0298 3230 emit_mov(s,1);
fca1aef2 3231 emit_movimm(copr,0);
643aeae3 3232 emit_call(pcsx_mtc0);
// After the call, rebuild the cycle counter relative to next_interupt:
// counter = Count - CLOCK_ADJUST(ccadj[i]) - next_interupt, and
// last_count = next_interupt.
7139f3c8 3233 if(copr==9||copr==11||copr==12||copr==13) {
643aeae3 3234 emit_readword(&Count,HOST_CCREG);
3235 emit_readword(&next_interupt,HOST_TEMPREG);
2573466a 3236 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 3237 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
643aeae3 3238 emit_writeword(HOST_TEMPREG,&last_count);
57871462 3239 emit_storereg(CCREG,HOST_CCREG);
3240 }
// If the call left pending_exception non-zero, branch to do_interrupt.
7139f3c8 3241 if(copr==12||copr==13) {
57871462 3242 assert(!is_delayslot);
643aeae3 3243 emit_readword(&pending_exception,14);
042c7287 3244 emit_test(14,14);
643aeae3 3245 emit_jne(&do_interrupt);
57871462 3246 }
// Reload the source register(s) into their host registers after the call.
3247 emit_loadreg(rs1[i],s);
3248 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3249 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 3250 cop1_usable=0;
3251 }
3252 else
3253 {
3254 assert(opcode2[i]==0x10);
576bbd8f 3255 if((source[i]&0x3f)==0x10) // RFE
3256 {
// Presumably computes Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
// using host regs 0 and 1 - confirm against emit_orrshr_imm.
643aeae3 3257 emit_readword(&Status,0);
576bbd8f 3258 emit_andimm(0,0x3c,1);
3259 emit_andimm(0,~0xf,0);
3260 emit_orrshr_imm(1,2,0);
643aeae3 3261 emit_writeword(0,&Status);
576bbd8f 3262 }
57871462 3263 }
3264}
3265
b9b61529 3266static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3267{
3268 switch (copr) {
3269 case 1:
3270 case 3:
3271 case 5:
3272 case 8:
3273 case 9:
3274 case 10:
3275 case 11:
643aeae3 3276 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3277 emit_signextend16(tl,tl);
643aeae3 3278 emit_writeword(tl,&reg_cop2d[copr]); // hmh
b9b61529 3279 break;
3280 case 7:
3281 case 16:
3282 case 17:
3283 case 18:
3284 case 19:
643aeae3 3285 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3286 emit_andimm(tl,0xffff,tl);
643aeae3 3287 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3288 break;
3289 case 15:
643aeae3 3290 emit_readword(&reg_cop2d[14],tl); // SXY2
3291 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3292 break;
3293 case 28:
b9b61529 3294 case 29:
643aeae3 3295 emit_readword(&reg_cop2d[9],temp);
b9b61529 3296 emit_testimm(temp,0x8000); // do we need this?
3297 emit_andimm(temp,0xf80,temp);
3298 emit_andne_imm(temp,0,temp);
f70d384d 3299 emit_shrimm(temp,7,tl);
643aeae3 3300 emit_readword(&reg_cop2d[10],temp);
b9b61529 3301 emit_testimm(temp,0x8000);
3302 emit_andimm(temp,0xf80,temp);
3303 emit_andne_imm(temp,0,temp);
f70d384d 3304 emit_orrshr_imm(temp,2,tl);
643aeae3 3305 emit_readword(&reg_cop2d[11],temp);
b9b61529 3306 emit_testimm(temp,0x8000);
3307 emit_andimm(temp,0xf80,temp);
3308 emit_andne_imm(temp,0,temp);
f70d384d 3309 emit_orrshl_imm(temp,3,tl);
643aeae3 3310 emit_writeword(tl,&reg_cop2d[copr]);
b9b61529 3311 break;
3312 default:
643aeae3 3313 emit_readword(&reg_cop2d[copr],tl);
b9b61529 3314 break;
3315 }
3316}
3317
3318static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3319{
3320 switch (copr) {
3321 case 15:
643aeae3 3322 emit_readword(&reg_cop2d[13],temp); // SXY1
3323 emit_writeword(sl,&reg_cop2d[copr]);
3324 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3325 emit_readword(&reg_cop2d[14],temp); // SXY2
3326 emit_writeword(sl,&reg_cop2d[14]);
3327 emit_writeword(temp,&reg_cop2d[13]); // SXY1
b9b61529 3328 break;
3329 case 28:
3330 emit_andimm(sl,0x001f,temp);
f70d384d 3331 emit_shlimm(temp,7,temp);
643aeae3 3332 emit_writeword(temp,&reg_cop2d[9]);
b9b61529 3333 emit_andimm(sl,0x03e0,temp);
f70d384d 3334 emit_shlimm(temp,2,temp);
643aeae3 3335 emit_writeword(temp,&reg_cop2d[10]);
b9b61529 3336 emit_andimm(sl,0x7c00,temp);
f70d384d 3337 emit_shrimm(temp,3,temp);
643aeae3 3338 emit_writeword(temp,&reg_cop2d[11]);
3339 emit_writeword(sl,&reg_cop2d[28]);
b9b61529 3340 break;
3341 case 30:
3342 emit_movs(sl,temp);
3343 emit_mvnmi(temp,temp);
665f33e1 3344#ifdef HAVE_ARMV5
b9b61529 3345 emit_clz(temp,temp);
665f33e1 3346#else
3347 emit_movs(temp,HOST_TEMPREG);
3348 emit_movimm(0,temp);
3349 emit_jeq((int)out+4*4);
3350 emit_addpl_imm(temp,1,temp);
3351 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3352 emit_jns((int)out-2*4);
3353#endif
643aeae3 3354 emit_writeword(sl,&reg_cop2d[30]);
3355 emit_writeword(temp,&reg_cop2d[31]);
b9b61529 3356 break;
b9b61529 3357 case 31:
3358 break;
3359 default:
643aeae3 3360 emit_writeword(sl,&reg_cop2d[copr]);
b9b61529 3361 break;
3362 }
3363}
3364
e2b5e7aa 3365static void cop2_assemble(int i,struct regstat *i_regs)
b9b61529 3366{
3367 u_int copr=(source[i]>>11)&0x1f;
3368 signed char temp=get_reg(i_regs->regmap,-1);
3369 if (opcode2[i]==0) { // MFC2
3370 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3371 if(tl>=0&&rt1[i]!=0)
b9b61529 3372 cop2_get_dreg(copr,tl,temp);
3373 }
3374 else if (opcode2[i]==4) { // MTC2
3375 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3376 cop2_put_dreg(copr,sl,temp);
3377 }
3378 else if (opcode2[i]==2) // CFC2
3379 {
3380 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 3381 if(tl>=0&&rt1[i]!=0)
643aeae3 3382 emit_readword(&reg_cop2c[copr],tl);
b9b61529 3383 }
3384 else if (opcode2[i]==6) // CTC2
3385 {
3386 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3387 switch(copr) {
3388 case 4:
3389 case 12:
3390 case 20:
3391 case 26:
3392 case 27:
3393 case 29:
3394 case 30:
3395 emit_signextend16(sl,temp);
3396 break;
3397 case 31:
3398 //value = value & 0x7ffff000;
3399 //if (value & 0x7f87e000) value |= 0x80000000;
3400 emit_shrimm(sl,12,temp);
3401 emit_shlimm(temp,12,temp);
3402 emit_testimm(temp,0x7f000000);
3403 emit_testeqimm(temp,0x00870000);
3404 emit_testeqimm(temp,0x0000e000);
3405 emit_orrne_imm(temp,0x80000000,temp);
3406 break;
3407 default:
3408 temp=sl;
3409 break;
3410 }
643aeae3 3411 emit_writeword(temp,&reg_cop2c[copr]);
b9b61529 3412 assert(sl>=0);
3413 }
3414}
3415
054175e9 3416static void c2op_prologue(u_int op,u_int reglist)
3417{
3418 save_regs_all(reglist);
82ed88eb 3419#ifdef PCNT
3420 emit_movimm(op,0);
3421 emit_call((int)pcnt_gte_start);
3422#endif
054175e9 3423 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3424}
3425
3426static void c2op_epilogue(u_int op,u_int reglist)
3427{
82ed88eb 3428#ifdef PCNT
3429 emit_movimm(op,0);
3430 emit_call((int)pcnt_gte_end);
3431#endif
054175e9 3432 restore_regs_all(reglist);
3433}
3434
6c0eefaf 3435static void c2op_call_MACtoIR(int lm,int need_flags)
3436{
3437 if(need_flags)
643aeae3 3438 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
6c0eefaf 3439 else
643aeae3 3440 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
6c0eefaf 3441}
3442
3443static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3444{
643aeae3 3445 emit_call(func);
6c0eefaf 3446 // func is C code and trashes r0
3447 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3448 if(need_flags||need_ir)
3449 c2op_call_MACtoIR(lm,need_flags);
643aeae3 3450 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
6c0eefaf 3451}
3452
// Assemble a GTE operation for instruction i. The operation number is the
// low 6 bits of the instruction word; nothing is emitted when
// gte_handlers[] has no entry for it. Every emitted sequence is wrapped in
// c2op_prologue()/c2op_epilogue(), which save and restore the host
// registers in `reglist`.
054175e9 3453static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 3454{
b9b61529 3455 u_int c2op=source[i]&0x3f;
6c0eefaf 3456 u_int hr,reglist_full=0,reglist;
054175e9 3457 int need_flags,need_ir;
// reglist_full: every host register that currently maps a guest register.
b9b61529 3458 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 3459 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 3460 }
// By default only the registers in CALLER_SAVE_REGS need saving.
4d646738 3461 reglist=reglist_full&CALLER_SAVE_REGS;
b9b61529 3462
3463 if (gte_handlers[c2op]!=NULL) {
// need_flags: bit 63 of gte_unneeded[i+1] is clear.
// need_ir: bits 0xe00 of gte_unneeded[i+1] are not all set.
bedfea38 3464 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 3465 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 3466 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3467 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 3468 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3469 need_flags=0;
// shift = bit 19 and lm = bit 10 of the instruction word.
6c0eefaf 3470 int shift = (source[i] >> 19) & 1;
3471 int lm = (source[i] >> 10) & 1;
054175e9 3472 switch(c2op) {
// The specialised sequences below are compiled out in DRC_DBG builds, where
// every op goes through the generic handler in the default case.
19776aef 3473#ifndef DRC_DBG
054175e9 3474 case GTE_MVMVA: {
82336ba3 3475#ifdef HAVE_ARMV5
054175e9 3476 int v = (source[i] >> 15) & 3;
3477 int cv = (source[i] >> 13) & 3;
3478 int mx = (source[i] >> 17) & 3;
4d646738 3479 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
054175e9 3480 c2op_prologue(c2op,reglist);
3481 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3482 if(v<3)
3483 emit_ldrd(v*8,0,4);
3484 else {
3485 emit_movzwl_indexed(9*4,0,4); // gteIR
3486 emit_movzwl_indexed(10*4,0,6);
3487 emit_movzwl_indexed(11*4,0,5);
3488 emit_orrshl_imm(6,16,4);
3489 }
// mx==3 / cv==3 select no matrix / vector: point at zeromem_ptr instead.
3490 if(mx<3)
3491 emit_addimm(0,32*4+mx*8*4,6);
3492 else
643aeae3 3493 emit_readword(&zeromem_ptr,6);
054175e9 3494 if(cv<3)
3495 emit_addimm(0,32*4+(cv*8+5)*4,7);
3496 else
643aeae3 3497 emit_readword(&zeromem_ptr,7);
054175e9 3498#ifdef __ARM_NEON__
3499 emit_movimm(source[i],1); // opcode
643aeae3 3500 emit_call(gteMVMVA_part_neon);
054175e9 3501 if(need_flags) {
3502 emit_movimm(lm,1);
643aeae3 3503 emit_call(gteMACtoIR_flags_neon);
054175e9 3504 }
3505#else
3506 if(cv==3&&shift)
3507 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3508 else {
3509 emit_movimm(shift,1);
3510 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3511 }
6c0eefaf 3512 if(need_flags||need_ir)
3513 c2op_call_MACtoIR(lm,need_flags);
82336ba3 3514#endif
3515#else /* if not HAVE_ARMV5 */
// Without ARMv5 support, store the opcode in psxRegs.code and call the
// generic handler.
3516 c2op_prologue(c2op,reglist);
3517 emit_movimm(source[i],1); // opcode
643aeae3 3518 emit_writeword(1,&psxRegs.code);
82336ba3 3519 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 3520#endif
3521 break;
3522 }
6c0eefaf 3523 case GTE_OP:
3524 c2op_prologue(c2op,reglist);
643aeae3 3525 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
// Host register 0 is reloaded with the cop2 register base before the
// MAC->IR conversion call.
6c0eefaf 3526 if(need_flags||need_ir) {
3527 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3528 c2op_call_MACtoIR(lm,need_flags);
3529 }
3530 break;
3531 case GTE_DPCS:
3532 c2op_prologue(c2op,reglist);
3533 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3534 break;
3535 case GTE_INTPL:
3536 c2op_prologue(c2op,reglist);
3537 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3538 break;
3539 case GTE_SQR:
3540 c2op_prologue(c2op,reglist);
643aeae3 3541 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
6c0eefaf 3542 if(need_flags||need_ir) {
3543 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3544 c2op_call_MACtoIR(lm,need_flags);
3545 }
3546 break;
3547 case GTE_DCPL:
3548 c2op_prologue(c2op,reglist);
3549 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3550 break;
3551 case GTE_GPF:
3552 c2op_prologue(c2op,reglist);
3553 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3554 break;
3555 case GTE_GPL:
3556 c2op_prologue(c2op,reglist);
3557 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3558 break;
19776aef 3559#endif
// Generic path: call the handler from gte_handlers[] (or gte_handlers_nf[]
// when flags are not needed).
054175e9 3560 default:
054175e9 3561 c2op_prologue(c2op,reglist);
19776aef 3562#ifdef DRC_DBG
3563 emit_movimm(source[i],1); // opcode
643aeae3 3564 emit_writeword(1,&psxRegs.code);
19776aef 3565#endif
643aeae3 3566 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
054175e9 3567 break;
3568 }
3569 c2op_epilogue(c2op,reglist);
3570 }
b9b61529 3571}
3572
e2b5e7aa 3573static void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 3574{
3575 // XXX: should just just do the exception instead
3576 if(!cop1_usable) {
b14b6a8f 3577 void *jaddr=out;
3d624f89 3578 emit_jmp(0);
b14b6a8f 3579 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3d624f89 3580 cop1_usable=1;
3581 }
3582}
3583
// Coprocessor-1 moves are not implemented here; defer to the
// coprocessor-unusable path.
static void cop1_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3588
// Floating-point conversions are not implemented here; defer to the
// coprocessor-unusable path.
static void fconv_assemble_arm(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3593#define fconv_assemble fconv_assemble_arm
3594
// Floating-point compares are not implemented here; defer to the
// coprocessor-unusable path.
static void fcomp_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3599
// Floating-point arithmetic is not implemented here; defer to the
// coprocessor-unusable path.
static void float_assemble(int i, struct regstat *i_regs)
{
  cop1_unusable(i, i_regs);
}
3604
// Assemble MULT/MULTU/DIV/DIVU (opcode2 0x18..0x1B) for instruction i.
// Results go to the host registers mapped to HIREG and LOREG. When either
// source is r0 both results are simply zeroed. The 64-bit variants
// (opcode2 with bit 2 set) hit assert(0).
// NOTE(review): the division sequences use hard-coded branch targets
// relative to `out` (out+52, out+40, out-16, out-2*4); these appear to
// depend on the exact number of instructions emitted around them, so the
// emit order must not be changed without re-deriving the offsets.
e2b5e7aa 3605static void multdiv_assemble_arm(int i,struct regstat *i_regs)
57871462 3606{
3607 // case 0x18: MULT
3608 // case 0x19: MULTU
3609 // case 0x1A: DIV
3610 // case 0x1B: DIVU
3611 // case 0x1C: DMULT
3612 // case 0x1D: DMULTU
3613 // case 0x1E: DDIV
3614 // case 0x1F: DDIVU
3615 if(rs1[i]&&rs2[i])
3616 {
3617 if((opcode2[i]&4)==0) // 32-bit
3618 {
3619 if(opcode2[i]==0x18) // MULT
3620 {
3621 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3622 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3623 signed char hi=get_reg(i_regs->regmap,HIREG);
3624 signed char lo=get_reg(i_regs->regmap,LOREG);
3625 assert(m1>=0);
3626 assert(m2>=0);
3627 assert(hi>=0);
3628 assert(lo>=0);
3629 emit_smull(m1,m2,hi,lo);
3630 }
3631 if(opcode2[i]==0x19) // MULTU
3632 {
3633 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3634 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3635 signed char hi=get_reg(i_regs->regmap,HIREG);
3636 signed char lo=get_reg(i_regs->regmap,LOREG);
3637 assert(m1>=0);
3638 assert(m2>=0);
3639 assert(hi>=0);
3640 assert(lo>=0);
3641 emit_umull(m1,m2,hi,lo);
3642 }
3643 if(opcode2[i]==0x1A) // DIV
3644 {
3645 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3646 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3647 assert(d1>=0);
3648 assert(d2>=0);
3649 signed char quotient=get_reg(i_regs->regmap,LOREG);
3650 signed char remainder=get_reg(i_regs->regmap,HIREG);
3651 assert(quotient>=0);
3652 assert(remainder>=0);
// Preload the divide-by-zero results, then work on the absolute values of
// dividend (remainder) and divisor (HOST_TEMPREG); signs are fixed up by
// the conditional negates at the end.
3653 emit_movs(d1,remainder);
44a80f6a 3654 emit_movimm(0xffffffff,quotient);
3655 emit_negmi(quotient,quotient); // .. quotient and ..
3656 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 3657 emit_movs(d2,HOST_TEMPREG);
3658 emit_jeq((int)out+52); // Division by zero
82336ba3 3659 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
// Shift the divisor left until its top bit is set; the shift count ends up
// in `quotient` (via CLZ on ARMv5+, via a counting loop otherwise).
665f33e1 3660#ifdef HAVE_ARMV5
57871462 3661 emit_clz(HOST_TEMPREG,quotient);
3662 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 3663#else
3664 emit_movimm(0,quotient);
3665 emit_addpl_imm(quotient,1,quotient);
3666 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3667 emit_jns((int)out-2*4);
3668#endif
// Compare/subtract/shift division loop; the branch at the end jumps back
// to the emit_cmp.
57871462 3669 emit_orimm(quotient,1<<31,quotient);
3670 emit_shr(quotient,quotient,quotient);
3671 emit_cmp(remainder,HOST_TEMPREG);
3672 emit_subcs(remainder,HOST_TEMPREG,remainder);
3673 emit_adcs(quotient,quotient,quotient);
3674 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
b14b6a8f 3675 emit_jcc(out-16); // -4
// Sign fix-up: negate the quotient when the operand signs differ and the
// remainder when the dividend is negative.
57871462 3676 emit_teq(d1,d2);
3677 emit_negmi(quotient,quotient);
3678 emit_test(d1,d1);
3679 emit_negmi(remainder,remainder);
3680 }
3681 if(opcode2[i]==0x1B) // DIVU
3682 {
3683 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3684 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3685 assert(d1>=0);
3686 assert(d2>=0);
3687 signed char quotient=get_reg(i_regs->regmap,LOREG);
3688 signed char remainder=get_reg(i_regs->regmap,HIREG);
3689 assert(quotient>=0);
3690 assert(remainder>=0);
44a80f6a 3691 emit_mov(d1,remainder);
3692 emit_movimm(0xffffffff,quotient); // div0 case
57871462 3693 emit_test(d2,d2);
44a80f6a 3694 emit_jeq((int)out+40); // Division by zero
// Note that d2 itself is shifted during normalisation and in the loop
// below, so this sequence modifies the divisor's host register.
665f33e1 3695#ifdef HAVE_ARMV5
57871462 3696 emit_clz(d2,HOST_TEMPREG);
3697 emit_movimm(1<<31,quotient);
3698 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 3699#else
3700 emit_movimm(0,HOST_TEMPREG);
82336ba3 3701 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3702 emit_lslpls_imm(d2,1,d2);
665f33e1 3703 emit_jns((int)out-2*4);
3704 emit_movimm(1<<31,quotient);
3705#endif
57871462 3706 emit_shr(quotient,HOST_TEMPREG,quotient);
3707 emit_cmp(remainder,d2);
3708 emit_subcs(remainder,d2,remainder);
3709 emit_adcs(quotient,quotient,quotient);
3710 emit_shrcc_imm(d2,1,d2);
b14b6a8f 3711 emit_jcc(out-16); // -4
57871462 3712 }
3713 }
3714 else // 64-bit
71e490c5 3715 assert(0);
57871462 3716 }
3717 else
3718 {
3719 // Multiply by zero is zero.
3720 // MIPS does not have a divide by zero exception.
3721 // The result is undefined, we return zero.
3722 signed char hr=get_reg(i_regs->regmap,HIREG);
3723 signed char lr=get_reg(i_regs->regmap,LOREG);
3724 if(hr>=0) emit_zeroreg(hr);
3725 if(lr>=0) emit_zeroreg(lr);
3726 }
3727}
3728#define multdiv_assemble multdiv_assemble_arm
3729
// Intentionally empty on ARM. On x86 this step loads the value 0xf8 into
// the register; on ARM the hash is computed with a single instruction in
// do_rhash(), so there is nothing to preload.
static void do_preload_rhash(int r)
{
}
3734
e2b5e7aa 3735static void do_preload_rhtbl(int ht) {
57871462 3736 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3737}
3738
// Compute the mini hash table byte offset for the address in rs:
// rh = rs & 0xf8.
static void do_rhash(int rs, int rh)
{
  emit_andimm(rs, 0xf8, rh);
}
3742
e2b5e7aa 3743static void do_miniht_load(int ht,int rh) {
57871462 3744 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3745 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3746}
3747
e2b5e7aa 3748static void do_miniht_jump(int rs,int rh,int ht) {
57871462 3749 emit_cmp(rh,rs);
3750 emit_ldreq_indexed(ht,4,15);
3751 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3752 emit_mov(rs,7);
3753 emit_jmp(jump_vaddr_reg[7]);
3754 #else
3755 emit_jmp(jump_vaddr_reg[rs]);
3756 #endif
3757}
3758
e2b5e7aa 3759static void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 3760 #ifndef HAVE_ARMV7
57871462 3761 emit_movimm(return_address,rt); // PC into link register
643aeae3 3762 add_to_linker(out,return_address,1);
57871462 3763 emit_pcreladdr(temp);
643aeae3 3764 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3765 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3766 #else
3767 emit_movw(return_address&0x0000FFFF,rt);
643aeae3 3768 add_to_linker(out,return_address,1);
57871462 3769 emit_pcreladdr(temp);
643aeae3 3770 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
57871462 3771 emit_movt(return_address&0xFFFF0000,rt);
643aeae3 3772 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
57871462 3773 #endif
3774}
3775
e2b5e7aa 3776static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
57871462 3777{
3778 //if(dirty_pre==dirty) return;
581335b0 3779 int hr,reg;
57871462 3780 for(hr=0;hr<HOST_REGS;hr++) {
3781 if(hr!=EXCLUDE_REG) {
3782 reg=pre[hr];
3783 if(((~u)>>(reg&63))&1) {
f776eb14 3784 if(reg>0) {
57871462 3785 if(((dirty_pre&~dirty)>>hr)&1) {
3786 if(reg>0&&reg<34) {
3787 emit_storereg(reg,hr);
3788 if( ((is32_pre&~uu)>>reg)&1 ) {
3789 emit_sarimm(hr,31,HOST_TEMPREG);
3790 emit_storereg(reg|64,HOST_TEMPREG);
3791 }
3792 }
3793 else if(reg>=64) {
3794 emit_storereg(reg,hr);
3795 }
3796 }
3797 }
57871462 3798 }
3799 }
3800 }
3801}
3802
d148d265 3803static void mark_clear_cache(void *target)
3804{
643aeae3 3805 u_long offset = (u_char *)target - translation_cache;
d148d265 3806 u_int mask = 1u << ((offset >> 12) & 31);
3807 if (!(needs_clear_cache[offset >> 17] & mask)) {
3808 char *start = (char *)((u_long)target & ~4095ul);
3809 start_tcache_write(start, start + 4096);
3810 needs_clear_cache[offset >> 17] |= mask;
3811 }
3812}
3813
dd3a91a1 3814// Clearing the cache is rather slow on ARM Linux, so mark the areas
3815// that need to be cleared, and then only clear these areas once.
e2b5e7aa 3816static void do_clear_cache()
dd3a91a1 3817{
3818 int i,j;
3819 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3820 {
3821 u_int bitmap=needs_clear_cache[i];
3822 if(bitmap) {
643aeae3 3823 u_char *start, *end;
9f51b4b9 3824 for(j=0;j<32;j++)
dd3a91a1 3825 {
3826 if(bitmap&(1<<j)) {
643aeae3 3827 start=translation_cache+i*131072+j*4096;
dd3a91a1 3828 end=start+4095;
3829 j++;
3830 while(j<32) {
3831 if(bitmap&(1<<j)) {
3832 end+=4096;
3833 j++;
3834 }else{
643aeae3 3835 end_tcache_write(start, end);
dd3a91a1 3836 break;
3837 }
3838 }
3839 }
3840 }
3841 needs_clear_cache[i]=0;
3842 }
3843 }
3844}
3845
// CPU-architecture-specific initialization.
// Nothing needs to be set up for this backend.
static void arch_init()
{
}
b9b61529 3849
3850// vim:shiftwidth=2:expandtab