drc: remove some leftover n64-only stuff
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
... / ...
CommitLineData
1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
2 * Mupen64plus/PCSX - assem_arm.c *
3 * Copyright (C) 2009-2011 Ari64 *
4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
22#include "../gte.h"
23#define FLAGLESS
24#include "../gte.h"
25#undef FLAGLESS
26#include "../gte_arm.h"
27#include "../gte_neon.h"
28#include "pcnt.h"
29#include "arm_features.h"
30
31#if defined(BASE_ADDR_FIXED)
32#elif defined(BASE_ADDR_DYNAMIC)
33u_char *translation_cache;
34#else
35u_char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
36#endif
37
// Register bitmask; the Mach-O variant additionally sets bit 9.
// NOTE(review): presumably the caller-saved ARM registers (r0-r3, r12, and
// r9 on Mach-O) - confirm against the users of this macro.
#ifdef __MACH__
#define CALLER_SAVE_REGS 0x120f
#else
#define CALLER_SAVE_REGS 0x100f
#endif

// Marks a definition that is allowed to go unreferenced.
#define unused __attribute__((unused))

// DRC_DBG builds silence the "unused ..." family of GCC warnings.
#if defined(DRC_DBG)
#pragma GCC diagnostic ignored "-Wunused-function"
#pragma GCC diagnostic ignored "-Wunused-variable"
#pragma GCC diagnostic ignored "-Wunused-but-set-variable"
#endif
51
52extern int cycle_count;
53extern int last_count;
54extern int pcaddr;
55extern int pending_exception;
56extern int branch_target;
57extern uint64_t readmem_dword;
58extern void *dynarec_local;
59extern u_int mini_ht[32][2];
60
// Entry points defined outside this file.
void indirect_jump_indexed();
void indirect_jump();
void do_interrupt();
void jump_vaddr_r0();
void jump_vaddr_r1();
void jump_vaddr_r2();
void jump_vaddr_r3();
void jump_vaddr_r4();
void jump_vaddr_r5();
void jump_vaddr_r6();
void jump_vaddr_r7();
void jump_vaddr_r8();
void jump_vaddr_r9();
void jump_vaddr_r10();
void jump_vaddr_r12();

// jump_vaddr_rN handler for table index N.  Indices 11, 13, 14 and 15 have
// no handler and stay null.
void * const jump_vaddr_reg[16] = {
  [0]  = jump_vaddr_r0,
  [1]  = jump_vaddr_r1,
  [2]  = jump_vaddr_r2,
  [3]  = jump_vaddr_r3,
  [4]  = jump_vaddr_r4,
  [5]  = jump_vaddr_r5,
  [6]  = jump_vaddr_r6,
  [7]  = jump_vaddr_r7,
  [8]  = jump_vaddr_r8,
  [9]  = jump_vaddr_r9,
  [10] = jump_vaddr_r10,
  [12] = jump_vaddr_r12,
};
95
96void invalidate_addr_r0();
97void invalidate_addr_r1();
98void invalidate_addr_r2();
99void invalidate_addr_r3();
100void invalidate_addr_r4();
101void invalidate_addr_r5();
102void invalidate_addr_r6();
103void invalidate_addr_r7();
104void invalidate_addr_r8();
105void invalidate_addr_r9();
106void invalidate_addr_r10();
107void invalidate_addr_r12();
108
109const u_int invalidate_addr_reg[16] = {
110 (int)invalidate_addr_r0,
111 (int)invalidate_addr_r1,
112 (int)invalidate_addr_r2,
113 (int)invalidate_addr_r3,
114 (int)invalidate_addr_r4,
115 (int)invalidate_addr_r5,
116 (int)invalidate_addr_r6,
117 (int)invalidate_addr_r7,
118 (int)invalidate_addr_r8,
119 (int)invalidate_addr_r9,
120 (int)invalidate_addr_r10,
121 0,
122 (int)invalidate_addr_r12,
123 0,
124 0,
125 0};
126
127static u_int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
128
129/* Linker */
130
// Patch the instruction at addr so that it refers to target_.
// The top byte of the instruction selects how the displacement
// (target - addr - 8) is encoded:
//   0xe2  - immediate form: displacement/4 with rotate field 0xF
//           (displacement must be below 1024)
//   0x72  - produced by emit_jno_unlikely: immediate form when the
//           displacement fits, otherwise rewritten to a 0x7A branch
//   other - must be a branch; only its 24-bit offset field is replaced
static void set_jump_target(void *addr, void *target_)
{
  u_int *insn = (u_int *)addr;
  u_char *bytes = addr;
  u_int target = (u_int)target_;
  u_int disp = target-(u_int)insn-8;

  switch(bytes[3]) {
  case 0xe2:
    assert(disp<1024);
    assert(((uintptr_t)addr&3)==0);
    assert((target&3)==0);
    *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    //printf("target=%x addr=%p insn=%x\n",target,addr,*insn);
    break;
  case 0x72:
    // generated by emit_jno_unlikely
    if(disp<1024) {
      assert(((uintptr_t)addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>2)|0xF00;
    }
    else if(disp<4096&&!(disp&15)) {
      assert(((uintptr_t)addr&3)==0);
      assert((target&3)==0);
      *insn=(*insn&0xFFFFF000)|(disp>>4)|0xE00;
    }
    else
      *insn=(0x7A000000)|((disp<<6)>>8);
    break;
  default:
    assert((bytes[3]&0x0e)==0xa);
    *insn=(*insn&0xFF000000)|((disp<<6)>>8);
    break;
  }
}
162
// Disabled variant of set_jump_target: optionally copies the instruction
// found at the branch target into the slot just before the branch and then
// branches past it.  Works, but the difference in speed is usually
// insignificant.
#if 0
static void set_jump_target_fillslot(int addr,u_int target,int copy)
{
  u_int *insn=(u_int *)addr;
  u_char top=((u_char *)addr)[3];
  assert(!copy||insn[-1]==0xe28dd000);
  if(top==0xe2) {
    assert(!copy);
    assert((target-(u_int)insn-8)<4096);
    *insn=(*insn&0xFFFFF000)|(target-(u_int)insn-8);
    return;
  }
  assert((top&0x0e)==0xa);
  u_int target_insn=*(u_int *)target;
  // Never copy these: ALU with no immediate and no flags, loads, and
  // anything with bit 27 set.
  if((target_insn&0x0e100000)==0
     ||(target_insn&0x0c100000)==0x04100000
     ||(target_insn&0x08000000))
    copy=0;
  if(copy) {
    insn[-1]=target_insn;
    target+=4;
  }
  *insn=(*insn&0xFF000000)|(((target-(u_int)insn-8)<<6)>>8);
}
#endif
197
198/* Literal pool */
199static void add_literal(int addr,int val)
200{
201 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
202 literals[literalcount][0]=addr;
203 literals[literalcount][1]=val;
204 literalcount++;
205}
206
// Given a pointer to an external jump stub (as produced by emit_extjump2),
// return the pointer stored in the literal that the stub's second
// instruction (a pc-relative ldr) loads, i.e. where the jumping insn is.
static void *find_extjump_insn(void *stub)
{
  char *ldr=(char *)stub+4;
  int word=*(int *)ldr;
  assert((word&0x0fff0000)==0x059f0000); // ldr rx, [pc, #ofs]
  u_int offset=word&0xfff;
  // pc reads as the instruction address + 8
  return *(void **)(ldr+offset+8);
}
217
218// find where external branch is liked to using addr of it's stub:
219// get address that insn one after stub loads (dyna_linker arg1),
220// treat it as a pointer to branch insn,
221// return addr where that branch jumps to
222static void *get_pointer(void *stub)
223{
224 //printf("get_pointer(%x)\n",(int)stub);
225 int *i_ptr=find_extjump_insn(stub);
226 assert((*i_ptr&0x0f000000)==0x0a000000);
227 return (u_char *)i_ptr+((*i_ptr<<8)>>6)+8;
228}
229
// Find the "clean" entry point from a "dirty" entry point
// by skipping past the call to verify_code.
// Returns the address right after the bl or, if an unconditional branch
// sits there, the destination of that branch.
static void *get_clean_addr(void *addr)
{
  signed int *ptr = addr;
  #ifndef HAVE_ARMV7
  ptr+=4;
  #else
  ptr+=6;
  #endif
  if((*ptr&0xFF000000)!=0xeb000000) ptr++;
  assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
  ptr++;
  if((*ptr&0xFF000000)==0xea000000) {
    // Sign-extend the 24-bit word offset; shift left as unsigned to avoid
    // undefined behaviour when the opcode bits are shifted out.
    int disp=(int)((unsigned int)*ptr<<8)>>6;
    return (char *)ptr+disp+8; // follow jump
  }
  return ptr;
}
248
249static int verify_dirty(u_int *ptr)
250{
251 #ifndef HAVE_ARMV7
252 u_int offset;
253 // get from literal pool
254 assert((*ptr&0xFFFF0000)==0xe59f0000);
255 offset=*ptr&0xfff;
256 u_int source=*(u_int*)((void *)ptr+offset+8);
257 ptr++;
258 assert((*ptr&0xFFFF0000)==0xe59f0000);
259 offset=*ptr&0xfff;
260 u_int copy=*(u_int*)((void *)ptr+offset+8);
261 ptr++;
262 assert((*ptr&0xFFFF0000)==0xe59f0000);
263 offset=*ptr&0xfff;
264 u_int len=*(u_int*)((void *)ptr+offset+8);
265 ptr++;
266 ptr++;
267 #else
268 // ARMv7 movw/movt
269 assert((*ptr&0xFFF00000)==0xe3000000);
270 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
271 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
272 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
273 ptr+=6;
274 #endif
275 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
276 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
277 //printf("verify_dirty: %x %x %x\n",source,copy,len);
278 return !memcmp((void *)source,(void *)copy,len);
279}
280
281// This doesn't necessarily find all clean entry points, just
282// guarantees that it's not dirty
283static int isclean(void *addr)
284{
285 #ifndef HAVE_ARMV7
286 u_int *ptr=((u_int *)addr)+4;
287 #else
288 u_int *ptr=((u_int *)addr)+6;
289 #endif
290 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
291 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
293 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
294 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
295 return 1;
296}
297
// Get the source range that the block at addr was compiled from
// (host pointers): *start receives the source address, *end the source
// address plus the length.  Decodes the same prologue as verify_dirty but
// ignores the "copy" operand.
static void get_bounds(void *addr,u_int *start,u_int *end)
{
  u_int *insn = addr;
  u_int source,len;
  #ifndef HAVE_ARMV7
  // operands come from the literal pool; insn[1] would load "copy"
  assert((insn[0]&0xFFFF0000)==0xe59f0000);
  source=*(u_int *)((char *)&insn[0]+(insn[0]&0xfff)+8);
  assert((insn[2]&0xFFFF0000)==0xe59f0000);
  len=*(u_int *)((char *)&insn[2]+(insn[2]&0xfff)+8);
  insn+=4;
  #else
  // ARMv7 movw/movt
  assert((*insn&0xFFF00000)==0xe3000000);
  source=(insn[0]&0xFFF)+((insn[0]>>4)&0xF000)+((insn[2]<<16)&0xFFF0000)+((insn[2]<<12)&0xF0000000);
  len=(insn[4]&0xFFF)+((insn[4]>>4)&0xF000);
  insn+=6;
  #endif
  // the bl may sit one instruction further on
  if((*insn&0xFF000000)!=0xeb000000) insn++;
  assert((*insn&0xFF000000)==0xeb000000); // bl instruction
  *start=source;
  *end=source+len;
}
331
332/* Register allocation */
333
334// Note: registers are allocated clean (unmodified state)
335// if you intend to modify the register, you must call dirty_reg().
336static void alloc_reg(struct regstat *cur,int i,signed char reg)
337{
338 int r,hr;
339 int preferred_reg = (reg&7);
340 if(reg==CCREG) preferred_reg=HOST_CCREG;
341 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
342
343 // Don't allocate unused registers
344 if((cur->u>>reg)&1) return;
345
346 // see if it's already allocated
347 for(hr=0;hr<HOST_REGS;hr++)
348 {
349 if(cur->regmap[hr]==reg) return;
350 }
351
352 // Keep the same mapping if the register was already allocated in a loop
353 preferred_reg = loop_reg(i,reg,preferred_reg);
354
355 // Try to allocate the preferred register
356 if(cur->regmap[preferred_reg]==-1) {
357 cur->regmap[preferred_reg]=reg;
358 cur->dirty&=~(1<<preferred_reg);
359 cur->isconst&=~(1<<preferred_reg);
360 return;
361 }
362 r=cur->regmap[preferred_reg];
363 if(r<64&&((cur->u>>r)&1)) {
364 cur->regmap[preferred_reg]=reg;
365 cur->dirty&=~(1<<preferred_reg);
366 cur->isconst&=~(1<<preferred_reg);
367 return;
368 }
369 if(r>=64&&((cur->uu>>(r&63))&1)) {
370 cur->regmap[preferred_reg]=reg;
371 cur->dirty&=~(1<<preferred_reg);
372 cur->isconst&=~(1<<preferred_reg);
373 return;
374 }
375
376 // Clear any unneeded registers
377 // We try to keep the mapping consistent, if possible, because it
378 // makes branches easier (especially loops). So we try to allocate
379 // first (see above) before removing old mappings. If this is not
380 // possible then go ahead and clear out the registers that are no
381 // longer needed.
382 for(hr=0;hr<HOST_REGS;hr++)
383 {
384 r=cur->regmap[hr];
385 if(r>=0) {
386 if(r<64) {
387 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
388 }
389 else
390 {
391 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
392 }
393 }
394 }
395 // Try to allocate any available register, but prefer
396 // registers that have not been used recently.
397 if(i>0) {
398 for(hr=0;hr<HOST_REGS;hr++) {
399 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
400 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
401 cur->regmap[hr]=reg;
402 cur->dirty&=~(1<<hr);
403 cur->isconst&=~(1<<hr);
404 return;
405 }
406 }
407 }
408 }
409 // Try to allocate any available register
410 for(hr=0;hr<HOST_REGS;hr++) {
411 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
412 cur->regmap[hr]=reg;
413 cur->dirty&=~(1<<hr);
414 cur->isconst&=~(1<<hr);
415 return;
416 }
417 }
418
419 // Ok, now we have to evict someone
420 // Pick a register we hopefully won't need soon
421 u_char hsn[MAXREG+1];
422 memset(hsn,10,sizeof(hsn));
423 int j;
424 lsn(hsn,i,&preferred_reg);
425 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
426 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
427 if(i>0) {
428 // Don't evict the cycle count at entry points, otherwise the entry
429 // stub will have to write it.
430 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
431 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
432 for(j=10;j>=3;j--)
433 {
434 // Alloc preferred register if available
435 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
436 for(hr=0;hr<HOST_REGS;hr++) {
437 // Evict both parts of a 64-bit register
438 if((cur->regmap[hr]&63)==r) {
439 cur->regmap[hr]=-1;
440 cur->dirty&=~(1<<hr);
441 cur->isconst&=~(1<<hr);
442 }
443 }
444 cur->regmap[preferred_reg]=reg;
445 return;
446 }
447 for(r=1;r<=MAXREG;r++)
448 {
449 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
450 for(hr=0;hr<HOST_REGS;hr++) {
451 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
452 if(cur->regmap[hr]==r+64) {
453 cur->regmap[hr]=reg;
454 cur->dirty&=~(1<<hr);
455 cur->isconst&=~(1<<hr);
456 return;
457 }
458 }
459 }
460 for(hr=0;hr<HOST_REGS;hr++) {
461 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
462 if(cur->regmap[hr]==r) {
463 cur->regmap[hr]=reg;
464 cur->dirty&=~(1<<hr);
465 cur->isconst&=~(1<<hr);
466 return;
467 }
468 }
469 }
470 }
471 }
472 }
473 }
474 for(j=10;j>=0;j--)
475 {
476 for(r=1;r<=MAXREG;r++)
477 {
478 if(hsn[r]==j) {
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r+64) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 for(hr=0;hr<HOST_REGS;hr++) {
488 if(cur->regmap[hr]==r) {
489 cur->regmap[hr]=reg;
490 cur->dirty&=~(1<<hr);
491 cur->isconst&=~(1<<hr);
492 return;
493 }
494 }
495 }
496 }
497 }
498 SysPrintf("This shouldn't happen (alloc_reg)");exit(1);
499}
500
501static void alloc_reg64(struct regstat *cur,int i,signed char reg)
502{
503 int preferred_reg = 8+(reg&1);
504 int r,hr;
505
506 // allocate the lower 32 bits
507 alloc_reg(cur,i,reg);
508
509 // Don't allocate unused registers
510 if((cur->uu>>reg)&1) return;
511
512 // see if the upper half is already allocated
513 for(hr=0;hr<HOST_REGS;hr++)
514 {
515 if(cur->regmap[hr]==reg+64) return;
516 }
517
518 // Keep the same mapping if the register was already allocated in a loop
519 preferred_reg = loop_reg(i,reg,preferred_reg);
520
521 // Try to allocate the preferred register
522 if(cur->regmap[preferred_reg]==-1) {
523 cur->regmap[preferred_reg]=reg|64;
524 cur->dirty&=~(1<<preferred_reg);
525 cur->isconst&=~(1<<preferred_reg);
526 return;
527 }
528 r=cur->regmap[preferred_reg];
529 if(r<64&&((cur->u>>r)&1)) {
530 cur->regmap[preferred_reg]=reg|64;
531 cur->dirty&=~(1<<preferred_reg);
532 cur->isconst&=~(1<<preferred_reg);
533 return;
534 }
535 if(r>=64&&((cur->uu>>(r&63))&1)) {
536 cur->regmap[preferred_reg]=reg|64;
537 cur->dirty&=~(1<<preferred_reg);
538 cur->isconst&=~(1<<preferred_reg);
539 return;
540 }
541
542 // Clear any unneeded registers
543 // We try to keep the mapping consistent, if possible, because it
544 // makes branches easier (especially loops). So we try to allocate
545 // first (see above) before removing old mappings. If this is not
546 // possible then go ahead and clear out the registers that are no
547 // longer needed.
548 for(hr=HOST_REGS-1;hr>=0;hr--)
549 {
550 r=cur->regmap[hr];
551 if(r>=0) {
552 if(r<64) {
553 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
554 }
555 else
556 {
557 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
558 }
559 }
560 }
561 // Try to allocate any available register, but prefer
562 // registers that have not been used recently.
563 if(i>0) {
564 for(hr=0;hr<HOST_REGS;hr++) {
565 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
566 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
567 cur->regmap[hr]=reg|64;
568 cur->dirty&=~(1<<hr);
569 cur->isconst&=~(1<<hr);
570 return;
571 }
572 }
573 }
574 }
575 // Try to allocate any available register
576 for(hr=0;hr<HOST_REGS;hr++) {
577 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
578 cur->regmap[hr]=reg|64;
579 cur->dirty&=~(1<<hr);
580 cur->isconst&=~(1<<hr);
581 return;
582 }
583 }
584
585 // Ok, now we have to evict someone
586 // Pick a register we hopefully won't need soon
587 u_char hsn[MAXREG+1];
588 memset(hsn,10,sizeof(hsn));
589 int j;
590 lsn(hsn,i,&preferred_reg);
591 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
592 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
593 if(i>0) {
594 // Don't evict the cycle count at entry points, otherwise the entry
595 // stub will have to write it.
596 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
597 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
598 for(j=10;j>=3;j--)
599 {
600 // Alloc preferred register if available
601 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
602 for(hr=0;hr<HOST_REGS;hr++) {
603 // Evict both parts of a 64-bit register
604 if((cur->regmap[hr]&63)==r) {
605 cur->regmap[hr]=-1;
606 cur->dirty&=~(1<<hr);
607 cur->isconst&=~(1<<hr);
608 }
609 }
610 cur->regmap[preferred_reg]=reg|64;
611 return;
612 }
613 for(r=1;r<=MAXREG;r++)
614 {
615 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
616 for(hr=0;hr<HOST_REGS;hr++) {
617 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
618 if(cur->regmap[hr]==r+64) {
619 cur->regmap[hr]=reg|64;
620 cur->dirty&=~(1<<hr);
621 cur->isconst&=~(1<<hr);
622 return;
623 }
624 }
625 }
626 for(hr=0;hr<HOST_REGS;hr++) {
627 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
628 if(cur->regmap[hr]==r) {
629 cur->regmap[hr]=reg|64;
630 cur->dirty&=~(1<<hr);
631 cur->isconst&=~(1<<hr);
632 return;
633 }
634 }
635 }
636 }
637 }
638 }
639 }
640 for(j=10;j>=0;j--)
641 {
642 for(r=1;r<=MAXREG;r++)
643 {
644 if(hsn[r]==j) {
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r+64) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 for(hr=0;hr<HOST_REGS;hr++) {
654 if(cur->regmap[hr]==r) {
655 cur->regmap[hr]=reg|64;
656 cur->dirty&=~(1<<hr);
657 cur->isconst&=~(1<<hr);
658 return;
659 }
660 }
661 }
662 }
663 }
664 SysPrintf("This shouldn't happen");exit(1);
665}
666
667// Allocate a temporary register. This is done without regard to
668// dirty status or whether the register we request is on the unneeded list
669// Note: This will only allocate one register, even if called multiple times
670static void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
671{
672 int r,hr;
673 int preferred_reg = -1;
674
675 // see if it's already allocated
676 for(hr=0;hr<HOST_REGS;hr++)
677 {
678 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
679 }
680
681 // Try to allocate any available register
682 for(hr=HOST_REGS-1;hr>=0;hr--) {
683 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
684 cur->regmap[hr]=reg;
685 cur->dirty&=~(1<<hr);
686 cur->isconst&=~(1<<hr);
687 return;
688 }
689 }
690
691 // Find an unneeded register
692 for(hr=HOST_REGS-1;hr>=0;hr--)
693 {
694 r=cur->regmap[hr];
695 if(r>=0) {
696 if(r<64) {
697 if((cur->u>>r)&1) {
698 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
699 cur->regmap[hr]=reg;
700 cur->dirty&=~(1<<hr);
701 cur->isconst&=~(1<<hr);
702 return;
703 }
704 }
705 }
706 else
707 {
708 if((cur->uu>>(r&63))&1) {
709 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
710 cur->regmap[hr]=reg;
711 cur->dirty&=~(1<<hr);
712 cur->isconst&=~(1<<hr);
713 return;
714 }
715 }
716 }
717 }
718 }
719
720 // Ok, now we have to evict someone
721 // Pick a register we hopefully won't need soon
722 // TODO: we might want to follow unconditional jumps here
723 // TODO: get rid of dupe code and make this into a function
724 u_char hsn[MAXREG+1];
725 memset(hsn,10,sizeof(hsn));
726 int j;
727 lsn(hsn,i,&preferred_reg);
728 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
729 if(i>0) {
730 // Don't evict the cycle count at entry points, otherwise the entry
731 // stub will have to write it.
732 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
733 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
734 for(j=10;j>=3;j--)
735 {
736 for(r=1;r<=MAXREG;r++)
737 {
738 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
739 for(hr=0;hr<HOST_REGS;hr++) {
740 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
741 if(cur->regmap[hr]==r+64) {
742 cur->regmap[hr]=reg;
743 cur->dirty&=~(1<<hr);
744 cur->isconst&=~(1<<hr);
745 return;
746 }
747 }
748 }
749 for(hr=0;hr<HOST_REGS;hr++) {
750 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
751 if(cur->regmap[hr]==r) {
752 cur->regmap[hr]=reg;
753 cur->dirty&=~(1<<hr);
754 cur->isconst&=~(1<<hr);
755 return;
756 }
757 }
758 }
759 }
760 }
761 }
762 }
763 for(j=10;j>=0;j--)
764 {
765 for(r=1;r<=MAXREG;r++)
766 {
767 if(hsn[r]==j) {
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r+64) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 for(hr=0;hr<HOST_REGS;hr++) {
777 if(cur->regmap[hr]==r) {
778 cur->regmap[hr]=reg;
779 cur->dirty&=~(1<<hr);
780 cur->isconst&=~(1<<hr);
781 return;
782 }
783 }
784 }
785 }
786 }
787 SysPrintf("This shouldn't happen");exit(1);
788}
789
790// Allocate a specific ARM register.
791static void alloc_arm_reg(struct regstat *cur,int i,signed char reg,int hr)
792{
793 int n;
794 int dirty=0;
795
796 // see if it's already allocated (and dealloc it)
797 for(n=0;n<HOST_REGS;n++)
798 {
799 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
800 dirty=(cur->dirty>>n)&1;
801 cur->regmap[n]=-1;
802 }
803 }
804
805 cur->regmap[hr]=reg;
806 cur->dirty&=~(1<<hr);
807 cur->dirty|=dirty<<hr;
808 cur->isconst&=~(1<<hr);
809}
810
811// Alloc cycle count into dedicated register
812static void alloc_cc(struct regstat *cur,int i)
813{
814 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
815}
816
817/* Special alloc */
818
819
820/* Assembler */
821
822static unused char regname[16][4] = {
823 "r0",
824 "r1",
825 "r2",
826 "r3",
827 "r4",
828 "r5",
829 "r6",
830 "r7",
831 "r8",
832 "r9",
833 "r10",
834 "fp",
835 "r12",
836 "sp",
837 "lr",
838 "pc"};
839
840static void output_w32(u_int word)
841{
842 *((u_int *)out)=word;
843 out+=4;
844}
845
// Pack three register numbers into their instruction fields:
// rn at bits 16-19, rd at bits 12-15, rm at bits 0-3.
// Each register number must be below 16.
static u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
853
// Pack rd, rn and an 8-bit immediate that is to be shifted left by `shift`
// bits (shift must be even): the rotate field gets ((32-shift)&30)<<7, i.e.
// a rotate right by 32-shift.
static u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  const u_int rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
862
// Try to express imm as an 8-bit value rotated right by an even amount.
// On success *encoded holds the rotate field (bits 8-11) and the 8-bit
// value, and 1 is returned; otherwise *encoded is 0 and 0 is returned.
static u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try the next rotation
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
878
879static void genimm_checked(u_int imm,u_int *encoded)
880{
881 u_int ret=genimm(imm,encoded);
882 assert(ret);
883 (void)ret;
884}
885
886static u_int genjmp(u_int addr)
887{
888 int offset=addr-(int)out-8;
889 if(offset<-33554432||offset>=33554432) {
890 if (addr>2) {
891 SysPrintf("genjmp: out of range: %08x\n", offset);
892 exit(1);
893 }
894 return 0;
895 }
896 return ((u_int)offset>>2)&0xffffff;
897}
898
899static void emit_mov(int rs,int rt)
900{
901 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
902 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
903}
904
905static void emit_movs(int rs,int rt)
906{
907 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
908 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
909}
910
911static void emit_add(int rs1,int rs2,int rt)
912{
913 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
914 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
915}
916
917static void emit_adds(int rs1,int rs2,int rt)
918{
919 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
920 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
921}
922
923static void emit_adcs(int rs1,int rs2,int rt)
924{
925 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
926 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
927}
928
929static void emit_sbc(int rs1,int rs2,int rt)
930{
931 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
932 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
933}
934
935static void emit_sbcs(int rs1,int rs2,int rt)
936{
937 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
938 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
939}
940
941static void emit_neg(int rs, int rt)
942{
943 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
944 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
945}
946
947static void emit_negs(int rs, int rt)
948{
949 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
950 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
951}
952
953static void emit_sub(int rs1,int rs2,int rt)
954{
955 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
956 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
957}
958
959static void emit_subs(int rs1,int rs2,int rt)
960{
961 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
962 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
963}
964
965static void emit_zeroreg(int rt)
966{
967 assem_debug("mov %s,#0\n",regname[rt]);
968 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
969}
970
971static void emit_loadlp(u_int imm,u_int rt)
972{
973 add_literal((int)out,imm);
974 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
975 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
976}
977
978static void emit_movw(u_int imm,u_int rt)
979{
980 assert(imm<65536);
981 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
982 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
983}
984
985static void emit_movt(u_int imm,u_int rt)
986{
987 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
988 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
989}
990
991static void emit_movimm(u_int imm,u_int rt)
992{
993 u_int armval;
994 if(genimm(imm,&armval)) {
995 assem_debug("mov %s,#%d\n",regname[rt],imm);
996 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
997 }else if(genimm(~imm,&armval)) {
998 assem_debug("mvn %s,#%d\n",regname[rt],imm);
999 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1000 }else if(imm<65536) {
1001 #ifndef HAVE_ARMV7
1002 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1003 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1004 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1005 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1006 #else
1007 emit_movw(imm,rt);
1008 #endif
1009 }else{
1010 #ifndef HAVE_ARMV7
1011 emit_loadlp(imm,rt);
1012 #else
1013 emit_movw(imm&0x0000FFFF,rt);
1014 emit_movt(imm&0xFFFF0000,rt);
1015 #endif
1016 }
1017}
1018
1019static void emit_pcreladdr(u_int rt)
1020{
1021 assem_debug("add %s,pc,#?\n",regname[rt]);
1022 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1023}
1024
1025static void emit_loadreg(int r, int hr)
1026{
1027 if(r&64) {
1028 SysPrintf("64bit load in 32bit mode!\n");
1029 assert(0);
1030 return;
1031 }
1032 if((r&63)==0)
1033 emit_zeroreg(hr);
1034 else {
1035 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1036 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1037 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1038 if(r==CCREG) addr=(int)&cycle_count;
1039 if(r==CSREG) addr=(int)&Status;
1040 if(r==FSREG) addr=(int)&FCR31;
1041 if(r==INVCP) addr=(int)&invc_ptr;
1042 u_int offset = addr-(u_int)&dynarec_local;
1043 assert(offset<4096);
1044 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1045 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1046 }
1047}
1048
1049static void emit_storereg(int r, int hr)
1050{
1051 if(r&64) {
1052 SysPrintf("64bit store in 32bit mode!\n");
1053 assert(0);
1054 return;
1055 }
1056 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
1057 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1058 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1059 if(r==CCREG) addr=(int)&cycle_count;
1060 if(r==FSREG) addr=(int)&FCR31;
1061 u_int offset = addr-(u_int)&dynarec_local;
1062 assert(offset<4096);
1063 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1064 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1065}
1066
1067static void emit_test(int rs, int rt)
1068{
1069 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1070 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1071}
1072
1073static void emit_testimm(int rs,int imm)
1074{
1075 u_int armval;
1076 assem_debug("tst %s,#%d\n",regname[rs],imm);
1077 genimm_checked(imm,&armval);
1078 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1079}
1080
1081static void emit_testeqimm(int rs,int imm)
1082{
1083 u_int armval;
1084 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
1085 genimm_checked(imm,&armval);
1086 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1087}
1088
1089static void emit_not(int rs,int rt)
1090{
1091 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1092 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1093}
1094
1095static void emit_mvnmi(int rs,int rt)
1096{
1097 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1098 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1099}
1100
1101static void emit_and(u_int rs1,u_int rs2,u_int rt)
1102{
1103 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1104 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1105}
1106
1107static void emit_or(u_int rs1,u_int rs2,u_int rt)
1108{
1109 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1110 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1111}
1112
1113static void emit_or_and_set_flags(int rs1,int rs2,int rt)
1114{
1115 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1116 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1117}
1118
1119static void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1120{
1121 assert(rs<16);
1122 assert(rt<16);
1123 assert(imm<32);
1124 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1125 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1126}
1127
1128static void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
1137static void emit_xor(u_int rs1,u_int rs2,u_int rt)
1138{
1139 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1140 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1141}
1142
1143static void emit_addimm(u_int rs,int imm,u_int rt)
1144{
1145 assert(rs<16);
1146 assert(rt<16);
1147 if(imm!=0) {
1148 u_int armval;
1149 if(genimm(imm,&armval)) {
1150 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1151 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1152 }else if(genimm(-imm,&armval)) {
1153 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
1154 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1155 #ifdef HAVE_ARMV7
1156 }else if(rt!=rs&&(u_int)imm<65536) {
1157 emit_movw(imm&0x0000ffff,rt);
1158 emit_add(rs,rt,rt);
1159 }else if(rt!=rs&&(u_int)-imm<65536) {
1160 emit_movw(-imm&0x0000ffff,rt);
1161 emit_sub(rs,rt,rt);
1162 #endif
1163 }else if((u_int)-imm<65536) {
1164 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1165 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1166 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1167 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1168 }else {
1169 do {
1170 int shift = (ffs(imm) - 1) & ~1;
1171 int imm8 = imm & (0xff << shift);
1172 genimm_checked(imm8,&armval);
1173 assem_debug("add %s,%s,#0x%x\n",regname[rt],regname[rs],imm8);
1174 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1175 rs = rt;
1176 imm &= ~imm8;
1177 }
1178 while (imm != 0);
1179 }
1180 }
1181 else if(rs!=rt) emit_mov(rs,rt);
1182}
1183
1184static void emit_addimm_and_set_flags(int imm,int rt)
1185{
1186 assert(imm>-65536&&imm<65536);
1187 u_int armval;
1188 if(genimm(imm,&armval)) {
1189 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(genimm(-imm,&armval)) {
1192 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1193 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1194 }else if(imm<0) {
1195 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1196 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1197 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1198 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1199 }else{
1200 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1201 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1202 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1203 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1204 }
1205}
1206
1207static void emit_addimm_no_flags(u_int imm,u_int rt)
1208{
1209 emit_addimm(rt,imm,rt);
1210}
1211
1212static void emit_addnop(u_int r)
1213{
1214 assert(r<16);
1215 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1216 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1217}
1218
1219static void emit_adcimm(u_int rs,int imm,u_int rt)
1220{
1221 u_int armval;
1222 genimm_checked(imm,&armval);
1223 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1224 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1225}
1226
1227static void emit_rscimm(int rs,int imm,u_int rt)
1228{
1229 assert(0);
1230 u_int armval;
1231 genimm_checked(imm,&armval);
1232 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1233 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1234}
1235
1236static void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1237{
1238 // TODO: if(genimm(imm,&armval)) ...
1239 // else
1240 emit_movimm(imm,HOST_TEMPREG);
1241 emit_adds(HOST_TEMPREG,rsl,rtl);
1242 emit_adcimm(rsh,0,rth);
1243}
1244
1245static void emit_andimm(int rs,int imm,int rt)
1246{
1247 u_int armval;
1248 if(imm==0) {
1249 emit_zeroreg(rt);
1250 }else if(genimm(imm,&armval)) {
1251 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1252 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1253 }else if(genimm(~imm,&armval)) {
1254 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1255 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1256 }else if(imm==65535) {
1257 #ifndef HAVE_ARMV6
1258 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1259 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1260 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1261 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1262 #else
1263 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1264 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1265 #endif
1266 }else{
1267 assert(imm>0&&imm<65535);
1268 #ifndef HAVE_ARMV7
1269 assem_debug("mov r14,#%d\n",imm&0xFF00);
1270 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1271 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1272 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1273 #else
1274 emit_movw(imm,HOST_TEMPREG);
1275 #endif
1276 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1277 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1278 }
1279}
1280
1281static void emit_orimm(int rs,int imm,int rt)
1282{
1283 u_int armval;
1284 if(imm==0) {
1285 if(rs!=rt) emit_mov(rs,rt);
1286 }else if(genimm(imm,&armval)) {
1287 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1288 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1289 }else{
1290 assert(imm>0&&imm<65536);
1291 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1292 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1293 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1294 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1295 }
1296}
1297
1298static void emit_xorimm(int rs,int imm,int rt)
1299{
1300 u_int armval;
1301 if(imm==0) {
1302 if(rs!=rt) emit_mov(rs,rt);
1303 }else if(genimm(imm,&armval)) {
1304 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1305 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1306 }else{
1307 assert(imm>0&&imm<65536);
1308 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1309 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1310 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1311 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1312 }
1313}
1314
1315static void emit_shlimm(int rs,u_int imm,int rt)
1316{
1317 assert(imm>0);
1318 assert(imm<32);
1319 //if(imm==1) ...
1320 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1321 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1322}
1323
1324static void emit_lsls_imm(int rs,int imm,int rt)
1325{
1326 assert(imm>0);
1327 assert(imm<32);
1328 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1329 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1330}
1331
1332static unused void emit_lslpls_imm(int rs,int imm,int rt)
1333{
1334 assert(imm>0);
1335 assert(imm<32);
1336 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1337 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1338}
1339
1340static void emit_shrimm(int rs,u_int imm,int rt)
1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1345 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1346}
1347
1348static void emit_sarimm(int rs,u_int imm,int rt)
1349{
1350 assert(imm>0);
1351 assert(imm<32);
1352 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1353 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1354}
1355
1356static void emit_rorimm(int rs,u_int imm,int rt)
1357{
1358 assert(imm>0);
1359 assert(imm<32);
1360 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1361 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1362}
1363
// Sign-extend the low 16 bits of rs into rt: sxth when HAVE_ARMV6 is
// defined, otherwise a shift left by 16 followed by an arithmetic shift
// right by 16.
static void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  u_int insn=0xe6bf0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1374
// Sign-extend the low 8 bits of rs into rt: sxtb when HAVE_ARMV6 is
// defined, otherwise a shift left by 24 followed by an arithmetic shift
// right by 24.
static void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV6
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  u_int insn=0xe6af0070|rd_rn_rm(rt,0,rs);
  output_w32(insn);
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1385
1386static void emit_shl(u_int rs,u_int shift,u_int rt)
1387{
1388 assert(rs<16);
1389 assert(rt<16);
1390 assert(shift<16);
1391 //if(imm==1) ...
1392 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1393 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1394}
1395
1396static void emit_shr(u_int rs,u_int shift,u_int rt)
1397{
1398 assert(rs<16);
1399 assert(rt<16);
1400 assert(shift<16);
1401 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1402 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1403}
1404
1405static void emit_sar(u_int rs,u_int shift,u_int rt)
1406{
1407 assert(rs<16);
1408 assert(rt<16);
1409 assert(shift<16);
1410 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1411 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1412}
1413
1414static void emit_orrshl(u_int rs,u_int shift,u_int rt)
1415{
1416 assert(rs<16);
1417 assert(rt<16);
1418 assert(shift<16);
1419 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1420 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1421}
1422
1423static void emit_orrshr(u_int rs,u_int shift,u_int rt)
1424{
1425 assert(rs<16);
1426 assert(rt<16);
1427 assert(shift<16);
1428 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1429 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1430}
1431
1432static void emit_cmpimm(int rs,int imm)
1433{
1434 u_int armval;
1435 if(genimm(imm,&armval)) {
1436 assem_debug("cmp %s,#%d\n",regname[rs],imm);
1437 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1438 }else if(genimm(-imm,&armval)) {
1439 assem_debug("cmn %s,#%d\n",regname[rs],imm);
1440 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1441 }else if(imm>0) {
1442 assert(imm<65536);
1443 emit_movimm(imm,HOST_TEMPREG);
1444 assem_debug("cmp %s,r14\n",regname[rs]);
1445 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1446 }else{
1447 assert(imm>-65536);
1448 emit_movimm(-imm,HOST_TEMPREG);
1449 assem_debug("cmn %s,r14\n",regname[rs]);
1450 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1451 }
1452}
1453
1454static void emit_cmovne_imm(int imm,int rt)
1455{
1456 assem_debug("movne %s,#%d\n",regname[rt],imm);
1457 u_int armval;
1458 genimm_checked(imm,&armval);
1459 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1460}
1461
1462static void emit_cmovl_imm(int imm,int rt)
1463{
1464 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1465 u_int armval;
1466 genimm_checked(imm,&armval);
1467 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1468}
1469
1470static void emit_cmovb_imm(int imm,int rt)
1471{
1472 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1473 u_int armval;
1474 genimm_checked(imm,&armval);
1475 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1476}
1477
1478static void emit_cmovs_imm(int imm,int rt)
1479{
1480 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1481 u_int armval;
1482 genimm_checked(imm,&armval);
1483 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1484}
1485
1486static void emit_cmovne_reg(int rs,int rt)
1487{
1488 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1489 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1490}
1491
1492static void emit_cmovl_reg(int rs,int rt)
1493{
1494 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1495 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1496}
1497
1498static void emit_cmovs_reg(int rs,int rt)
1499{
1500 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1501 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1502}
1503
// rt = 1 if the compare of rs against imm yields "lt", else 0.
// When rs and rt are the same register the zeroing has to wait until after
// the compare has read rs.
static void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1511
// rt = 1 if the compare of rs against imm yields "cc", else 0.
// When rs and rt are the same register the zeroing has to wait until after
// the compare has read rs.
static void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1519
// Set-if-less-than-immediate on the register pair rsh:rsl, result in rt.
// The low word is handled by emit_slti32; the high word then overrides that
// result. rsh must not be the same register as rt.
static void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // negative immediate: the high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
  }
  else
  {
    // non-negative immediate: the high word is tested against itself
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
    emit_cmovs_imm(1,rt);
  }
}
1537
// Unsigned-flavoured counterpart of emit_slti64_32: emit_sltiu32 on the low
// word, then a conditional override based on the high word. rsh must not be
// the same register as rt.
static void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
  }
  else
  {
    emit_test(rsh,rsh);
    emit_cmovne_imm(0,rt);
  }
}
1553
1554static void emit_cmp(int rs,int rt)
1555{
1556 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1557 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1558}
1559
// rt = (rs > 0) ? 1 : 0, built as: compare rs with 1, load 1 into rt,
// then conditionally ("lt") replace it with 0.
static void emit_set_gz32(int rs, int rt)
{
  const int one=1;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(0,rt);
}
1567
// rt = 1 when rs is non-zero. Flags come from emit_movs when rs and rt are
// different registers, or from emit_test(rs,rs) when they are the same
// register; the "ne" move then writes 1.
static void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
1575
// "Greater than zero" on the register pair rsh:rsl, result in rt.
// Starts from the 32-bit answer for the low word, then lets the high word
// override it: "ne" forces 1, "mi" forces 0.
static void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
1584
// "Non-zero" on the register pair rsh:rsl: rt receives rsh|rsl (setting the
// flags), and is then overwritten with 1 under the "ne" condition.
static void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
1591
// rt = 1 if the compare of rs1 against rs2 yields "lt", else 0.
// If rt is also one of the sources, it is cleared only after the compare.
static void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
1600
// rt = 1 if the compare of rs1 against rs2 yields "cc", else 0.
// If rt is also one of the sources, it is cleared only after the compare.
static void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1609
1610static void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1611{
1612 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1613 assert(u1!=rt);
1614 assert(u2!=rt);
1615 emit_cmp(l1,l2);
1616 emit_movimm(0,rt);
1617 emit_sbcs(u1,u2,HOST_TEMPREG);
1618 emit_cmovl_imm(1,rt);
1619}
1620
1621static void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1622{
1623 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1624 assert(u1!=rt);
1625 assert(u2!=rt);
1626 emit_cmp(l1,l2);
1627 emit_movimm(0,rt);
1628 emit_sbcs(u1,u2,HOST_TEMPREG);
1629 emit_cmovb_imm(1,rt);
1630}
1631
#ifdef DRC_DBG
extern void gen_interupt();
extern void do_insn_cmp();
// Builds one table entry: the function's address plus its name with a
// leading space (so it can be appended directly to a debug line).
#define FUNCNAME(f) { (intptr_t)f, " " #f }
// Known call/jump targets, used only to annotate the debug listing.
static const struct {
  intptr_t addr;
  const char *name;
} function_names[] = {
  FUNCNAME(cc_interrupt),
  FUNCNAME(gen_interupt),
  FUNCNAME(get_addr_ht),
  FUNCNAME(get_addr),
  FUNCNAME(jump_handler_read8),
  FUNCNAME(jump_handler_read16),
  FUNCNAME(jump_handler_read32),
  FUNCNAME(jump_handler_write8),
  FUNCNAME(jump_handler_write16),
  FUNCNAME(jump_handler_write32),
  FUNCNAME(invalidate_addr),
  FUNCNAME(verify_code_vm),
  FUNCNAME(verify_code),
  FUNCNAME(jump_hlecall),
  FUNCNAME(jump_syscall_hle),
  FUNCNAME(new_dyna_leave),
  FUNCNAME(pcsx_mtc0),
  FUNCNAME(pcsx_mtc0_ds),
  FUNCNAME(do_insn_cmp),
};

// Returns the " name" string for address a, or "" when a is not in the table.
static const char *func_name(intptr_t a)
{
  // unsigned index: the bound is a size_t expression, so a signed index
  // would be compared after an implicit sign conversion
  u_int i;
  for (i = 0; i < sizeof(function_names)/sizeof(function_names[0]); i++)
    if (function_names[i].addr == a)
      return function_names[i].name;
  return "";
}
#else
// Without DRC_DBG no names are available; callers get an empty string.
#define func_name(x) ""
#endif
1672
1673static void emit_call(const void *a_)
1674{
1675 int a = (int)a_;
1676 assem_debug("bl %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1677 u_int offset=genjmp(a);
1678 output_w32(0xeb000000|offset);
1679}
1680
1681static void emit_jmp(const void *a_)
1682{
1683 int a = (int)a_;
1684 assem_debug("b %x (%x+%x)%s\n",a,(int)out,a-(int)out-8,func_name(a));
1685 u_int offset=genjmp(a);
1686 output_w32(0xea000000|offset);
1687}
1688
1689static void emit_jne(const void *a_)
1690{
1691 int a = (int)a_;
1692 assem_debug("bne %x\n",a);
1693 u_int offset=genjmp(a);
1694 output_w32(0x1a000000|offset);
1695}
1696
1697static void emit_jeq(int a)
1698{
1699 assem_debug("beq %x\n",a);
1700 u_int offset=genjmp(a);
1701 output_w32(0x0a000000|offset);
1702}
1703
1704static void emit_js(int a)
1705{
1706 assem_debug("bmi %x\n",a);
1707 u_int offset=genjmp(a);
1708 output_w32(0x4a000000|offset);
1709}
1710
1711static void emit_jns(int a)
1712{
1713 assem_debug("bpl %x\n",a);
1714 u_int offset=genjmp(a);
1715 output_w32(0x5a000000|offset);
1716}
1717
1718static void emit_jl(int a)
1719{
1720 assem_debug("blt %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0xba000000|offset);
1723}
1724
1725static void emit_jge(int a)
1726{
1727 assem_debug("bge %x\n",a);
1728 u_int offset=genjmp(a);
1729 output_w32(0xaa000000|offset);
1730}
1731
1732static void emit_jno(int a)
1733{
1734 assem_debug("bvc %x\n",a);
1735 u_int offset=genjmp(a);
1736 output_w32(0x7a000000|offset);
1737}
1738
1739static void emit_jc(int a)
1740{
1741 assem_debug("bcs %x\n",a);
1742 u_int offset=genjmp(a);
1743 output_w32(0x2a000000|offset);
1744}
1745
1746static void emit_jcc(void *a_)
1747{
1748 int a = (int)a_;
1749 assem_debug("bcc %x\n",a);
1750 u_int offset=genjmp(a);
1751 output_w32(0x3a000000|offset);
1752}
1753
1754static void emit_callreg(u_int r)
1755{
1756 assert(r<15);
1757 assem_debug("blx %s\n",regname[r]);
1758 output_w32(0xe12fff30|r);
1759}
1760
1761static void emit_jmpreg(u_int r)
1762{
1763 assem_debug("mov pc,%s\n",regname[r]);
1764 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1765}
1766
1767static void emit_readword_indexed(int offset, int rs, int rt)
1768{
1769 assert(offset>-4096&&offset<4096);
1770 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1771 if(offset>=0) {
1772 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1773 }else{
1774 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1775 }
1776}
1777
1778static void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1779{
1780 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1781 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1782}
1783
1784static void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1785{
1786 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1787 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1788}
1789
1790static void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1791{
1792 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1793 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1794}
1795
1796static void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1797{
1798 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1799 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1800}
1801
1802static void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1803{
1804 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1805 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1806}
1807
1808static void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1809{
1810 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1811 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1812}
1813
1814static void emit_movsbl_indexed(int offset, int rs, int rt)
1815{
1816 assert(offset>-256&&offset<256);
1817 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1818 if(offset>=0) {
1819 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1820 }else{
1821 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1822 }
1823}
1824
1825static void emit_movswl_indexed(int offset, int rs, int rt)
1826{
1827 assert(offset>-256&&offset<256);
1828 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1829 if(offset>=0) {
1830 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1831 }else{
1832 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1833 }
1834}
1835
1836static void emit_movzbl_indexed(int offset, int rs, int rt)
1837{
1838 assert(offset>-4096&&offset<4096);
1839 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1840 if(offset>=0) {
1841 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1842 }else{
1843 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1844 }
1845}
1846
1847static void emit_movzwl_indexed(int offset, int rs, int rt)
1848{
1849 assert(offset>-256&&offset<256);
1850 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1851 if(offset>=0) {
1852 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1853 }else{
1854 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1855 }
1856}
1857
1858static void emit_ldrd(int offset, int rs, int rt)
1859{
1860 assert(offset>-256&&offset<256);
1861 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1862 if(offset>=0) {
1863 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1864 }else{
1865 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1866 }
1867}
1868
1869static void emit_readword(void *addr, int rt)
1870{
1871 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1872 assert(offset<4096);
1873 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1874 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1875}
1876
1877static void emit_writeword_indexed(int rt, int offset, int rs)
1878{
1879 assert(offset>-4096&&offset<4096);
1880 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1881 if(offset>=0) {
1882 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1883 }else{
1884 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
1885 }
1886}
1887
1888static void emit_writehword_indexed(int rt, int offset, int rs)
1889{
1890 assert(offset>-256&&offset<256);
1891 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
1892 if(offset>=0) {
1893 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1894 }else{
1895 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1896 }
1897}
1898
1899static void emit_writebyte_indexed(int rt, int offset, int rs)
1900{
1901 assert(offset>-4096&&offset<4096);
1902 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
1903 if(offset>=0) {
1904 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
1905 }else{
1906 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
1907 }
1908}
1909
1910static void emit_strcc_dualindexed(int rs1, int rs2, int rt)
1911{
1912 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
1914}
1915
1916static void emit_strccb_dualindexed(int rs1, int rs2, int rt)
1917{
1918 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1919 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
1920}
1921
1922static void emit_strcch_dualindexed(int rs1, int rs2, int rt)
1923{
1924 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1925 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
1926}
1927
1928static void emit_writeword(int rt, void *addr)
1929{
1930 uintptr_t offset = (u_char *)addr - (u_char *)&dynarec_local;
1931 assert(offset<4096);
1932 assem_debug("str %s,fp+%d\n",regname[rt],offset);
1933 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
1934}
1935
1936static void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1937{
1938 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1939 assert(rs1<16);
1940 assert(rs2<16);
1941 assert(hi<16);
1942 assert(lo<16);
1943 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1944}
1945
1946static void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
1947{
1948 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
1949 assert(rs1<16);
1950 assert(rs2<16);
1951 assert(hi<16);
1952 assert(lo<16);
1953 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
1954}
1955
1956static void emit_clz(int rs,int rt)
1957{
1958 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
1959 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
1960}
1961
1962static void emit_subcs(int rs1,int rs2,int rt)
1963{
1964 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1965 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
1966}
1967
1968static void emit_shrcc_imm(int rs,u_int imm,int rt)
1969{
1970 assert(imm>0);
1971 assert(imm<32);
1972 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1973 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1974}
1975
1976static void emit_shrne_imm(int rs,u_int imm,int rt)
1977{
1978 assert(imm>0);
1979 assert(imm<32);
1980 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
1981 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1982}
1983
1984static void emit_negmi(int rs, int rt)
1985{
1986 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
1987 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
1988}
1989
1990static void emit_negsmi(int rs, int rt)
1991{
1992 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
1993 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
1994}
1995
1996static void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
1997{
1998 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
1999 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2000}
2001
2002static void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2003{
2004 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2005 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2006}
2007
2008static void emit_teq(int rs, int rt)
2009{
2010 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2011 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2012}
2013
2014static void emit_rsbimm(int rs, int imm, int rt)
2015{
2016 u_int armval;
2017 genimm_checked(imm,&armval);
2018 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2019 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2020}
2021
2022// Load 2 immediates optimizing for small code size
2023static void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2024{
2025 emit_movimm(imm1,rt1);
2026 u_int armval;
2027 if(genimm(imm2-imm1,&armval)) {
2028 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2029 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2030 }else if(genimm(imm1-imm2,&armval)) {
2031 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2032 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2033 }
2034 else emit_movimm(imm2,rt2);
2035}
2036
2037// Conditionally select one of two immediates, optimizing for small code size
2038// This will only be called if HAVE_CMOV_IMM is defined
2039static void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2040{
2041 u_int armval;
2042 if(genimm(imm2-imm1,&armval)) {
2043 emit_movimm(imm1,rt);
2044 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2045 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2046 }else if(genimm(imm1-imm2,&armval)) {
2047 emit_movimm(imm1,rt);
2048 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2049 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2050 }
2051 else {
2052 #ifndef HAVE_ARMV7
2053 emit_movimm(imm1,rt);
2054 add_literal((int)out,imm2);
2055 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2056 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2057 #else
2058 emit_movw(imm1&0x0000FFFF,rt);
2059 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2060 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2061 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2062 }
2063 emit_movt(imm1&0xFFFF0000,rt);
2064 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2065 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2066 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2067 }
2068 #endif
2069 }
2070}
2071
2072// special case for checking invalid_code
2073static void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2074{
2075 assert(imm<128&&imm>=0);
2076 assert(r>=0&&r<16);
2077 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2078 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2079 emit_cmpimm(HOST_TEMPREG,imm);
2080}
2081
2082static void emit_callne(int a)
2083{
2084 assem_debug("blne %x\n",a);
2085 u_int offset=genjmp(a);
2086 output_w32(0x1b000000|offset);
2087}
2088
2089// Used to preload hash table entries
2090static unused void emit_prefetchreg(int r)
2091{
2092 assem_debug("pld %s\n",regname[r]);
2093 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2094}
2095
2096// Special case for mini_ht
2097static void emit_ldreq_indexed(int rs, u_int offset, int rt)
2098{
2099 assert(offset<4096);
2100 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2101 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2102}
2103
2104static void emit_orrne_imm(int rs,int imm,int rt)
2105{
2106 u_int armval;
2107 genimm_checked(imm,&armval);
2108 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2109 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2110}
2111
2112static void emit_andne_imm(int rs,int imm,int rt)
2113{
2114 u_int armval;
2115 genimm_checked(imm,&armval);
2116 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2117 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2118}
2119
2120static unused void emit_addpl_imm(int rs,int imm,int rt)
2121{
2122 u_int armval;
2123 genimm_checked(imm,&armval);
2124 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2125 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2126}
2127
2128static void emit_jno_unlikely(int a)
2129{
2130 //emit_jno(a);
2131 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2132 output_w32(0x72800000|rd_rn_rm(15,15,0));
2133}
2134
2135static void save_regs_all(u_int reglist)
2136{
2137 int i;
2138 if(!reglist) return;
2139 assem_debug("stmia fp,{");
2140 for(i=0;i<16;i++)
2141 if(reglist&(1<<i))
2142 assem_debug("r%d,",i);
2143 assem_debug("}\n");
2144 output_w32(0xe88b0000|reglist);
2145}
2146
2147static void restore_regs_all(u_int reglist)
2148{
2149 int i;
2150 if(!reglist) return;
2151 assem_debug("ldmia fp,{");
2152 for(i=0;i<16;i++)
2153 if(reglist&(1<<i))
2154 assem_debug("r%d,",i);
2155 assem_debug("}\n");
2156 output_w32(0xe89b0000|reglist);
2157}
2158
2159// Save registers before function call
2160static void save_regs(u_int reglist)
2161{
2162 reglist&=CALLER_SAVE_REGS; // only save the caller-save registers, r0-r3, r12
2163 save_regs_all(reglist);
2164}
2165
2166// Restore registers after function call
2167static void restore_regs(u_int reglist)
2168{
2169 reglist&=CALLER_SAVE_REGS;
2170 restore_regs_all(reglist);
2171}
2172
2173/* Stubs/epilogue */
2174
2175static void literal_pool(int n)
2176{
2177 if(!literalcount) return;
2178 if(n) {
2179 if((int)out-literals[0][0]<4096-n) return;
2180 }
2181 u_int *ptr;
2182 int i;
2183 for(i=0;i<literalcount;i++)
2184 {
2185 u_int l_addr=(u_int)out;
2186 int j;
2187 for(j=0;j<i;j++) {
2188 if(literals[j][1]==literals[i][1]) {
2189 //printf("dup %08x\n",literals[i][1]);
2190 l_addr=literals[j][0];
2191 break;
2192 }
2193 }
2194 ptr=(u_int *)literals[i][0];
2195 u_int offset=l_addr-(u_int)ptr-8;
2196 assert(offset<4096);
2197 assert(!(offset&3));
2198 *ptr|=offset;
2199 if(l_addr==(u_int)out) {
2200 literals[i][0]=l_addr; // remember for dupes
2201 output_w32(literals[i][1]);
2202 }
2203 }
2204 literalcount=0;
2205}
2206
2207static void literal_pool_jumpover(int n)
2208{
2209 if(!literalcount) return;
2210 if(n) {
2211 if((int)out-literals[0][0]<4096-n) return;
2212 }
2213 void *jaddr = out;
2214 emit_jmp(0);
2215 literal_pool(0);
2216 set_jump_target(jaddr, out);
2217}
2218
2219static void emit_extjump2(u_char *addr, int target, void *linker)
2220{
2221 u_char *ptr=(u_char *)addr;
2222 assert((ptr[3]&0x0e)==0xa);
2223 (void)ptr;
2224
2225 emit_loadlp(target,0);
2226 emit_loadlp((u_int)addr,1);
2227 assert(addr>=translation_cache&&addr<(translation_cache+(1<<TARGET_SIZE_2)));
2228 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2229//DEBUG >
2230#ifdef DEBUG_CYCLE_COUNT
2231 emit_readword(&last_count,ECX);
2232 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2233 emit_readword(&next_interupt,ECX);
2234 emit_writeword(HOST_CCREG,&Count);
2235 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2236 emit_writeword(ECX,&last_count);
2237#endif
2238//DEBUG <
2239 emit_jmp(linker);
2240}
2241
2242static void emit_extjump(void *addr, int target)
2243{
2244 emit_extjump2(addr, target, dyna_linker);
2245}
2246
2247static void emit_extjump_ds(void *addr, int target)
2248{
2249 emit_extjump2(addr, target, dyna_linker_ds);
2250}
2251
2252// put rt_val into rt, potentially making use of rs with value rs_val
2253static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2254{
2255 u_int armval;
2256 int diff;
2257 if(genimm(rt_val,&armval)) {
2258 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2259 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2260 return;
2261 }
2262 if(genimm(~rt_val,&armval)) {
2263 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2264 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2265 return;
2266 }
2267 diff=rt_val-rs_val;
2268 if(genimm(diff,&armval)) {
2269 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2270 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2271 return;
2272 }else if(genimm(-diff,&armval)) {
2273 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2274 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2275 return;
2276 }
2277 emit_movimm(rt_val,rt);
2278}
2279
// Return 1 when emit_movimm_from() can derive v2 from a register holding v1
// cheaply: the values are equal, or their difference (or its negation) is an
// 8-bit quantity shifted left by an even number of bits. Return 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if(v1==v2)
    return 1;

  delta=v2-v1;

  // strip trailing pairs of zero bits, then test for at most 8 bits left
  bits=delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  if(bits<0x100)
    return 1;

  // same test on the negated difference
  bits=-delta;
  while(bits!=0&&(bits&3)==0)
    bits>>=2;
  return bits<0x100?1:0;
}
2295
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "no argument" and is skipped.
// r2 may be clobbered: it is the scratch register for the a0==r1/a1==r0 swap.
static void pass_args(int a0, int a1)
{
  if(a1==0&&a0==1) {
    // the two arguments sit in each other's slot: swap through r2
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a1==0&&a0!=0) {
    // a1 lives in r0, so move it out before r0 is overwritten
    emit_mov(a1,1);
    if(a0>=0)
      emit_mov(a0,0);
    return;
  }
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2312
2313static void mov_loadtype_adj(enum stub_type type,int rs,int rt)
2314{
2315 switch(type) {
2316 case LOADB_STUB: emit_signextend8(rs,rt); break;
2317 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2318 case LOADH_STUB: emit_signextend16(rs,rt); break;
2319 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2320 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2321 default: assert(0);
2322 }
2323}
2324
2325#include "pcsxmem.h"
2326#include "pcsxmem_inline.c"
2327
// Emit the out-of-line stub for the load described by stubs[n]; the branch at
// stubs[n].addr is pointed at the code emitted here.
// Emitted code, in order:
//  - look the address in rs up in mem_rtab (index = address >> 12) and shift
//    the entry left by one with flags updated (emit_lsls_imm);
//  - attempt the load through the emit_ldrcc* variants, then emit_jcc back to
//    stubs[n].retaddr (or to the register restore if registers were spilled);
//  - otherwise call jump_handler_read8/16/32 with r0 = address, r1 = shifted
//    table entry, r2 = cycle count, and move the result from r0 into rt.
2328 static void do_readstub(int n)
2329 {
2330 assem_debug("do_readstub %x\n",start+stubs[n].a*4);
2331 literal_pool(256);
2332 set_jump_target(stubs[n].addr, out);
2333 enum stub_type type=stubs[n].type;
2334 int i=stubs[n].a;
2335 int rs=stubs[n].b;
2336 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2337 u_int reglist=stubs[n].e;
2338 signed char *i_regmap=i_regs->regmap;
2339 int rt;
// coprocessor and unaligned loads receive their data in FTEMP
2340 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
2341 rt=get_reg(i_regmap,FTEMP);
2342 }else{
2343 rt=get_reg(i_regmap,rt1[i]);
2344 }
2345 assert(rs>=0);
2346 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2347 void *restore_jump = NULL;
2348 reglist|=(1<<rs);
// look for a register among r0-r9 and r12 (mask 0x13ff) that is not in reglist
2349 for(r=0;r<=12;r++) {
2350 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2351 temp=r; break;
2352 }
2353 }
// the load target is about to be overwritten, so it need not be preserved
2354 if(rt>=0&&rt1[i]!=0)
2355 reglist&=~(1<<rt);
// no free register: spill now and use r0 (or r2 when the address is in r0)
2356 if(temp==-1) {
2357 save_regs(reglist);
2358 regs_saved=1;
2359 temp=(rs==0)?2:0;
2360 }
// prefer r1 as the second temporary when it is spilled or not in reglist
// and is neither temp nor rs
2361 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2362 temp2=1;
2363 emit_readword(&mem_rtab,temp);
2364 emit_shrimm(rs,12,temp2);
2365 emit_readword_dualindexedx4(temp,temp2,temp2);
2366 emit_lsls_imm(temp2,1,temp2);
2367 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2368 switch(type) {
2369 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2370 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2371 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2372 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2373 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2374 default: assert(0);
2375 }
2376 }
2377 if(regs_saved) {
2378 restore_jump=out;
2379 emit_jcc(0); // jump to reg restore
2380 }
2381 else
2382 emit_jcc(stubs[n].retaddr); // return address
2383
// handler path: registers must be spilled before the call
2384 if(!regs_saved)
2385 save_regs(reglist);
2386 void *handler=NULL;
2387 if(type==LOADB_STUB||type==LOADBU_STUB)
2388 handler=jump_handler_read8;
2389 if(type==LOADH_STUB||type==LOADHU_STUB)
2390 handler=jump_handler_read16;
2391 if(type==LOADW_STUB)
2392 handler=jump_handler_read32;
2393 assert(handler);
2394 pass_args(rs,temp2);
2395 int cc=get_reg(i_regmap,CCREG);
2396 if(cc<0)
2397 emit_loadreg(CCREG,2);
2398 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2399 emit_call(handler);
2400 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2401 mov_loadtype_adj(type,0,rt);
2402 }
2403 if(restore_jump)
2404 set_jump_target(restore_jump, out);
2405 restore_regs(reglist);
2406 emit_jmp(stubs[n].retaddr); // return address
2407 }
2408
2409// return memhandler, or get directly accessable address and return 0
2410static void *get_direct_memhandler(void *table,u_int addr,enum stub_type type,u_int *addr_host)
2411{
2412 u_int l1,l2=0;
2413 l1=((u_int *)table)[addr>>12];
2414 if((l1&(1<<31))==0) {
2415 u_int v=l1<<1;
2416 *addr_host=v+addr;
2417 return NULL;
2418 }
2419 else {
2420 l1<<=1;
2421 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2422 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
2423 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
2424 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2425 else
2426 l2=((u_int *)l1)[(addr&0xfff)/4];
2427 if((l2&(1<<31))==0) {
2428 u_int v=l2<<1;
2429 *addr_host=v+(addr&0xfff);
2430 return NULL;
2431 }
2432 return (void *)(l2<<1);
2433 }
2434}
2435
2436static void inline_readstub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2437{
2438 int rs=get_reg(regmap,target);
2439 int rt=get_reg(regmap,target);
2440 if(rs<0) rs=get_reg(regmap,-1);
2441 assert(rs>=0);
2442 u_int host_addr=0,is_dynamic,far_call=0;
2443 void *handler;
2444 int cc=get_reg(regmap,CCREG);
2445 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
2446 return;
2447 handler = get_direct_memhandler(mem_rtab, addr, type, &host_addr);
2448 if (handler == NULL) {
2449 if(rt<0||rt1[i]==0)
2450 return;
2451 if(addr!=host_addr)
2452 emit_movimm_from(addr,rs,host_addr,rs);
2453 switch(type) {
2454 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
2455 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
2456 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
2457 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
2458 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
2459 default: assert(0);
2460 }
2461 return;
2462 }
2463 is_dynamic=pcsxmem_is_handler_dynamic(addr);
2464 if(is_dynamic) {
2465 if(type==LOADB_STUB||type==LOADBU_STUB)
2466 handler=jump_handler_read8;
2467 if(type==LOADH_STUB||type==LOADHU_STUB)
2468 handler=jump_handler_read16;
2469 if(type==LOADW_STUB)
2470 handler=jump_handler_read32;
2471 }
2472
2473 // call a memhandler
2474 if(rt>=0&&rt1[i]!=0)
2475 reglist&=~(1<<rt);
2476 save_regs(reglist);
2477 if(target==0)
2478 emit_movimm(addr,0);
2479 else if(rs!=0)
2480 emit_mov(rs,0);
2481 int offset=(u_char *)handler-out-8;
2482 if(offset<-33554432||offset>=33554432) {
2483 // unreachable memhandler, a plugin func perhaps
2484 emit_movimm((u_int)handler,12);
2485 far_call=1;
2486 }
2487 if(cc<0)
2488 emit_loadreg(CCREG,2);
2489 if(is_dynamic) {
2490 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
2491 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2492 }
2493 else {
2494 emit_readword(&last_count,3);
2495 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2496 emit_add(2,3,2);
2497 emit_writeword(2,&Count);
2498 }
2499
2500 if(far_call)
2501 emit_callreg(12);
2502 else
2503 emit_call(handler);
2504
2505 if(rt>=0&&rt1[i]!=0) {
2506 switch(type) {
2507 case LOADB_STUB: emit_signextend8(0,rt); break;
2508 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
2509 case LOADH_STUB: emit_signextend16(0,rt); break;
2510 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
2511 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
2512 default: assert(0);
2513 }
2514 }
2515 restore_regs(reglist);
2516}
2517
// Emit the out-of-line stub for the store described by stubs[n]; the branch at
// stubs[n].addr is pointed at the code emitted here.
// Emitted code, in order:
//  - look the address in rs up in mem_wtab (index = address >> 12) and shift
//    the entry left by one with flags updated (emit_lsls_imm);
//  - attempt the store through the emit_strcc* variants, then emit_jcc back
//    to stubs[n].retaddr (or to the register restore if registers were spilled);
//  - otherwise call jump_handler_write8/16/32 with r0 = address, r1 = value,
//    r2 = cycle count, r3 = shifted table entry; the handler's return value
//    (new cycle count) is converted back into the cycle register.
2518 static void do_writestub(int n)
2519 {
2520 assem_debug("do_writestub %x\n",start+stubs[n].a*4);
2521 literal_pool(256);
2522 set_jump_target(stubs[n].addr, out);
2523 enum stub_type type=stubs[n].type;
2524 int i=stubs[n].a;
2525 int rs=stubs[n].b;
2526 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2527 u_int reglist=stubs[n].e;
2528 signed char *i_regmap=i_regs->regmap;
2529 int rt,r;
// coprocessor stores take their data from FTEMP, others from rs2[i]
2530 if(itype[i]==C1LS||itype[i]==C2LS) {
2531 rt=get_reg(i_regmap,r=FTEMP);
2532 }else{
2533 rt=get_reg(i_regmap,r=rs2[i]);
2534 }
2535 assert(rs>=0);
2536 assert(rt>=0);
2537 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0;
2538 void *restore_jump = NULL;
2539 int reglist2=reglist|(1<<rs)|(1<<rt);
// look for a register among r0-r9 and r12 (mask 0x13ff) that is not in use
2540 for(rtmp=0;rtmp<=12;rtmp++) {
2541 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
2542 temp=rtmp; break;
2543 }
2544 }
// no free register: spill now and pick one of r0-r3 that is neither rs nor rt
2545 if(temp==-1) {
2546 save_regs(reglist);
2547 regs_saved=1;
2548 for(rtmp=0;rtmp<=3;rtmp++)
2549 if(rtmp!=rs&&rtmp!=rt)
2550 {temp=rtmp;break;}
2551 }
// prefer r3 as the second temporary (it is the handler's table argument)
// when it is spilled or unused and is none of temp, rs, rt
2552 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
2553 temp2=3;
2554 emit_readword(&mem_wtab,temp);
2555 emit_shrimm(rs,12,temp2);
2556 emit_readword_dualindexedx4(temp,temp2,temp2);
2557 emit_lsls_imm(temp2,1,temp2);
2558 switch(type) {
2559 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
2560 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
2561 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
2562 default: assert(0);
2563 }
2564 if(regs_saved) {
2565 restore_jump=out;
2566 emit_jcc(0); // jump to reg restore
2567 }
2568 else
2569 emit_jcc(stubs[n].retaddr); // return address (invcode check)
2570
// handler path: registers must be spilled before the call
2571 if(!regs_saved)
2572 save_regs(reglist);
2573 void *handler=NULL;
2574 switch(type) {
2575 case STOREB_STUB: handler=jump_handler_write8; break;
2576 case STOREH_STUB: handler=jump_handler_write16; break;
2577 case STOREW_STUB: handler=jump_handler_write32; break;
2578 default: assert(0);
2579 }
2580 assert(handler);
2581 pass_args(rs,rt);
2582 if(temp2!=3)
2583 emit_mov(temp2,3);
2584 int cc=get_reg(i_regmap,CCREG);
2585 if(cc<0)
2586 emit_loadreg(CCREG,2);
2587 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2588 // returns new cycle_count
2589 emit_call(handler);
// undo the adjustment added before the call; r0 holds the returned count
2590 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2591 if(cc<0)
2592 emit_storereg(CCREG,2);
2593 if(restore_jump)
2594 set_jump_target(restore_jump, out);
2595 restore_regs(reglist);
2596 emit_jmp(stubs[n].retaddr);
2597 }
2598
2599static void inline_writestub(enum stub_type type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
2600{
2601 int rs=get_reg(regmap,-1);
2602 int rt=get_reg(regmap,target);
2603 assert(rs>=0);
2604 assert(rt>=0);
2605 u_int host_addr=0;
2606 void *handler = get_direct_memhandler(mem_wtab, addr, type, &host_addr);
2607 if (handler == NULL) {
2608 if(addr!=host_addr)
2609 emit_movimm_from(addr,rs,host_addr,rs);
2610 switch(type) {
2611 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
2612 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
2613 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
2614 default: assert(0);
2615 }
2616 return;
2617 }
2618
2619 // call a memhandler
2620 save_regs(reglist);
2621 pass_args(rs,rt);
2622 int cc=get_reg(regmap,CCREG);
2623 if(cc<0)
2624 emit_loadreg(CCREG,2);
2625 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
2626 emit_movimm((u_int)handler,3);
2627 // returns new cycle_count
2628 emit_call(jump_handler_write_h);
2629 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
2630 if(cc<0)
2631 emit_storereg(CCREG,2);
2632 restore_regs(reglist);
2633}
2634
// Emit the out-of-line stub for an unaligned store (SWL, opcode 0x2a, or SWR,
// opcode 0x2e) described by stubs[n]. The active "#if 1" branch spills the
// registers, passes address and value in r0/r1 and the cycle count in r2, and
// calls jump_handle_swl or jump_handle_swr; the handler's return value is
// converted back into the cycle register. The "#else" branch is compiled out.
2635 static void do_unalignedwritestub(int n)
2636 {
2637 assem_debug("do_unalignedwritestub %x\n",start+stubs[n].a*4);
2638 literal_pool(256);
2639 set_jump_target(stubs[n].addr, out);
2640
2641 int i=stubs[n].a;
2642 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2643 int addr=stubs[n].b;
2644 u_int reglist=stubs[n].e;
2645 signed char *i_regmap=i_regs->regmap;
2646 int temp2=get_reg(i_regmap,FTEMP);
2647 int rt;
2648 rt=get_reg(i_regmap,rs2[i]);
2649 assert(rt>=0);
2650 assert(addr>=0);
2651 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// keep the address register across the call; the FTEMP register is scratch
2652 reglist|=(1<<addr);
2653 reglist&=~(1<<temp2);
2654
2655 #if 1
2656 // don't bother with it and call write handler
2657 save_regs(reglist);
2658 pass_args(addr,rt);
2659 int cc=get_reg(i_regmap,CCREG);
2660 if(cc<0)
2661 emit_loadreg(CCREG,2);
2662 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n].d+1),2);
2663 emit_call((opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
// undo the adjustment added before the call; r0 holds the returned count
2664 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n].d+1),cc<0?2:cc);
2665 if(cc<0)
2666 emit_storereg(CCREG,2);
2667 restore_regs(reglist);
2668 emit_jmp(stubs[n].retaddr); // return address
2669 #else
// compiled-out alternative: read-modify-write through readmem/writemem
2670 emit_andimm(addr,0xfffffffc,temp2);
2671 emit_writeword(temp2,&address);
2672
2673 save_regs(reglist);
2674 emit_shrimm(addr,16,1);
2675 int cc=get_reg(i_regmap,CCREG);
2676 if(cc<0) {
2677 emit_loadreg(CCREG,2);
2678 }
2679 emit_movimm((u_int)readmem,0);
2680 emit_addimm(cc<0?2:cc,2*stubs[n].d+2,2);
2681 emit_call((int)&indirect_jump_indexed);
2682 restore_regs(reglist);
2683
2684 emit_readword(&readmem_dword,temp2);
2685 int temp=addr; //hmh
2686 emit_shlimm(addr,3,temp);
2687 emit_andimm(temp,24,temp);
2688 #ifdef BIG_ENDIAN_MIPS
2689 if (opcode[i]==0x2e) // SWR
2690 #else
2691 if (opcode[i]==0x2a) // SWL
2692 #endif
2693 emit_xorimm(temp,24,temp);
2694 emit_movimm(-1,HOST_TEMPREG);
2695 if (opcode[i]==0x2a) { // SWL
2696 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
2697 emit_orrshr(rt,temp,temp2);
2698 }else{
2699 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
2700 emit_orrshl(rt,temp,temp2);
2701 }
2702 emit_readword(&address,addr);
2703 emit_writeword(temp2,&word);
2704 //save_regs(reglist); // don't need to, no state changes
2705 emit_shrimm(addr,16,1);
2706 emit_movimm((u_int)writemem,0);
2707 //emit_call((int)&indirect_jump_indexed);
2708 emit_mov(15,14);
2709 emit_readword_dualindexedx4(0,1,15);
2710 emit_readword(&Count,HOST_TEMPREG);
2711 emit_readword(&next_interupt,2);
2712 emit_addimm(HOST_TEMPREG,-2*stubs[n].d-2,HOST_TEMPREG);
2713 emit_writeword(2,&last_count);
2714 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2715 if(cc<0) {
2716 emit_storereg(CCREG,HOST_TEMPREG);
2717 }
2718 restore_regs(reglist);
2719 emit_jmp(stubs[n].retaddr); // return address
2720 #endif
2721 }
2722
2723static void do_invstub(int n)
2724{
2725 literal_pool(20);
2726 u_int reglist=stubs[n].a;
2727 set_jump_target(stubs[n].addr, out);
2728 save_regs(reglist);
2729 if(stubs[n].b!=0) emit_mov(stubs[n].b,0);
2730 emit_call(&invalidate_addr);
2731 restore_regs(reglist);
2732 emit_jmp(stubs[n].retaddr); // return address
2733}
2734
2735void *do_dirty_stub(int i)
2736{
2737 assem_debug("do_dirty_stub %x\n",start+i*4);
2738 u_int addr=(u_int)source;
2739 // Careful about the code output here, verify_dirty needs to parse it.
2740 #ifndef HAVE_ARMV7
2741 emit_loadlp(addr,1);
2742 emit_loadlp((int)copy,2);
2743 emit_loadlp(slen*4,3);
2744 #else
2745 emit_movw(addr&0x0000FFFF,1);
2746 emit_movw(((u_int)copy)&0x0000FFFF,2);
2747 emit_movt(addr&0xFFFF0000,1);
2748 emit_movt(((u_int)copy)&0xFFFF0000,2);
2749 emit_movw(slen*4,3);
2750 #endif
2751 emit_movimm(start+i*4,0);
2752 emit_call((int)start<(int)0xC0000000?&verify_code:&verify_code_vm);
2753 void *entry = out;
2754 load_regs_entry(i);
2755 if (entry == out)
2756 entry = instr_addr[i];
2757 emit_jmp(instr_addr[i]);
2758 return entry;
2759}
2760
2761static void do_dirty_stub_ds()
2762{
2763 // Careful about the code output here, verify_dirty needs to parse it.
2764 #ifndef HAVE_ARMV7
2765 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
2766 emit_loadlp((int)copy,2);
2767 emit_loadlp(slen*4,3);
2768 #else
2769 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
2770 emit_movw(((u_int)copy)&0x0000FFFF,2);
2771 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
2772 emit_movt(((u_int)copy)&0xFFFF0000,2);
2773 emit_movw(slen*4,3);
2774 #endif
2775 emit_movimm(start+1,0);
2776 emit_call(&verify_code_ds);
2777}
2778
2779static void do_cop1stub(int n)
2780{
2781 literal_pool(256);
2782 assem_debug("do_cop1stub %x\n",start+stubs[n].a*4);
2783 set_jump_target(stubs[n].addr, out);
2784 int i=stubs[n].a;
2785// int rs=stubs[n].b;
2786 struct regstat *i_regs=(struct regstat *)stubs[n].c;
2787 int ds=stubs[n].d;
2788 if(!ds) {
2789 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
2790 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
2791 }
2792 //else {printf("fp exception in delay slot\n");}
2793 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
2794 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2795 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2796 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
2797 emit_jmp(ds?fp_exception_ds:fp_exception);
2798}
2799
2800/* Special assem */
2801
// Assemble a variable shift (shift amount taken from a register) for
// instruction i. Nothing is emitted when the destination rt1[i] is register 0.
// opcode2 <= 7 selects the 32-bit SLLV/SRLV/SRAV forms; anything else is
// treated as the 64-bit DSLLV/DSRLV/DSRAV forms, which work on register
// pairs (the upper halves are the registers mapped with |64).
2802 static void shift_assemble_arm(int i,struct regstat *i_regs)
2803 {
2804 if(rt1[i]) {
2805 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
2806 {
2807 signed char s,t,shift;
2808 t=get_reg(i_regs->regmap,rt1[i]);
2809 s=get_reg(i_regs->regmap,rs1[i]);
2810 shift=get_reg(i_regs->regmap,rs2[i]);
2811 if(t>=0){
// shifting register 0 always yields zero
2812 if(rs1[i]==0)
2813 {
2814 emit_zeroreg(t);
2815 }
// shift amount taken from register 0: plain move
2816 else if(rs2[i]==0)
2817 {
2818 assert(s>=0);
2819 if(s!=t) emit_mov(s,t);
2820 }
2821 else
2822 {
// only the low 5 bits of the shift register are used
2823 emit_andimm(shift,31,HOST_TEMPREG);
2824 if(opcode2[i]==4) // SLLV
2825 {
2826 emit_shl(s,HOST_TEMPREG,t);
2827 }
2828 if(opcode2[i]==6) // SRLV
2829 {
2830 emit_shr(s,HOST_TEMPREG,t);
2831 }
2832 if(opcode2[i]==7) // SRAV
2833 {
2834 emit_sar(s,HOST_TEMPREG,t);
2835 }
2836 }
2837 }
2838 } else { // DSLLV/DSRLV/DSRAV
2839 signed char sh,sl,th,tl,shift;
2840 th=get_reg(i_regs->regmap,rt1[i]|64);
2841 tl=get_reg(i_regs->regmap,rt1[i]);
2842 sh=get_reg(i_regs->regmap,rs1[i]|64);
2843 sl=get_reg(i_regs->regmap,rs1[i]);
2844 shift=get_reg(i_regs->regmap,rs2[i]);
2845 if(tl>=0){
2846 if(rs1[i]==0)
2847 {
2848 emit_zeroreg(tl);
2849 if(th>=0) emit_zeroreg(th);
2850 }
2851 else if(rs2[i]==0)
2852 {
2853 assert(sl>=0);
2854 if(sl!=tl) emit_mov(sl,tl);
2855 if(th>=0&&sh!=th) emit_mov(sh,th);
2856 }
2857 else
2858 {
2859 // FIXME: What if shift==tl ?
2860 assert(shift!=tl);
2861 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the upper half is actually mapped
2862 int real_th=th;
2863 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
2864 assert(sl>=0);
2865 assert(sh>=0);
2866 emit_andimm(shift,31,HOST_TEMPREG);
2867 if(opcode2[i]==0x14) // DSLLV
2868 {
2869 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
2870 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// NOTE(review): unlike the neighbouring uses of th, this one is not guarded
// by th>=0, and th can still be negative for DSLLV — confirm
2871 emit_orrshr(sl,HOST_TEMPREG,th);
2872 emit_andimm(shift,31,HOST_TEMPREG);
// bit 5 of the shift amount selects the "shift by 32 or more" case
2873 emit_testimm(shift,32);
2874 emit_shl(sl,HOST_TEMPREG,tl);
2875 if(th>=0) emit_cmovne_reg(tl,th);
2876 emit_cmovne_imm(0,tl);
2877 }
2878 if(opcode2[i]==0x16) // DSRLV
2879 {
2880 assert(th>=0);
2881 emit_shr(sl,HOST_TEMPREG,tl);
2882 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2883 emit_orrshl(sh,HOST_TEMPREG,tl);
2884 emit_andimm(shift,31,HOST_TEMPREG);
2885 emit_testimm(shift,32);
2886 emit_shr(sh,HOST_TEMPREG,th);
2887 emit_cmovne_reg(th,tl);
2888 if(real_th>=0) emit_cmovne_imm(0,th);
2889 }
2890 if(opcode2[i]==0x17) // DSRAV
2891 {
2892 assert(th>=0);
2893 emit_shr(sl,HOST_TEMPREG,tl);
2894 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
2895 if(real_th>=0) {
2896 assert(temp>=0);
2897 emit_sarimm(th,31,temp);
2898 }
2899 emit_orrshl(sh,HOST_TEMPREG,tl);
2900 emit_andimm(shift,31,HOST_TEMPREG);
2901 emit_testimm(shift,32);
2902 emit_sar(sh,HOST_TEMPREG,th);
2903 emit_cmovne_reg(th,tl);
2904 if(real_th>=0) emit_cmovne_reg(temp,th);
2905 }
2906 }
2907 }
2908 }
2909 }
2910 }
2911
2912static void speculate_mov(int rs,int rt)
2913{
2914 if(rt!=0) {
2915 smrv_strong_next|=1<<rt;
2916 smrv[rt]=smrv[rs];
2917 }
2918}
2919
2920static void speculate_mov_weak(int rs,int rt)
2921{
2922 if(rt!=0) {
2923 smrv_weak_next|=1<<rt;
2924 smrv[rt]=smrv[rs];
2925 }
2926}
2927
// Update the speculated guest register values (smrv[]) and the strong/weak
// validity masks for instruction i of the block being compiled.
// At i==0 the table is seeded from the current psxRegs.GPR.r contents, with
// registers 28-30 marked strong and everything else weak. For each
// instruction the "_next" masks computed on the previous call become current,
// then the instruction type decides whether its destination register
// inherits a speculated value, takes a known constant, or becomes unknown.
2928 static void speculate_register_values(int i)
2929 {
2930 if(i==0) {
2931 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
2932 // gp,sp are likely to stay the same throughout the block
2933 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
2934 smrv_weak_next=~smrv_strong_next;
2935 //printf(" llr %08x\n", smrv[4]);
2936 }
2937 smrv_strong=smrv_strong_next;
2938 smrv_weak=smrv_weak_next;
2939 switch(itype[i]) {
2940 case ALU:
// inherit from whichever source is known, preferring strong over weak;
// with no known source the destination becomes unknown
2941 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2942 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
2943 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2944 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
2945 else {
2946 smrv_strong_next&=~(1<<rt1[i]);
2947 smrv_weak_next&=~(1<<rt1[i]);
2948 }
2949 break;
2950 case SHIFTIMM:
2951 smrv_strong_next&=~(1<<rt1[i]);
2952 smrv_weak_next&=~(1<<rt1[i]);
2953 // fallthrough
2954 case IMM16:
// a destination with a known constant takes that constant as a strong value
2955 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
2956 int value,hr=get_reg(regs[i].regmap,rt1[i]);
2957 if(hr>=0) {
2958 if(get_final_value(hr,i,&value))
2959 smrv[rt1[i]]=value;
2960 else smrv[rt1[i]]=constmap[i][hr];
2961 smrv_strong_next|=1<<rt1[i];
2962 }
2963 }
2964 else {
2965 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
2966 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
2967 }
2968 break;
2969 case LOAD:
2970 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
2971 // special case for BIOS
2972 smrv[rt1[i]]=0xa0000000;
2973 smrv_strong_next|=1<<rt1[i];
2974 break;
2975 }
2976 // fallthrough
2977 case SHIFT:
2978 case LOADLR:
2979 case MOV:
// result not tracked: the destination becomes unknown
2980 smrv_strong_next&=~(1<<rt1[i]);
2981 smrv_weak_next&=~(1<<rt1[i]);
2982 break;
2983 case COP0:
2984 case COP2:
2985 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
2986 smrv_strong_next&=~(1<<rt1[i]);
2987 smrv_weak_next&=~(1<<rt1[i]);
2988 }
2989 break;
2990 case C2LS:
2991 if (opcode[i]==0x32) { // LWC2
2992 smrv_strong_next&=~(1<<rt1[i]);
2993 smrv_weak_next&=~(1<<rt1[i]);
2994 }
2995 break;
2996 }
2997 #if 0
2998 int r=4;
2999 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3000 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3001 #endif
3002 }
3003
// Memory region classes returned by get_ptr_mem_type() and used by
// emit_fastpath_cmp_jump() to pick the address adjustment for the fast path.
3004 enum {
3005 MTYPE_8000 = 0, // default: any address not covered by the classes below
3006 MTYPE_8020, // RAM mirror, 0x80200000-0x807fffff
3007 MTYPE_0000, // RAM mirror at 0, addresses below 0x00200000
3008 MTYPE_A000, // RAM mirror, 0xa0000000-0xa01fffff
3009 MTYPE_1F80, // scratchpad, 0x1f800000-0x1f800fff
3010 };
3011
3012static int get_ptr_mem_type(u_int a)
3013{
3014 if(a < 0x00200000) {
3015 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3016 // return wrong, must use memhandler for BIOS self-test to pass
3017 // 007 does similar stuff from a00 mirror, weird stuff
3018 return MTYPE_8000;
3019 return MTYPE_0000;
3020 }
3021 if(0x1f800000 <= a && a < 0x1f801000)
3022 return MTYPE_1F80;
3023 if(0x80200000 <= a && a < 0x80800000)
3024 return MTYPE_8020;
3025 if(0xa0000000 <= a && a < 0xa0200000)
3026 return MTYPE_A000;
3027 return MTYPE_8000;
3028}
3029
// Emit the address range check that guards the fast memory access path of
// instruction i.
//   addr              - host register holding the guest address
//   addr_reg_override - out: set to HOST_TEMPREG when the adjusted address
//                       was placed there; left untouched otherwise
// The expected memory region is guessed from the speculated value of the base
// register rs1[i] when one is known, otherwise from the region the block
// itself runs in. Returns the location of the emitted conditional branch.
3030 static void *emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3031 {
3032 void *jaddr = NULL;
3033 int type=0;
3034 int mr=rs1[i];
3035 if(((smrv_strong|smrv_weak)>>mr)&1) {
3036 type=get_ptr_mem_type(smrv[mr]);
3037 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3038 }
3039 else {
3040 // use the mirror we are running on
3041 type=get_ptr_mem_type(start);
3042 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3043 }
3044
// for the RAM mirrors, fold the address into HOST_TEMPREG and fall through
// to the common RAM check (type 0) below
3045 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3046 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3047 addr=*addr_reg_override=HOST_TEMPREG;
3048 type=0;
3049 }
3050 else if(type==MTYPE_0000) { // RAM 0 mirror
3051 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3052 addr=*addr_reg_override=HOST_TEMPREG;
3053 type=0;
3054 }
3055 else if(type==MTYPE_A000) { // RAM A mirror
3056 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3057 addr=*addr_reg_override=HOST_TEMPREG;
3058 type=0;
3059 }
3060 else if(type==MTYPE_1F80) { // scratchpad
// the dedicated scratchpad check is only used when psxH is at 0x1f800000
3061 if (psxH == (void *)0x1f800000) {
3062 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3063 emit_cmpimm(HOST_TEMPREG,0x1000);
3064 jaddr=out;
3065 emit_jc(0);
3066 }
3067 else {
3068 // do usual RAM check, jump will go to the right handler
3069 type=0;
3070 }
3071 }
3072
3073 if(type==0)
3074 {
3075 emit_cmpimm(addr,RAM_SIZE);
3076 jaddr=out;
3077 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3078 // Hint to branch predictor that the branch is unlikely to be taken
3079 if(rs1[i]>=28)
3080 emit_jno_unlikely(0);
3081 else
3082 #endif
3083 emit_jno(0);
// apply ram_offset to the address when it is non-zero
3084 if(ram_offset!=0) {
3085 emit_addimm(addr,ram_offset,HOST_TEMPREG);
3086 addr=*addr_reg_override=HOST_TEMPREG;
3087 }
3088 }
3089
3090 return jaddr;
3091 }
3092
3093#define shift_assemble shift_assemble_arm
3094
// Assemble an unaligned load for instruction i: LWL (opcode 0x22) or LWR
// (opcode 0x26). The aligned word containing the address is read into the
// FTEMP register (directly, through a read stub, or through
// inline_readstub() for a constant non-RAM address), then shifted and merged
// into the destination register. LDL/LDR (0x1A/0x1B) are not supported and
// trip an assertion.
3095 static void loadlr_assemble_arm(int i,struct regstat *i_regs)
3096 {
3097 int s,tl,temp,temp2,addr;
3098 int offset;
3099 void *jaddr=0;
3100 int memtarget=0,c=0;
3101 int fastload_reg_override=0;
3102 u_int hr,reglist=0;
3103 tl=get_reg(i_regs->regmap,rt1[i]);
3104 s=get_reg(i_regs->regmap,rs1[i]);
3105 temp=get_reg(i_regs->regmap,-1);
3106 temp2=get_reg(i_regs->regmap,FTEMP);
3107 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
3108 assert(addr<0);
3109 offset=imm[i];
// every mapped host register, plus temp, is treated as live
3110 for(hr=0;hr<HOST_REGS;hr++) {
3111 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
3112 }
3113 reglist|=1<<temp;
// NOTE(review): c is still 0 here; it is only computed further down
3114 if(offset||s<0||c) addr=temp2;
3115 else addr=s;
3116 if(s>=0) {
3117 c=(i_regs->wasconst>>s)&1;
3118 if(c) {
3119 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
3120 }
3121 }
3122 if(!c) {
// temp = address*8 (bit offset within the word), temp2 = aligned address
3123 emit_shlimm(addr,3,temp);
3124 if (opcode[i]==0x22||opcode[i]==0x26) {
3125 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
3126 }else{
3127 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
3128 }
3129 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
3130 }
3131 else {
3132 if(ram_offset&&memtarget) {
3133 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
3134 fastload_reg_override=HOST_TEMPREG;
3135 }
3136 if (opcode[i]==0x22||opcode[i]==0x26) {
3137 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
3138 }else{
3139 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
3140 }
3141 }
3142 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
3143 if(!c||memtarget) {
3144 int a=temp2;
3145 if(fastload_reg_override) a=fastload_reg_override;
3146 emit_readword_indexed(0,a,temp2);
3147 if(jaddr) add_stub_r(LOADW_STUB,jaddr,out,i,temp2,i_regs,ccadj[i],reglist);
3148 }
3149 else
3150 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
3151 if(rt1[i]) {
3152 assert(tl>=0);
3153 emit_andimm(temp,24,temp);
3154 #ifdef BIG_ENDIAN_MIPS
3155 if (opcode[i]==0x26) // LWR
3156 #else
3157 if (opcode[i]==0x22) // LWL
3158 #endif
3159 emit_xorimm(temp,24,temp);
// clear the bits of tl that the loaded word replaces, then merge
3160 emit_movimm(-1,HOST_TEMPREG);
3161 if (opcode[i]==0x26) {
3162 emit_shr(temp2,temp,temp2);
3163 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
3164 }else{
3165 emit_shl(temp2,temp,temp2);
3166 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
3167 }
3168 emit_or(temp2,tl,tl);
3169 }
3170 //emit_storereg(rt1[i],tl); // DEBUG
3171 }
3172 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
3173 assert(0);
3174 }
3175 }
// generic name used for this backend's implementation
3176 #define loadlr_assemble loadlr_assemble_arm
3177
// Assemble a coprocessor 0 instruction for instruction i:
//   opcode2 == 0    MFC0: read reg_cop0[copr] into the destination register
//   opcode2 == 4    MTC0: call pcsx_mtc0 (or pcsx_mtc0_ds in a delay slot)
//                   with r0 = register number and r1 = value
//   opcode2 == 0x10 only RFE is handled: the low bits of Status are updated
// For MTC0 to registers 9, 11, 12 and 13 the cycle count is written to Count
// before the call and re-derived from Count/next_interupt afterwards.
3178 static void cop0_assemble(int i,struct regstat *i_regs)
3179 {
3180 if(opcode2[i]==0) // MFC0
3181 {
3182 signed char t=get_reg(i_regs->regmap,rt1[i]);
3183 u_int copr=(source[i]>>11)&0x1f;
3184 //assert(t>=0); // Why does this happen? OOT is weird
3185 if(t>=0&&rt1[i]!=0) {
3186 emit_readword(&reg_cop0[copr],t);
3187 }
3188 }
3189 else if(opcode2[i]==4) // MTC0
3190 {
3191 signed char s=get_reg(i_regs->regmap,rs1[i]);
3192 char copr=(source[i]>>11)&0x1f;
3193 assert(s>=0);
3194 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
// Count = last_count + cycle count + adjustment, kept in HOST_CCREG
3195 if(copr==9||copr==11||copr==12||copr==13) {
3196 emit_readword(&last_count,HOST_TEMPREG);
3197 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
3198 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3199 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3200 emit_writeword(HOST_CCREG,&Count);
3201 }
3202 // What a mess. The status register (12) can enable interrupts,
3203 // so needs a special case to handle a pending interrupt.
3204 // The interrupt must be taken immediately, because a subsequent
3205 // instruction might disable interrupts again.
3206 if(copr==12||copr==13) {
3207 if (is_delayslot) {
3208 // burn cycles to cause cc_interrupt, which will
3209 // reschedule next_interupt. Relies on CCREG from above.
3210 assem_debug("MTC0 DS %d\n", copr);
3211 emit_writeword(HOST_CCREG,&last_count);
3212 emit_movimm(0,HOST_CCREG);
3213 emit_storereg(CCREG,HOST_CCREG);
3214 emit_loadreg(rs1[i],1);
3215 emit_movimm(copr,0);
3216 emit_call(pcsx_mtc0_ds);
3217 emit_loadreg(rs1[i],s);
3218 return;
3219 }
// record the address of the next instruction and clear pending_exception
// before the call
3220 emit_movimm(start+i*4+4,HOST_TEMPREG);
3221 emit_writeword(HOST_TEMPREG,&pcaddr);
3222 emit_movimm(0,HOST_TEMPREG);
3223 emit_writeword(HOST_TEMPREG,&pending_exception);
3224 }
3225 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
3226 //else
// HOST_CCREG was overwritten above, so reload the value in that case
3227 if(s==HOST_CCREG)
3228 emit_loadreg(rs1[i],1);
3229 else if(s!=1)
3230 emit_mov(s,1);
3231 emit_movimm(copr,0);
3232 emit_call(pcsx_mtc0);
// re-derive the cycle count relative to the current next_interupt
3233 if(copr==9||copr==11||copr==12||copr==13) {
3234 emit_readword(&Count,HOST_CCREG);
3235 emit_readword(&next_interupt,HOST_TEMPREG);
3236 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
3237 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
3238 emit_writeword(HOST_TEMPREG,&last_count);
3239 emit_storereg(CCREG,HOST_CCREG);
3240 }
// jump to do_interrupt when the call left pending_exception non-zero
3241 if(copr==12||copr==13) {
3242 assert(!is_delayslot);
3243 emit_readword(&pending_exception,14);
3244 emit_test(14,14);
3245 emit_jne(&do_interrupt);
3246 }
3247 emit_loadreg(rs1[i],s);
3248 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
3249 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
3250 cop1_usable=0;
3251 }
3252 else
3253 {
3254 assert(opcode2[i]==0x10);
3255 if((source[i]&0x3f)==0x10) // RFE
3256 {
// Status = (Status & ~0xf) | ((Status & 0x3c) >> 2)
3257 emit_readword(&Status,0);
3258 emit_andimm(0,0x3c,1);
3259 emit_andimm(0,~0xf,0);
3260 emit_orrshr_imm(1,2,0);
3261 emit_writeword(0,&Status);
3262 }
3263 }
3264 }
3265
// Emit code that reads coprocessor 2 data register copr into host register tl.
//   temp - scratch host register, used only for registers 28 and 29
// Several registers are normalized on read and the normalized value is
// written back to reg_cop2d[] as well as returned in tl.
3266 static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
3267 {
3268 switch (copr) {
// read as sign-extended 16-bit values
3269 case 1:
3270 case 3:
3271 case 5:
3272 case 8:
3273 case 9:
3274 case 10:
3275 case 11:
3276 emit_readword(&reg_cop2d[copr],tl);
3277 emit_signextend16(tl,tl);
3278 emit_writeword(tl,&reg_cop2d[copr]); // hmh
3279 break;
// read as zero-extended 16-bit values
3280 case 7:
3281 case 16:
3282 case 17:
3283 case 18:
3284 case 19:
3285 emit_readword(&reg_cop2d[copr],tl);
3286 emit_andimm(tl,0xffff,tl);
3287 emit_writeword(tl,&reg_cop2d[copr]);
3288 break;
// register 15 reads back register 14
3289 case 15:
3290 emit_readword(&reg_cop2d[14],tl); // SXY2
3291 emit_writeword(tl,&reg_cop2d[copr]);
3292 break;
// registers 28/29 are assembled from bits 7-11 of registers 9, 10 and 11,
// packed as three 5-bit fields; a field is zeroed when bit 15 of its source
// register is set
3293 case 28:
3294 case 29:
3295 emit_readword(&reg_cop2d[9],temp);
3296 emit_testimm(temp,0x8000); // do we need this?
3297 emit_andimm(temp,0xf80,temp);
3298 emit_andne_imm(temp,0,temp);
3299 emit_shrimm(temp,7,tl);
3300 emit_readword(&reg_cop2d[10],temp);
3301 emit_testimm(temp,0x8000);
3302 emit_andimm(temp,0xf80,temp);
3303 emit_andne_imm(temp,0,temp);
3304 emit_orrshr_imm(temp,2,tl);
3305 emit_readword(&reg_cop2d[11],temp);
3306 emit_testimm(temp,0x8000);
3307 emit_andimm(temp,0xf80,temp);
3308 emit_andne_imm(temp,0,temp);
3309 emit_orrshl_imm(temp,3,tl);
3310 emit_writeword(tl,&reg_cop2d[copr]);
3311 break;
// everything else is read as stored
3312 default:
3313 emit_readword(&reg_cop2d[copr],tl);
3314 break;
3315 }
3316 }
3317
3318static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
3319{
3320 switch (copr) {
3321 case 15:
3322 emit_readword(&reg_cop2d[13],temp); // SXY1
3323 emit_writeword(sl,&reg_cop2d[copr]);
3324 emit_writeword(temp,&reg_cop2d[12]); // SXY0
3325 emit_readword(&reg_cop2d[14],temp); // SXY2
3326 emit_writeword(sl,&reg_cop2d[14]);
3327 emit_writeword(temp,&reg_cop2d[13]); // SXY1
3328 break;
3329 case 28:
3330 emit_andimm(sl,0x001f,temp);
3331 emit_shlimm(temp,7,temp);
3332 emit_writeword(temp,&reg_cop2d[9]);
3333 emit_andimm(sl,0x03e0,temp);
3334 emit_shlimm(temp,2,temp);
3335 emit_writeword(temp,&reg_cop2d[10]);
3336 emit_andimm(sl,0x7c00,temp);
3337 emit_shrimm(temp,3,temp);
3338 emit_writeword(temp,&reg_cop2d[11]);
3339 emit_writeword(sl,&reg_cop2d[28]);
3340 break;
3341 case 30:
3342 emit_movs(sl,temp);
3343 emit_mvnmi(temp,temp);
3344#ifdef HAVE_ARMV5
3345 emit_clz(temp,temp);
3346#else
3347 emit_movs(temp,HOST_TEMPREG);
3348 emit_movimm(0,temp);
3349 emit_jeq((int)out+4*4);
3350 emit_addpl_imm(temp,1,temp);
3351 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3352 emit_jns((int)out-2*4);
3353#endif
3354 emit_writeword(sl,&reg_cop2d[30]);
3355 emit_writeword(temp,&reg_cop2d[31]);
3356 break;
3357 case 31:
3358 break;
3359 default:
3360 emit_writeword(sl,&reg_cop2d[copr]);
3361 break;
3362 }
3363}
3364
3365static void cop2_assemble(int i,struct regstat *i_regs)
3366{
3367 u_int copr=(source[i]>>11)&0x1f;
3368 signed char temp=get_reg(i_regs->regmap,-1);
3369 if (opcode2[i]==0) { // MFC2
3370 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3371 if(tl>=0&&rt1[i]!=0)
3372 cop2_get_dreg(copr,tl,temp);
3373 }
3374 else if (opcode2[i]==4) { // MTC2
3375 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3376 cop2_put_dreg(copr,sl,temp);
3377 }
3378 else if (opcode2[i]==2) // CFC2
3379 {
3380 signed char tl=get_reg(i_regs->regmap,rt1[i]);
3381 if(tl>=0&&rt1[i]!=0)
3382 emit_readword(&reg_cop2c[copr],tl);
3383 }
3384 else if (opcode2[i]==6) // CTC2
3385 {
3386 signed char sl=get_reg(i_regs->regmap,rs1[i]);
3387 switch(copr) {
3388 case 4:
3389 case 12:
3390 case 20:
3391 case 26:
3392 case 27:
3393 case 29:
3394 case 30:
3395 emit_signextend16(sl,temp);
3396 break;
3397 case 31:
3398 //value = value & 0x7ffff000;
3399 //if (value & 0x7f87e000) value |= 0x80000000;
3400 emit_shrimm(sl,12,temp);
3401 emit_shlimm(temp,12,temp);
3402 emit_testimm(temp,0x7f000000);
3403 emit_testeqimm(temp,0x00870000);
3404 emit_testeqimm(temp,0x0000e000);
3405 emit_orrne_imm(temp,0x80000000,temp);
3406 break;
3407 default:
3408 temp=sl;
3409 break;
3410 }
3411 emit_writeword(temp,&reg_cop2c[copr]);
3412 assert(sl>=0);
3413 }
3414}
3415
3416static void c2op_prologue(u_int op,u_int reglist)
3417{
3418 save_regs_all(reglist);
3419#ifdef PCNT
3420 emit_movimm(op,0);
3421 emit_call((int)pcnt_gte_start);
3422#endif
3423 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
3424}
3425
3426static void c2op_epilogue(u_int op,u_int reglist)
3427{
3428#ifdef PCNT
3429 emit_movimm(op,0);
3430 emit_call((int)pcnt_gte_end);
3431#endif
3432 restore_regs_all(reglist);
3433}
3434
3435static void c2op_call_MACtoIR(int lm,int need_flags)
3436{
3437 if(need_flags)
3438 emit_call(lm?gteMACtoIR_lm1:gteMACtoIR_lm0);
3439 else
3440 emit_call(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf);
3441}
3442
3443static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
3444{
3445 emit_call(func);
3446 // func is C code and trashes r0
3447 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3448 if(need_flags||need_ir)
3449 c2op_call_MACtoIR(lm,need_flags);
3450 emit_call(need_flags?gteMACtoRGB:gteMACtoRGB_nf);
3451}
3452
// Emit code for the GTE operation encoded in source[i].
//   i      - index of the instruction being assembled
//   i_regs - register state; regmap tells which host registers are in use
// The operation number is the low 6 bits of the opcode. Nothing is emitted
// when gte_handlers[] has no entry for it. Unless DRC_DBG is defined, several
// operations get a specialized call sequence; everything else goes through
// the generic handler table (flagless variant when flags are not needed).
3453 static void c2op_assemble(int i,struct regstat *i_regs)
3454 {
3455 u_int c2op=source[i]&0x3f;
3456 u_int hr,reglist_full=0,reglist;
3457 int need_flags,need_ir;
// Collect every host register that currently has a mapping.
3458 for(hr=0;hr<HOST_REGS;hr++) {
3459 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
3460 }
// By default only the caller-saved subset is saved around the call.
3461 reglist=reglist_full&CALLER_SAVE_REGS;
3462
3463 if (gte_handlers[c2op]!=NULL) {
// Bit 63 of gte_unneeded[i+1] set means the flags are not needed;
// bits 9..11 (0xe00) all set means need_ir is 0.
3464 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
3465 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
3466 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
3467 source[i],gte_unneeded[i+1],need_flags,need_ir);
// The hack flag forces the flagless code paths.
3468 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
3469 need_flags=0;
// Opcode bit 19 is 'shift', bit 10 is 'lm'.
3470 int shift = (source[i] >> 19) & 1;
3471 int lm = (source[i] >> 10) & 1;
3472 switch(c2op) {
3473 #ifndef DRC_DBG
3474 case GTE_MVMVA: {
3475 #ifdef HAVE_ARMV5
// Opcode fields: v = bits 15..16, cv = bits 13..14, mx = bits 17..18.
3476 int v = (source[i] >> 15) & 3;
3477 int cv = (source[i] >> 13) & 3;
3478 int mx = (source[i] >> 17) & 3;
// This path loads r4-r7 below, so they are added to the save mask.
3479 reglist=reglist_full&(CALLER_SAVE_REGS|0xf0); // +{r4-r7}
3480 c2op_prologue(c2op,reglist);
3481 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
3482 if(v<3)
3483 emit_ldrd(v*8,0,4);
3484 else {
// v==3: build the packed vector from data registers 9, 10 and 11.
3485 emit_movzwl_indexed(9*4,0,4); // gteIR
3486 emit_movzwl_indexed(10*4,0,6);
3487 emit_movzwl_indexed(11*4,0,5);
3488 emit_orrshl_imm(6,16,4);
3489 }
// mx==3 / cv==3 use the pointer read from zeromem_ptr instead of an
// offset from r0.
3490 if(mx<3)
3491 emit_addimm(0,32*4+mx*8*4,6);
3492 else
3493 emit_readword(&zeromem_ptr,6);
3494 if(cv<3)
3495 emit_addimm(0,32*4+(cv*8+5)*4,7);
3496 else
3497 emit_readword(&zeromem_ptr,7);
3498 #ifdef __ARM_NEON__
3499 emit_movimm(source[i],1); // opcode
3500 emit_call(gteMVMVA_part_neon);
3501 if(need_flags) {
3502 emit_movimm(lm,1);
3503 emit_call(gteMACtoIR_flags_neon);
3504 }
3505 #else
3506 if(cv==3&&shift)
3507 emit_call((int)gteMVMVA_part_cv3sh12_arm);
3508 else {
3509 emit_movimm(shift,1);
3510 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
3511 }
3512 if(need_flags||need_ir)
3513 c2op_call_MACtoIR(lm,need_flags);
3514 #endif
3515 #else /* if not HAVE_ARMV5 */
// Without ARMv5 the generic handler is used; the opcode is stored to
// psxRegs.code before the call.
3516 c2op_prologue(c2op,reglist);
3517 emit_movimm(source[i],1); // opcode
3518 emit_writeword(1,&psxRegs.code);
3519 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
3520 #endif
3521 break;
3522 }
3523 case GTE_OP:
3524 c2op_prologue(c2op,reglist);
3525 emit_call(shift?gteOP_part_shift:gteOP_part_noshift);
3526 if(need_flags||need_ir) {
// r0 is reloaded with the cop2 data register base before the next call.
3527 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3528 c2op_call_MACtoIR(lm,need_flags);
3529 }
3530 break;
3531 case GTE_DPCS:
3532 c2op_prologue(c2op,reglist);
3533 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
3534 break;
3535 case GTE_INTPL:
3536 c2op_prologue(c2op,reglist);
3537 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
3538 break;
3539 case GTE_SQR:
3540 c2op_prologue(c2op,reglist);
3541 emit_call(shift?gteSQR_part_shift:gteSQR_part_noshift);
3542 if(need_flags||need_ir) {
3543 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
3544 c2op_call_MACtoIR(lm,need_flags);
3545 }
3546 break;
3547 case GTE_DCPL:
3548 c2op_prologue(c2op,reglist);
3549 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
3550 break;
3551 case GTE_GPF:
3552 c2op_prologue(c2op,reglist);
3553 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
3554 break;
3555 case GTE_GPL:
3556 c2op_prologue(c2op,reglist);
3557 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
3558 break;
3559 #endif
// Generic path: call the handler from the table. With DRC_DBG defined this
// is the only path, and the opcode is written to psxRegs.code first.
3560 default:
3561 c2op_prologue(c2op,reglist);
3562 #ifdef DRC_DBG
3563 emit_movimm(source[i],1); // opcode
3564 emit_writeword(1,&psxRegs.code);
3565 #endif
3566 emit_call(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]);
3567 break;
3568 }
// Every path restores the registers that its prologue saved.
3569 c2op_epilogue(c2op,reglist);
3570 }
3571 }
3572
3573static void cop1_unusable(int i,struct regstat *i_regs)
3574{
3575 // XXX: should just just do the exception instead
3576 if(!cop1_usable) {
3577 void *jaddr=out;
3578 emit_jmp(0);
3579 add_stub_r(FP_STUB,jaddr,out,i,0,i_regs,is_delayslot,0);
3580 cop1_usable=1;
3581 }
3582}
3583
// COP1 register moves: only the "cop1 unusable" path is emitted.
static void cop1_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3588
// Floating point conversions: only the "cop1 unusable" path is emitted.
static void fconv_assemble_arm(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
#define fconv_assemble fconv_assemble_arm
3594
// Floating point compares: only the "cop1 unusable" path is emitted.
static void fcomp_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3599
// Floating point arithmetic: only the "cop1 unusable" path is emitted.
static void float_assemble(int i,struct regstat *i_regs)
{
  cop1_unusable(i,i_regs);
}
3604
// Emit code for the multiply/divide instruction at index i.
//   i      - index of the instruction being assembled
//   i_regs - register state used to look up the host registers of the
//            operands and of HI/LO
// When both source register numbers are non-zero the 32-bit operation
// selected by opcode2[i] is emitted (64-bit forms assert). Otherwise HI and
// LO are simply zeroed, if they are mapped.
// The divide sequences contain branches whose targets are byte offsets
// relative to 'out' at the moment the branch is emitted, so the number and
// order of the emit_* calls must not be changed without adjusting them.
3605 static void multdiv_assemble_arm(int i,struct regstat *i_regs)
3606 {
3607 // case 0x18: MULT
3608 // case 0x19: MULTU
3609 // case 0x1A: DIV
3610 // case 0x1B: DIVU
3611 // case 0x1C: DMULT
3612 // case 0x1D: DMULTU
3613 // case 0x1E: DDIV
3614 // case 0x1F: DDIVU
3615 if(rs1[i]&&rs2[i])
3616 {
3617 if((opcode2[i]&4)==0) // 32-bit
3618 {
3619 if(opcode2[i]==0x18) // MULT
3620 {
3621 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3622 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3623 signed char hi=get_reg(i_regs->regmap,HIREG);
3624 signed char lo=get_reg(i_regs->regmap,LOREG);
3625 assert(m1>=0);
3626 assert(m2>=0);
3627 assert(hi>=0);
3628 assert(lo>=0);
3629 emit_smull(m1,m2,hi,lo);
3630 }
3631 if(opcode2[i]==0x19) // MULTU
3632 {
3633 signed char m1=get_reg(i_regs->regmap,rs1[i]);
3634 signed char m2=get_reg(i_regs->regmap,rs2[i]);
3635 signed char hi=get_reg(i_regs->regmap,HIREG);
3636 signed char lo=get_reg(i_regs->regmap,LOREG);
3637 assert(m1>=0);
3638 assert(m2>=0);
3639 assert(hi>=0);
3640 assert(lo>=0);
3641 emit_umull(m1,m2,hi,lo);
3642 }
3643 if(opcode2[i]==0x1A) // DIV
3644 {
3645 signed char d1=get_reg(i_regs->regmap,rs1[i]);
3646 signed char d2=get_reg(i_regs->regmap,rs2[i]);
3647 assert(d1>=0);
3648 assert(d2>=0);
// The quotient lives in LO's host register, the remainder in HI's.
3649 signed char quotient=get_reg(i_regs->regmap,LOREG);
3650 signed char remainder=get_reg(i_regs->regmap,HIREG);
3651 assert(quotient>=0);
3652 assert(remainder>=0);
3653 emit_movs(d1,remainder);
3654 emit_movimm(0xffffffff,quotient);
3655 emit_negmi(quotient,quotient); // .. quotient and ..
3656 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
3657 emit_movs(d2,HOST_TEMPREG);
// NOTE(review): the same +52 byte skip is used for both HAVE_ARMV5 variants
// below, although the non-ARMv5 variant contains two more emit_* calls. If
// every emit_* here produces exactly one 4-byte instruction, the landing
// point differs between the two builds - confirm the non-ARMv5 offset.
3658 emit_jeq((int)out+52); // Division by zero
3659 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
3660 #ifdef HAVE_ARMV5
3661 emit_clz(HOST_TEMPREG,quotient);
3662 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
3663 #else
// Software replacement for clz: counting loop that branches back two
// instructions.
3664 emit_movimm(0,quotient);
3665 emit_addpl_imm(quotient,1,quotient);
3666 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3667 emit_jns((int)out-2*4);
3668 #endif
3669 emit_orimm(quotient,1<<31,quotient);
3670 emit_shr(quotient,quotient,quotient);
// Compare/subtract/shift loop; the jcc below branches back 16 bytes,
// i.e. to the emit_cmp.
3671 emit_cmp(remainder,HOST_TEMPREG);
3672 emit_subcs(remainder,HOST_TEMPREG,remainder);
3673 emit_adcs(quotient,quotient,quotient);
3674 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
3675 emit_jcc(out-16); // -4
// Conditionally negate the results based on the operand signs.
3676 emit_teq(d1,d2);
3677 emit_negmi(quotient,quotient);
3678 emit_test(d1,d1);
3679 emit_negmi(remainder,remainder);
3680 }
3681 if(opcode2[i]==0x1B) // DIVU
3682 {
3683 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
3684 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
3685 assert(d1>=0);
3686 assert(d2>=0);
3687 signed char quotient=get_reg(i_regs->regmap,LOREG);
3688 signed char remainder=get_reg(i_regs->regmap,HIREG);
3689 assert(quotient>=0);
3690 assert(remainder>=0);
3691 emit_mov(d1,remainder);
3692 emit_movimm(0xffffffff,quotient); // div0 case
3693 emit_test(d2,d2);
// NOTE(review): as for DIV, the +40 byte skip is shared by both variants
// below while the non-ARMv5 variant has two more emit_* calls - confirm
// where the branch lands in a non-ARMv5 build.
3694 emit_jeq((int)out+40); // Division by zero
3695 #ifdef HAVE_ARMV5
3696 emit_clz(d2,HOST_TEMPREG);
3697 emit_movimm(1<<31,quotient);
3698 emit_shl(d2,HOST_TEMPREG,d2);
3699 #else
3700 emit_movimm(0,HOST_TEMPREG);
3701 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
3702 emit_lslpls_imm(d2,1,d2);
3703 emit_jns((int)out-2*4);
3704 emit_movimm(1<<31,quotient);
3705 #endif
3706 emit_shr(quotient,HOST_TEMPREG,quotient);
// Compare/subtract/shift loop; the jcc branches back 16 bytes to emit_cmp.
// Note that the divisor's host register d2 is shifted by this sequence.
3707 emit_cmp(remainder,d2);
3708 emit_subcs(remainder,d2,remainder);
3709 emit_adcs(quotient,quotient,quotient);
3710 emit_shrcc_imm(d2,1,d2);
3711 emit_jcc(out-16); // -4
3712 }
3713 }
3714 else // 64-bit
3715 assert(0);
3716 }
3717 else
3718 {
3719 // Multiply by zero is zero.
3720 // MIPS does not have a divide by zero exception.
3721 // The result is undefined, we return zero.
3722 signed char hr=get_reg(i_regs->regmap,HIREG);
3723 signed char lr=get_reg(i_regs->regmap,LOREG);
3724 if(hr>=0) emit_zeroreg(hr);
3725 if(lr>=0) emit_zeroreg(lr);
3726 }
3727 }
3728 #define multdiv_assemble multdiv_assemble_arm
3729
// Intentionally emits nothing on ARM.
static void do_preload_rhash(int r) {
  // Don't need this for ARM. On x86, this puts the value 0xf8 into the
  // register. On ARM the hash can be done with a single instruction
  // (see do_rhash).
  (void)r;
}
3734
3735static void do_preload_rhtbl(int ht) {
3736 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
3737}
3738
// Emit the hash computation: rh = rs & 0xf8.
static void do_rhash(int rs,int rh) {
  const int hash_mask=0xf8;

  emit_andimm(rs,hash_mask,rh);
}
3742
3743static void do_miniht_load(int ht,int rh) {
3744 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
3745 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
3746}
3747
3748static void do_miniht_jump(int rs,int rh,int ht) {
3749 emit_cmp(rh,rs);
3750 emit_ldreq_indexed(ht,4,15);
3751 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3752 emit_mov(rs,7);
3753 emit_jmp(jump_vaddr_reg[7]);
3754 #else
3755 emit_jmp(jump_vaddr_reg[rs]);
3756 #endif
3757}
3758
3759static void do_miniht_insert(u_int return_address,int rt,int temp) {
3760 #ifndef HAVE_ARMV7
3761 emit_movimm(return_address,rt); // PC into link register
3762 add_to_linker(out,return_address,1);
3763 emit_pcreladdr(temp);
3764 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3765 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3766 #else
3767 emit_movw(return_address&0x0000FFFF,rt);
3768 add_to_linker(out,return_address,1);
3769 emit_pcreladdr(temp);
3770 emit_writeword(temp,&mini_ht[(return_address&0xFF)>>3][1]);
3771 emit_movt(return_address&0xFFFF0000,rt);
3772 emit_writeword(rt,&mini_ht[(return_address&0xFF)>>3][0]);
3773 #endif
3774}
3775
3776static void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
3777{
3778 //if(dirty_pre==dirty) return;
3779 int hr,reg;
3780 for(hr=0;hr<HOST_REGS;hr++) {
3781 if(hr!=EXCLUDE_REG) {
3782 reg=pre[hr];
3783 if(((~u)>>(reg&63))&1) {
3784 if(reg>0) {
3785 if(((dirty_pre&~dirty)>>hr)&1) {
3786 if(reg>0&&reg<34) {
3787 emit_storereg(reg,hr);
3788 if( ((is32_pre&~uu)>>reg)&1 ) {
3789 emit_sarimm(hr,31,HOST_TEMPREG);
3790 emit_storereg(reg|64,HOST_TEMPREG);
3791 }
3792 }
3793 else if(reg>=64) {
3794 emit_storereg(reg,hr);
3795 }
3796 }
3797 }
3798 }
3799 }
3800 }
3801}
3802
3803static void mark_clear_cache(void *target)
3804{
3805 u_long offset = (u_char *)target - translation_cache;
3806 u_int mask = 1u << ((offset >> 12) & 31);
3807 if (!(needs_clear_cache[offset >> 17] & mask)) {
3808 char *start = (char *)((u_long)target & ~4095ul);
3809 start_tcache_write(start, start + 4096);
3810 needs_clear_cache[offset >> 17] |= mask;
3811 }
3812}
3813
3814// Clearing the cache is rather slow on ARM Linux, so mark the areas
3815// that need to be cleared, and then only clear these areas once.
3816static void do_clear_cache()
3817{
3818 int i,j;
3819 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
3820 {
3821 u_int bitmap=needs_clear_cache[i];
3822 if(bitmap) {
3823 u_char *start, *end;
3824 for(j=0;j<32;j++)
3825 {
3826 if(bitmap&(1<<j)) {
3827 start=translation_cache+i*131072+j*4096;
3828 end=start+4095;
3829 j++;
3830 while(j<32) {
3831 if(bitmap&(1<<j)) {
3832 end+=4096;
3833 j++;
3834 }else{
3835 end_tcache_write(start, end);
3836 break;
3837 }
3838 }
3839 }
3840 }
3841 needs_clear_cache[i]=0;
3842 }
3843 }
3844}
3845
// CPU-architecture-specific initialization.
// Nothing needs to be set up for ARM.
static void arch_init()
{
}
3849
3850// vim:shiftwidth=2:expandtab