fully reinit drc on change
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
665f33e1 31#include "arm_features.h"
054175e9 32
a327ad27 33#if !BASE_ADDR_FIXED
bdeade46 34char translation_cache[1 << TARGET_SIZE_2] __attribute__((aligned(4096)));
35#endif
36
57871462 37extern int cycle_count;
38extern int last_count;
39extern int pcaddr;
40extern int pending_exception;
41extern int branch_target;
42extern uint64_t readmem_dword;
3d624f89 43#ifdef MUPEN64
57871462 44extern precomp_instr fake_pc;
3d624f89 45#endif
57871462 46extern void *dynarec_local;
47extern u_int memory_map[1048576];
48extern u_int mini_ht[32][2];
49extern u_int rounding_modes[4];
50
51void indirect_jump_indexed();
52void indirect_jump();
53void do_interrupt();
54void jump_vaddr_r0();
55void jump_vaddr_r1();
56void jump_vaddr_r2();
57void jump_vaddr_r3();
58void jump_vaddr_r4();
59void jump_vaddr_r5();
60void jump_vaddr_r6();
61void jump_vaddr_r7();
62void jump_vaddr_r8();
63void jump_vaddr_r9();
64void jump_vaddr_r10();
65void jump_vaddr_r12();
66
67const u_int jump_vaddr_reg[16] = {
68 (int)jump_vaddr_r0,
69 (int)jump_vaddr_r1,
70 (int)jump_vaddr_r2,
71 (int)jump_vaddr_r3,
72 (int)jump_vaddr_r4,
73 (int)jump_vaddr_r5,
74 (int)jump_vaddr_r6,
75 (int)jump_vaddr_r7,
76 (int)jump_vaddr_r8,
77 (int)jump_vaddr_r9,
78 (int)jump_vaddr_r10,
79 0,
80 (int)jump_vaddr_r12,
81 0,
82 0,
83 0};
84
0bbd1454 85void invalidate_addr_r0();
86void invalidate_addr_r1();
87void invalidate_addr_r2();
88void invalidate_addr_r3();
89void invalidate_addr_r4();
90void invalidate_addr_r5();
91void invalidate_addr_r6();
92void invalidate_addr_r7();
93void invalidate_addr_r8();
94void invalidate_addr_r9();
95void invalidate_addr_r10();
96void invalidate_addr_r12();
97
98const u_int invalidate_addr_reg[16] = {
99 (int)invalidate_addr_r0,
100 (int)invalidate_addr_r1,
101 (int)invalidate_addr_r2,
102 (int)invalidate_addr_r3,
103 (int)invalidate_addr_r4,
104 (int)invalidate_addr_r5,
105 (int)invalidate_addr_r6,
106 (int)invalidate_addr_r7,
107 (int)invalidate_addr_r8,
108 (int)invalidate_addr_r9,
109 (int)invalidate_addr_r10,
110 0,
111 (int)invalidate_addr_r12,
112 0,
113 0,
114 0};
115
57871462 116#include "fpu.h"
117
dd3a91a1 118unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
119
57871462 120/* Linker */
121
122void set_jump_target(int addr,u_int target)
123{
124 u_char *ptr=(u_char *)addr;
125 u_int *ptr2=(u_int *)ptr;
126 if(ptr[3]==0xe2) {
127 assert((target-(u_int)ptr2-8)<1024);
128 assert((addr&3)==0);
129 assert((target&3)==0);
130 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
131 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
132 }
133 else if(ptr[3]==0x72) {
134 // generated by emit_jno_unlikely
135 if((target-(u_int)ptr2-8)<1024) {
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
139 }
140 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
141 assert((addr&3)==0);
142 assert((target&3)==0);
143 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
144 }
145 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
146 }
147 else {
148 assert((ptr[3]&0x0e)==0xa);
149 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
150 }
151}
152
153// This optionally copies the instruction from the target of the branch into
154// the space before the branch. Works, but the difference in speed is
155// usually insignificant.
156void set_jump_target_fillslot(int addr,u_int target,int copy)
157{
158 u_char *ptr=(u_char *)addr;
159 u_int *ptr2=(u_int *)ptr;
160 assert(!copy||ptr2[-1]==0xe28dd000);
161 if(ptr[3]==0xe2) {
162 assert(!copy);
163 assert((target-(u_int)ptr2-8)<4096);
164 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
165 }
166 else {
167 assert((ptr[3]&0x0e)==0xa);
168 u_int target_insn=*(u_int *)target;
169 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
170 copy=0;
171 }
172 if((target_insn&0x0c100000)==0x04100000) { // Load
173 copy=0;
174 }
175 if(target_insn&0x08000000) {
176 copy=0;
177 }
178 if(copy) {
179 ptr2[-1]=target_insn;
180 target+=4;
181 }
182 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
183 }
184}
185
186/* Literal pool */
187add_literal(int addr,int val)
188{
15776b68 189 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 190 literals[literalcount][0]=addr;
191 literals[literalcount][1]=val;
192 literalcount++;
193}
194
f76eeef9 195void *kill_pointer(void *stub)
57871462 196{
197 int *ptr=(int *)(stub+4);
198 assert((*ptr&0x0ff00000)==0x05900000);
199 u_int offset=*ptr&0xfff;
200 int **l_ptr=(void *)ptr+offset+8;
201 int *i_ptr=*l_ptr;
202 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 203 return i_ptr;
57871462 204}
205
f968d35d 206// find where external branch is liked to using addr of it's stub:
207// get address that insn one after stub loads (dyna_linker arg1),
208// treat it as a pointer to branch insn,
209// return addr where that branch jumps to
57871462 210int get_pointer(void *stub)
211{
212 //printf("get_pointer(%x)\n",(int)stub);
213 int *ptr=(int *)(stub+4);
f968d35d 214 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 215 u_int offset=*ptr&0xfff;
216 int **l_ptr=(void *)ptr+offset+8;
217 int *i_ptr=*l_ptr;
218 assert((*i_ptr&0x0f000000)==0x0a000000);
219 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
220}
221
222// Find the "clean" entry point from a "dirty" entry point
223// by skipping past the call to verify_code
224u_int get_clean_addr(int addr)
225{
226 int *ptr=(int *)addr;
665f33e1 227 #ifndef HAVE_ARMV7
57871462 228 ptr+=4;
229 #else
230 ptr+=6;
231 #endif
232 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
233 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
234 ptr++;
235 if((*ptr&0xFF000000)==0xea000000) {
236 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
237 }
238 return (u_int)ptr;
239}
240
241int verify_dirty(int addr)
242{
243 u_int *ptr=(u_int *)addr;
665f33e1 244 #ifndef HAVE_ARMV7
57871462 245 // get from literal pool
15776b68 246 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 247 u_int offset=*ptr&0xfff;
248 u_int *l_ptr=(void *)ptr+offset+8;
249 u_int source=l_ptr[0];
250 u_int copy=l_ptr[1];
251 u_int len=l_ptr[2];
252 ptr+=4;
253 #else
254 // ARMv7 movw/movt
255 assert((*ptr&0xFFF00000)==0xe3000000);
256 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
257 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
258 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
259 ptr+=6;
260 #endif
261 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
262 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 263#ifndef DISABLE_TLB
cfcba99a 264 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 265 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
266 unsigned int page=source>>12;
267 unsigned int map_value=memory_map[page];
268 if(map_value>=0x80000000) return 0;
269 while(page<((source+len-1)>>12)) {
270 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
271 }
272 source = source+(map_value<<2);
273 }
63cb0298 274#endif
57871462 275 //printf("verify_dirty: %x %x %x\n",source,copy,len);
276 return !memcmp((void *)source,(void *)copy,len);
277}
278
279// This doesn't necessarily find all clean entry points, just
280// guarantees that it's not dirty
281int isclean(int addr)
282{
665f33e1 283 #ifndef HAVE_ARMV7
57871462 284 int *ptr=((u_int *)addr)+4;
285 #else
286 int *ptr=((u_int *)addr)+6;
287 #endif
288 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
289 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
290 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
291 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
292 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
293 return 1;
294}
295
4a35de07 296// get source that block at addr was compiled from (host pointers)
57871462 297void get_bounds(int addr,u_int *start,u_int *end)
298{
299 u_int *ptr=(u_int *)addr;
665f33e1 300 #ifndef HAVE_ARMV7
57871462 301 // get from literal pool
15776b68 302 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 303 u_int offset=*ptr&0xfff;
304 u_int *l_ptr=(void *)ptr+offset+8;
305 u_int source=l_ptr[0];
306 //u_int copy=l_ptr[1];
307 u_int len=l_ptr[2];
308 ptr+=4;
309 #else
310 // ARMv7 movw/movt
311 assert((*ptr&0xFFF00000)==0xe3000000);
312 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
313 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
314 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
315 ptr+=6;
316 #endif
317 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
318 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 319#ifndef DISABLE_TLB
cfcba99a 320 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 321 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
322 if(memory_map[source>>12]>=0x80000000) source = 0;
323 else source = source+(memory_map[source>>12]<<2);
324 }
63cb0298 325#endif
57871462 326 *start=source;
327 *end=source+len;
328}
329
330/* Register allocation */
331
332// Note: registers are allocated clean (unmodified state)
333// if you intend to modify the register, you must call dirty_reg().
334void alloc_reg(struct regstat *cur,int i,signed char reg)
335{
336 int r,hr;
337 int preferred_reg = (reg&7);
338 if(reg==CCREG) preferred_reg=HOST_CCREG;
339 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
340
341 // Don't allocate unused registers
342 if((cur->u>>reg)&1) return;
343
344 // see if it's already allocated
345 for(hr=0;hr<HOST_REGS;hr++)
346 {
347 if(cur->regmap[hr]==reg) return;
348 }
349
350 // Keep the same mapping if the register was already allocated in a loop
351 preferred_reg = loop_reg(i,reg,preferred_reg);
352
353 // Try to allocate the preferred register
354 if(cur->regmap[preferred_reg]==-1) {
355 cur->regmap[preferred_reg]=reg;
356 cur->dirty&=~(1<<preferred_reg);
357 cur->isconst&=~(1<<preferred_reg);
358 return;
359 }
360 r=cur->regmap[preferred_reg];
361 if(r<64&&((cur->u>>r)&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367 if(r>=64&&((cur->uu>>(r&63))&1)) {
368 cur->regmap[preferred_reg]=reg;
369 cur->dirty&=~(1<<preferred_reg);
370 cur->isconst&=~(1<<preferred_reg);
371 return;
372 }
373
374 // Clear any unneeded registers
375 // We try to keep the mapping consistent, if possible, because it
376 // makes branches easier (especially loops). So we try to allocate
377 // first (see above) before removing old mappings. If this is not
378 // possible then go ahead and clear out the registers that are no
379 // longer needed.
380 for(hr=0;hr<HOST_REGS;hr++)
381 {
382 r=cur->regmap[hr];
383 if(r>=0) {
384 if(r<64) {
385 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
386 }
387 else
388 {
389 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
390 }
391 }
392 }
393 // Try to allocate any available register, but prefer
394 // registers that have not been used recently.
395 if(i>0) {
396 for(hr=0;hr<HOST_REGS;hr++) {
397 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
398 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
399 cur->regmap[hr]=reg;
400 cur->dirty&=~(1<<hr);
401 cur->isconst&=~(1<<hr);
402 return;
403 }
404 }
405 }
406 }
407 // Try to allocate any available register
408 for(hr=0;hr<HOST_REGS;hr++) {
409 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
410 cur->regmap[hr]=reg;
411 cur->dirty&=~(1<<hr);
412 cur->isconst&=~(1<<hr);
413 return;
414 }
415 }
416
417 // Ok, now we have to evict someone
418 // Pick a register we hopefully won't need soon
419 u_char hsn[MAXREG+1];
420 memset(hsn,10,sizeof(hsn));
421 int j;
422 lsn(hsn,i,&preferred_reg);
423 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
424 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
425 if(i>0) {
426 // Don't evict the cycle count at entry points, otherwise the entry
427 // stub will have to write it.
428 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
429 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
430 for(j=10;j>=3;j--)
431 {
432 // Alloc preferred register if available
433 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
434 for(hr=0;hr<HOST_REGS;hr++) {
435 // Evict both parts of a 64-bit register
436 if((cur->regmap[hr]&63)==r) {
437 cur->regmap[hr]=-1;
438 cur->dirty&=~(1<<hr);
439 cur->isconst&=~(1<<hr);
440 }
441 }
442 cur->regmap[preferred_reg]=reg;
443 return;
444 }
445 for(r=1;r<=MAXREG;r++)
446 {
447 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
448 for(hr=0;hr<HOST_REGS;hr++) {
449 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
450 if(cur->regmap[hr]==r+64) {
451 cur->regmap[hr]=reg;
452 cur->dirty&=~(1<<hr);
453 cur->isconst&=~(1<<hr);
454 return;
455 }
456 }
457 }
458 for(hr=0;hr<HOST_REGS;hr++) {
459 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
460 if(cur->regmap[hr]==r) {
461 cur->regmap[hr]=reg;
462 cur->dirty&=~(1<<hr);
463 cur->isconst&=~(1<<hr);
464 return;
465 }
466 }
467 }
468 }
469 }
470 }
471 }
472 for(j=10;j>=0;j--)
473 {
474 for(r=1;r<=MAXREG;r++)
475 {
476 if(hsn[r]==j) {
477 for(hr=0;hr<HOST_REGS;hr++) {
478 if(cur->regmap[hr]==r+64) {
479 cur->regmap[hr]=reg;
480 cur->dirty&=~(1<<hr);
481 cur->isconst&=~(1<<hr);
482 return;
483 }
484 }
485 for(hr=0;hr<HOST_REGS;hr++) {
486 if(cur->regmap[hr]==r) {
487 cur->regmap[hr]=reg;
488 cur->dirty&=~(1<<hr);
489 cur->isconst&=~(1<<hr);
490 return;
491 }
492 }
493 }
494 }
495 }
496 printf("This shouldn't happen (alloc_reg)");exit(1);
497}
498
499void alloc_reg64(struct regstat *cur,int i,signed char reg)
500{
501 int preferred_reg = 8+(reg&1);
502 int r,hr;
503
504 // allocate the lower 32 bits
505 alloc_reg(cur,i,reg);
506
507 // Don't allocate unused registers
508 if((cur->uu>>reg)&1) return;
509
510 // see if the upper half is already allocated
511 for(hr=0;hr<HOST_REGS;hr++)
512 {
513 if(cur->regmap[hr]==reg+64) return;
514 }
515
516 // Keep the same mapping if the register was already allocated in a loop
517 preferred_reg = loop_reg(i,reg,preferred_reg);
518
519 // Try to allocate the preferred register
520 if(cur->regmap[preferred_reg]==-1) {
521 cur->regmap[preferred_reg]=reg|64;
522 cur->dirty&=~(1<<preferred_reg);
523 cur->isconst&=~(1<<preferred_reg);
524 return;
525 }
526 r=cur->regmap[preferred_reg];
527 if(r<64&&((cur->u>>r)&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533 if(r>=64&&((cur->uu>>(r&63))&1)) {
534 cur->regmap[preferred_reg]=reg|64;
535 cur->dirty&=~(1<<preferred_reg);
536 cur->isconst&=~(1<<preferred_reg);
537 return;
538 }
539
540 // Clear any unneeded registers
541 // We try to keep the mapping consistent, if possible, because it
542 // makes branches easier (especially loops). So we try to allocate
543 // first (see above) before removing old mappings. If this is not
544 // possible then go ahead and clear out the registers that are no
545 // longer needed.
546 for(hr=HOST_REGS-1;hr>=0;hr--)
547 {
548 r=cur->regmap[hr];
549 if(r>=0) {
550 if(r<64) {
551 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
552 }
553 else
554 {
555 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
556 }
557 }
558 }
559 // Try to allocate any available register, but prefer
560 // registers that have not been used recently.
561 if(i>0) {
562 for(hr=0;hr<HOST_REGS;hr++) {
563 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
564 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
565 cur->regmap[hr]=reg|64;
566 cur->dirty&=~(1<<hr);
567 cur->isconst&=~(1<<hr);
568 return;
569 }
570 }
571 }
572 }
573 // Try to allocate any available register
574 for(hr=0;hr<HOST_REGS;hr++) {
575 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
576 cur->regmap[hr]=reg|64;
577 cur->dirty&=~(1<<hr);
578 cur->isconst&=~(1<<hr);
579 return;
580 }
581 }
582
583 // Ok, now we have to evict someone
584 // Pick a register we hopefully won't need soon
585 u_char hsn[MAXREG+1];
586 memset(hsn,10,sizeof(hsn));
587 int j;
588 lsn(hsn,i,&preferred_reg);
589 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
590 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
591 if(i>0) {
592 // Don't evict the cycle count at entry points, otherwise the entry
593 // stub will have to write it.
594 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
595 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
596 for(j=10;j>=3;j--)
597 {
598 // Alloc preferred register if available
599 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
600 for(hr=0;hr<HOST_REGS;hr++) {
601 // Evict both parts of a 64-bit register
602 if((cur->regmap[hr]&63)==r) {
603 cur->regmap[hr]=-1;
604 cur->dirty&=~(1<<hr);
605 cur->isconst&=~(1<<hr);
606 }
607 }
608 cur->regmap[preferred_reg]=reg|64;
609 return;
610 }
611 for(r=1;r<=MAXREG;r++)
612 {
613 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
614 for(hr=0;hr<HOST_REGS;hr++) {
615 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
616 if(cur->regmap[hr]==r+64) {
617 cur->regmap[hr]=reg|64;
618 cur->dirty&=~(1<<hr);
619 cur->isconst&=~(1<<hr);
620 return;
621 }
622 }
623 }
624 for(hr=0;hr<HOST_REGS;hr++) {
625 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
626 if(cur->regmap[hr]==r) {
627 cur->regmap[hr]=reg|64;
628 cur->dirty&=~(1<<hr);
629 cur->isconst&=~(1<<hr);
630 return;
631 }
632 }
633 }
634 }
635 }
636 }
637 }
638 for(j=10;j>=0;j--)
639 {
640 for(r=1;r<=MAXREG;r++)
641 {
642 if(hsn[r]==j) {
643 for(hr=0;hr<HOST_REGS;hr++) {
644 if(cur->regmap[hr]==r+64) {
645 cur->regmap[hr]=reg|64;
646 cur->dirty&=~(1<<hr);
647 cur->isconst&=~(1<<hr);
648 return;
649 }
650 }
651 for(hr=0;hr<HOST_REGS;hr++) {
652 if(cur->regmap[hr]==r) {
653 cur->regmap[hr]=reg|64;
654 cur->dirty&=~(1<<hr);
655 cur->isconst&=~(1<<hr);
656 return;
657 }
658 }
659 }
660 }
661 }
662 printf("This shouldn't happen");exit(1);
663}
664
665// Allocate a temporary register. This is done without regard to
666// dirty status or whether the register we request is on the unneeded list
667// Note: This will only allocate one register, even if called multiple times
668void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
669{
670 int r,hr;
671 int preferred_reg = -1;
672
673 // see if it's already allocated
674 for(hr=0;hr<HOST_REGS;hr++)
675 {
676 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
677 }
678
679 // Try to allocate any available register
680 for(hr=HOST_REGS-1;hr>=0;hr--) {
681 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
682 cur->regmap[hr]=reg;
683 cur->dirty&=~(1<<hr);
684 cur->isconst&=~(1<<hr);
685 return;
686 }
687 }
688
689 // Find an unneeded register
690 for(hr=HOST_REGS-1;hr>=0;hr--)
691 {
692 r=cur->regmap[hr];
693 if(r>=0) {
694 if(r<64) {
695 if((cur->u>>r)&1) {
696 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
697 cur->regmap[hr]=reg;
698 cur->dirty&=~(1<<hr);
699 cur->isconst&=~(1<<hr);
700 return;
701 }
702 }
703 }
704 else
705 {
706 if((cur->uu>>(r&63))&1) {
707 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
708 cur->regmap[hr]=reg;
709 cur->dirty&=~(1<<hr);
710 cur->isconst&=~(1<<hr);
711 return;
712 }
713 }
714 }
715 }
716 }
717
718 // Ok, now we have to evict someone
719 // Pick a register we hopefully won't need soon
720 // TODO: we might want to follow unconditional jumps here
721 // TODO: get rid of dupe code and make this into a function
722 u_char hsn[MAXREG+1];
723 memset(hsn,10,sizeof(hsn));
724 int j;
725 lsn(hsn,i,&preferred_reg);
726 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
727 if(i>0) {
728 // Don't evict the cycle count at entry points, otherwise the entry
729 // stub will have to write it.
730 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
731 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
732 for(j=10;j>=3;j--)
733 {
734 for(r=1;r<=MAXREG;r++)
735 {
736 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
737 for(hr=0;hr<HOST_REGS;hr++) {
738 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
739 if(cur->regmap[hr]==r+64) {
740 cur->regmap[hr]=reg;
741 cur->dirty&=~(1<<hr);
742 cur->isconst&=~(1<<hr);
743 return;
744 }
745 }
746 }
747 for(hr=0;hr<HOST_REGS;hr++) {
748 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
749 if(cur->regmap[hr]==r) {
750 cur->regmap[hr]=reg;
751 cur->dirty&=~(1<<hr);
752 cur->isconst&=~(1<<hr);
753 return;
754 }
755 }
756 }
757 }
758 }
759 }
760 }
761 for(j=10;j>=0;j--)
762 {
763 for(r=1;r<=MAXREG;r++)
764 {
765 if(hsn[r]==j) {
766 for(hr=0;hr<HOST_REGS;hr++) {
767 if(cur->regmap[hr]==r+64) {
768 cur->regmap[hr]=reg;
769 cur->dirty&=~(1<<hr);
770 cur->isconst&=~(1<<hr);
771 return;
772 }
773 }
774 for(hr=0;hr<HOST_REGS;hr++) {
775 if(cur->regmap[hr]==r) {
776 cur->regmap[hr]=reg;
777 cur->dirty&=~(1<<hr);
778 cur->isconst&=~(1<<hr);
779 return;
780 }
781 }
782 }
783 }
784 }
785 printf("This shouldn't happen");exit(1);
786}
787// Allocate a specific ARM register.
788void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
789{
790 int n;
f776eb14 791 int dirty=0;
57871462 792
793 // see if it's already allocated (and dealloc it)
794 for(n=0;n<HOST_REGS;n++)
795 {
f776eb14 796 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
797 dirty=(cur->dirty>>n)&1;
798 cur->regmap[n]=-1;
799 }
57871462 800 }
801
802 cur->regmap[hr]=reg;
803 cur->dirty&=~(1<<hr);
f776eb14 804 cur->dirty|=dirty<<hr;
57871462 805 cur->isconst&=~(1<<hr);
806}
807
808// Alloc cycle count into dedicated register
809alloc_cc(struct regstat *cur,int i)
810{
811 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
812}
813
814/* Special alloc */
815
816
817/* Assembler */
818
// Printable names of the 16 ARM registers, indexed by register number
// (used in the assem_debug output).
char regname[16][4] = {
  "r0",  "r1",  "r2",  "r3",
  "r4",  "r5",  "r6",  "r7",
  "r8",  "r9",  "r10", "fp",
  "r12", "sp",  "lr",  "pc"
};
836
837void output_byte(u_char byte)
838{
839 *(out++)=byte;
840}
841void output_modrm(u_char mod,u_char rm,u_char ext)
842{
843 assert(mod<4);
844 assert(rm<8);
845 assert(ext<8);
846 u_char byte=(mod<<6)|(ext<<3)|rm;
847 *(out++)=byte;
848}
849void output_sib(u_char scale,u_char index,u_char base)
850{
851 assert(scale<4);
852 assert(index<8);
853 assert(base<8);
854 u_char byte=(scale<<6)|(index<<3)|base;
855 *(out++)=byte;
856}
857void output_w32(u_int word)
858{
859 *((u_int *)out)=word;
860 out+=4;
861}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
// Each register number must be below 16.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  u_int fields;
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  fields=rn<<16;
  fields|=rd<<12;
  fields|=rm;
  return fields;
}
// Build the rd/rn fields plus an 8-bit immediate that is to be shifted
// left by an even amount: the rotate field is ((32-shift)&30)<<7.
// rd and rn must be below 16, imm below 256 and shift even.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  u_int rotate;
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  rotate=((32-shift)&30)<<7;
  return (rn<<16)|(rd<<12)|rotate|imm;
}
// Try to express imm as an 8-bit value with an even rotation.
// On success *encoded receives the 12-bit rotate/immediate field and 1 is
// returned; otherwise *encoded is left at 0 and 0 is returned.
// imm==0 succeeds with *encoded==0.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  // rotate right two bits at a time until the value fits in 8 bits
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 892void genimm_checked(u_int imm,u_int *encoded)
893{
894 u_int ret=genimm(imm,encoded);
895 assert(ret);
896}
57871462 897u_int genjmp(u_int addr)
898{
899 int offset=addr-(int)out-8;
e80343e2 900 if(offset<-33554432||offset>=33554432) {
901 if (addr>2) {
902 printf("genjmp: out of range: %08x\n", offset);
903 exit(1);
904 }
905 return 0;
906 }
57871462 907 return ((u_int)offset>>2)&0xffffff;
908}
909
910void emit_mov(int rs,int rt)
911{
912 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
913 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
914}
915
916void emit_movs(int rs,int rt)
917{
918 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
919 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
920}
921
922void emit_add(int rs1,int rs2,int rt)
923{
924 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
926}
927
928void emit_adds(int rs1,int rs2,int rt)
929{
930 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_adcs(int rs1,int rs2,int rt)
935{
936 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_sbc(int rs1,int rs2,int rt)
941{
942 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_sbcs(int rs1,int rs2,int rt)
947{
948 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
949 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
950}
951
952void emit_neg(int rs, int rt)
953{
954 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
955 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
956}
957
958void emit_negs(int rs, int rt)
959{
960 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
961 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
962}
963
964void emit_sub(int rs1,int rs2,int rt)
965{
966 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
967 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
968}
969
970void emit_subs(int rs1,int rs2,int rt)
971{
972 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
973 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
974}
975
976void emit_zeroreg(int rt)
977{
978 assem_debug("mov %s,#0\n",regname[rt]);
979 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
980}
981
790ee18e 982void emit_loadlp(u_int imm,u_int rt)
983{
984 add_literal((int)out,imm);
985 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
986 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
987}
988void emit_movw(u_int imm,u_int rt)
989{
990 assert(imm<65536);
991 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
992 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
993}
994void emit_movt(u_int imm,u_int rt)
995{
996 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
997 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
998}
999void emit_movimm(u_int imm,u_int rt)
1000{
1001 u_int armval;
1002 if(genimm(imm,&armval)) {
1003 assem_debug("mov %s,#%d\n",regname[rt],imm);
1004 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
1005 }else if(genimm(~imm,&armval)) {
1006 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1007 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1008 }else if(imm<65536) {
665f33e1 1009 #ifndef HAVE_ARMV7
790ee18e 1010 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1011 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1012 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1013 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1014 #else
1015 emit_movw(imm,rt);
1016 #endif
1017 }else{
665f33e1 1018 #ifndef HAVE_ARMV7
790ee18e 1019 emit_loadlp(imm,rt);
1020 #else
1021 emit_movw(imm&0x0000FFFF,rt);
1022 emit_movt(imm&0xFFFF0000,rt);
1023 #endif
1024 }
1025}
1026void emit_pcreladdr(u_int rt)
1027{
1028 assem_debug("add %s,pc,#?\n",regname[rt]);
1029 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1030}
1031
57871462 1032void emit_loadreg(int r, int hr)
1033{
3d624f89 1034#ifdef FORCE32
1035 if(r&64) {
1036 printf("64bit load in 32bit mode!\n");
7f2607ea 1037 assert(0);
1038 return;
3d624f89 1039 }
1040#endif
57871462 1041 if((r&63)==0)
1042 emit_zeroreg(hr);
1043 else {
3d624f89 1044 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1045 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1046 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1047 if(r==CCREG) addr=(int)&cycle_count;
1048 if(r==CSREG) addr=(int)&Status;
1049 if(r==FSREG) addr=(int)&FCR31;
1050 if(r==INVCP) addr=(int)&invc_ptr;
1051 u_int offset = addr-(u_int)&dynarec_local;
1052 assert(offset<4096);
1053 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1054 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1055 }
1056}
1057void emit_storereg(int r, int hr)
1058{
3d624f89 1059#ifdef FORCE32
1060 if(r&64) {
1061 printf("64bit store in 32bit mode!\n");
7f2607ea 1062 assert(0);
1063 return;
3d624f89 1064 }
1065#endif
1066 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1067 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1068 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1069 if(r==CCREG) addr=(int)&cycle_count;
1070 if(r==FSREG) addr=(int)&FCR31;
1071 u_int offset = addr-(u_int)&dynarec_local;
1072 assert(offset<4096);
1073 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1074 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1075}
1076
1077void emit_test(int rs, int rt)
1078{
1079 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1080 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1081}
1082
1083void emit_testimm(int rs,int imm)
1084{
1085 u_int armval;
5a05d80c 1086 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1087 genimm_checked(imm,&armval);
57871462 1088 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1089}
1090
b9b61529 1091void emit_testeqimm(int rs,int imm)
1092{
1093 u_int armval;
1094 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1095 genimm_checked(imm,&armval);
b9b61529 1096 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1097}
1098
57871462 1099void emit_not(int rs,int rt)
1100{
1101 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1102 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1103}
1104
b9b61529 1105void emit_mvnmi(int rs,int rt)
1106{
1107 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1108 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1109}
1110
57871462 1111void emit_and(u_int rs1,u_int rs2,u_int rt)
1112{
1113 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1115}
1116
1117void emit_or(u_int rs1,u_int rs2,u_int rt)
1118{
1119 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1120 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1121}
1122void emit_or_and_set_flags(int rs1,int rs2,int rt)
1123{
1124 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1125 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1126}
1127
f70d384d 1128void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1129{
1130 assert(rs<16);
1131 assert(rt<16);
1132 assert(imm<32);
1133 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1134 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1135}
1136
576bbd8f 1137void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1138{
1139 assert(rs<16);
1140 assert(rt<16);
1141 assert(imm<32);
1142 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1143 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1144}
1145
57871462 1146void emit_xor(u_int rs1,u_int rs2,u_int rt)
1147{
1148 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1149 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1150}
1151
57871462 1152void emit_addimm(u_int rs,int imm,u_int rt)
1153{
1154 assert(rs<16);
1155 assert(rt<16);
1156 if(imm!=0) {
57871462 1157 u_int armval;
1158 if(genimm(imm,&armval)) {
1159 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1160 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1161 }else if(genimm(-imm,&armval)) {
8a0a8423 1162 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1163 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1164 }else if(imm<0) {
ffb0b9e0 1165 assert(imm>-65536);
57871462 1166 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1167 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1168 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1169 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1170 }else{
ffb0b9e0 1171 assert(imm<65536);
57871462 1172 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1173 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1174 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1175 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1176 }
1177 }
1178 else if(rs!=rt) emit_mov(rs,rt);
1179}
1180
1181void emit_addimm_and_set_flags(int imm,int rt)
1182{
1183 assert(imm>-65536&&imm<65536);
1184 u_int armval;
1185 if(genimm(imm,&armval)) {
1186 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1187 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1188 }else if(genimm(-imm,&armval)) {
1189 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1190 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1191 }else if(imm<0) {
1192 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1193 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1194 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1195 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1196 }else{
1197 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1198 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1199 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1200 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1201 }
1202}
1203void emit_addimm_no_flags(u_int imm,u_int rt)
1204{
1205 emit_addimm(rt,imm,rt);
1206}
1207
1208void emit_addnop(u_int r)
1209{
1210 assert(r<16);
1211 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1212 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1213}
1214
1215void emit_adcimm(u_int rs,int imm,u_int rt)
1216{
1217 u_int armval;
cfbd3c6e 1218 genimm_checked(imm,&armval);
57871462 1219 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1220 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1221}
1222/*void emit_sbcimm(int imm,u_int rt)
1223{
1224 u_int armval;
cfbd3c6e 1225 genimm_checked(imm,&armval);
57871462 1226 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1227 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1228}*/
1229void emit_sbbimm(int imm,u_int rt)
1230{
1231 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1232 assert(rt<8);
1233 if(imm<128&&imm>=-128) {
1234 output_byte(0x83);
1235 output_modrm(3,rt,3);
1236 output_byte(imm);
1237 }
1238 else
1239 {
1240 output_byte(0x81);
1241 output_modrm(3,rt,3);
1242 output_w32(imm);
1243 }
1244}
1245void emit_rscimm(int rs,int imm,u_int rt)
1246{
1247 assert(0);
1248 u_int armval;
cfbd3c6e 1249 genimm_checked(imm,&armval);
57871462 1250 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1251 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1252}
1253
1254void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1255{
1256 // TODO: if(genimm(imm,&armval)) ...
1257 // else
1258 emit_movimm(imm,HOST_TEMPREG);
1259 emit_adds(HOST_TEMPREG,rsl,rtl);
1260 emit_adcimm(rsh,0,rth);
1261}
1262
1263void emit_sbb(int rs1,int rs2)
1264{
1265 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1266 output_byte(0x19);
1267 output_modrm(3,rs1,rs2);
1268}
1269
1270void emit_andimm(int rs,int imm,int rt)
1271{
1272 u_int armval;
790ee18e 1273 if(imm==0) {
1274 emit_zeroreg(rt);
1275 }else if(genimm(imm,&armval)) {
57871462 1276 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1277 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1278 }else if(genimm(~imm,&armval)) {
1279 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1280 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1281 }else if(imm==65535) {
665f33e1 1282 #ifndef HAVE_ARMV7
57871462 1283 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1284 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1285 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1286 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1287 #else
1288 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1289 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1290 #endif
1291 }else{
1292 assert(imm>0&&imm<65535);
665f33e1 1293 #ifndef HAVE_ARMV7
57871462 1294 assem_debug("mov r14,#%d\n",imm&0xFF00);
1295 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1296 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1297 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1298 #else
1299 emit_movw(imm,HOST_TEMPREG);
1300 #endif
1301 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1302 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1303 }
1304}
1305
1306void emit_orimm(int rs,int imm,int rt)
1307{
1308 u_int armval;
790ee18e 1309 if(imm==0) {
1310 if(rs!=rt) emit_mov(rs,rt);
1311 }else if(genimm(imm,&armval)) {
57871462 1312 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1313 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1314 }else{
1315 assert(imm>0&&imm<65536);
1316 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1317 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1318 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1319 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1320 }
1321}
1322
1323void emit_xorimm(int rs,int imm,int rt)
1324{
57871462 1325 u_int armval;
790ee18e 1326 if(imm==0) {
1327 if(rs!=rt) emit_mov(rs,rt);
1328 }else if(genimm(imm,&armval)) {
57871462 1329 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1330 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1331 }else{
514ed0d9 1332 assert(imm>0&&imm<65536);
57871462 1333 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1334 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1335 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1336 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1337 }
1338}
1339
1340void emit_shlimm(int rs,u_int imm,int rt)
1341{
1342 assert(imm>0);
1343 assert(imm<32);
1344 //if(imm==1) ...
1345 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1346 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1347}
1348
c6c3b1b3 1349void emit_lsls_imm(int rs,int imm,int rt)
1350{
1351 assert(imm>0);
1352 assert(imm<32);
1353 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1354 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1355}
1356
665f33e1 1357void emit_lslpls_imm(int rs,int imm,int rt)
1358{
1359 assert(imm>0);
1360 assert(imm<32);
1361 assem_debug("lslpls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1362 output_w32(0x51b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1363}
1364
57871462 1365void emit_shrimm(int rs,u_int imm,int rt)
1366{
1367 assert(imm>0);
1368 assert(imm<32);
1369 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1370 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1371}
1372
1373void emit_sarimm(int rs,u_int imm,int rt)
1374{
1375 assert(imm>0);
1376 assert(imm<32);
1377 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1378 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1379}
1380
1381void emit_rorimm(int rs,u_int imm,int rt)
1382{
1383 assert(imm>0);
1384 assert(imm<32);
1385 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1386 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1387}
1388
1389void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1390{
1391 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1392 assert(imm>0);
1393 assert(imm<32);
1394 //if(imm==1) ...
1395 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1396 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1397 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1398 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1399}
1400
1401void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1402{
1403 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1404 assert(imm>0);
1405 assert(imm<32);
1406 //if(imm==1) ...
1407 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1408 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1409 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1410 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1411}
1412
// Sign-extend the low 16 bits of rs into rt: "sxth" when HAVE_ARMV7 is
// defined, otherwise a shift-left/arithmetic-shift-right pair by 16.
void emit_signextend16(int rs,int rt)
{
  #ifdef HAVE_ARMV7
  assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,16,rt);
  emit_sarimm(rt,16,rt);
  #endif
}
1423
// Sign-extend the low 8 bits of rs into rt: "sxtb" when HAVE_ARMV7 is
// defined, otherwise a shift-left/arithmetic-shift-right pair by 24.
void emit_signextend8(int rs,int rt)
{
  #ifdef HAVE_ARMV7
  assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
  output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
  #else
  emit_shlimm(rs,24,rt);
  emit_sarimm(rt,24,rt);
  #endif
}
1434
57871462 1435void emit_shl(u_int rs,u_int shift,u_int rt)
1436{
1437 assert(rs<16);
1438 assert(rt<16);
1439 assert(shift<16);
1440 //if(imm==1) ...
1441 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1442 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1443}
1444void emit_shr(u_int rs,u_int shift,u_int rt)
1445{
1446 assert(rs<16);
1447 assert(rt<16);
1448 assert(shift<16);
1449 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1450 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1451}
1452void emit_sar(u_int rs,u_int shift,u_int rt)
1453{
1454 assert(rs<16);
1455 assert(rt<16);
1456 assert(shift<16);
1457 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1458 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1459}
1460void emit_shlcl(int r)
1461{
1462 assem_debug("shl %%%s,%%cl\n",regname[r]);
1463 assert(0);
1464}
1465void emit_shrcl(int r)
1466{
1467 assem_debug("shr %%%s,%%cl\n",regname[r]);
1468 assert(0);
1469}
1470void emit_sarcl(int r)
1471{
1472 assem_debug("sar %%%s,%%cl\n",regname[r]);
1473 assert(0);
1474}
1475
1476void emit_shldcl(int r1,int r2)
1477{
1478 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1479 assert(0);
1480}
1481void emit_shrdcl(int r1,int r2)
1482{
1483 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1484 assert(0);
1485}
1486void emit_orrshl(u_int rs,u_int shift,u_int rt)
1487{
1488 assert(rs<16);
1489 assert(rt<16);
1490 assert(shift<16);
1491 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1492 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1493}
1494void emit_orrshr(u_int rs,u_int shift,u_int rt)
1495{
1496 assert(rs<16);
1497 assert(rt<16);
1498 assert(shift<16);
1499 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1500 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1501}
1502
1503void emit_cmpimm(int rs,int imm)
1504{
1505 u_int armval;
1506 if(genimm(imm,&armval)) {
5a05d80c 1507 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1508 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1509 }else if(genimm(-imm,&armval)) {
5a05d80c 1510 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1511 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1512 }else if(imm>0) {
1513 assert(imm<65536);
57871462 1514 emit_movimm(imm,HOST_TEMPREG);
57871462 1515 assem_debug("cmp %s,r14\n",regname[rs]);
1516 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1517 }else{
1518 assert(imm>-65536);
57871462 1519 emit_movimm(-imm,HOST_TEMPREG);
57871462 1520 assem_debug("cmn %s,r14\n",regname[rs]);
1521 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1522 }
1523}
1524
1525void emit_cmovne(u_int *addr,int rt)
1526{
1527 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1528 assert(0);
1529}
1530void emit_cmovl(u_int *addr,int rt)
1531{
1532 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1533 assert(0);
1534}
1535void emit_cmovs(u_int *addr,int rt)
1536{
1537 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1538 assert(0);
1539}
1540void emit_cmovne_imm(int imm,int rt)
1541{
1542 assem_debug("movne %s,#%d\n",regname[rt],imm);
1543 u_int armval;
cfbd3c6e 1544 genimm_checked(imm,&armval);
57871462 1545 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1546}
1547void emit_cmovl_imm(int imm,int rt)
1548{
1549 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1550 u_int armval;
cfbd3c6e 1551 genimm_checked(imm,&armval);
57871462 1552 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1553}
1554void emit_cmovb_imm(int imm,int rt)
1555{
1556 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1557 u_int armval;
cfbd3c6e 1558 genimm_checked(imm,&armval);
57871462 1559 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1560}
1561void emit_cmovs_imm(int imm,int rt)
1562{
1563 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1564 u_int armval;
cfbd3c6e 1565 genimm_checked(imm,&armval);
57871462 1566 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1567}
1568void emit_cmove_reg(int rs,int rt)
1569{
1570 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1571 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1572}
1573void emit_cmovne_reg(int rs,int rt)
1574{
1575 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1576 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1577}
1578void emit_cmovl_reg(int rs,int rt)
1579{
1580 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1581 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1582}
1583void emit_cmovs_reg(int rs,int rt)
1584{
1585 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1586 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1587}
1588
// rt = (rs < imm) using the LT condition. rt is cleared before the compare
// when it is a different register, and after it when rt aliases rs.
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs < imm) using the CC condition. rt is cleared before the compare
// when it is a different register, and after it when rt aliases rs.
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
// Set-less-than of the register pair (rsh:rsl) against imm: the low-word
// result from emit_slti32 is then overridden according to the high word.
// rt must not be the same register as rsh.
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0)
  {
    // high word is compared against -1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  // imm>=0: high word is tested against zero / sign
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
// Carry-based set-less-than of the register pair (rsh:rsl) against imm: the
// low-word result from emit_sltiu32 is overridden according to the high word.
// rt must not be the same register as rsh.
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0)
  {
    // high word differs from -1 -> result forced to 1
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  // imm>=0: any non-zero high word -> result forced to 0
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1635
1636void emit_cmp(int rs,int rt)
1637{
1638 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1639 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1640}
// rt = 1, then cleared under LT after comparing rs with 1
// (i.e. rt ends up 0 when rs < 1).
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);      // flags from rs - 1
  emit_movimm(1,rt);      // default result
  emit_cmovl_imm(0,rt);   // rs < 1 -> 0
}
// rt = (rs != 0): flags come from a flag-setting move (rs!=rt) or a tst
// (rs==rt), then rt is set to 1 under NE.
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  } else {
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
// rt = ((rsh:rsl) > 0) for a signed 64-bit value held in two registers.
// With a zero high word the value is positive whenever the low word is
// non-zero, so the low word is tested for non-zero rather than compared as
// a signed 32-bit number (which misreports low words >= 0x80000000).
// The high word then overrides: non-zero -> 1, negative -> 0.
// rt must not be the same register as rsh, because rt is written before
// rsh is tested.
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_nz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
// rt = ((rsh | rsl) != 0): a flag-setting OR of both halves into rt,
// followed by a conditional "rt = 1" under NE.
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt); // rt = rsh|rsl, sets Z
  emit_cmovne_imm(1,rt);             // non-zero -> 1
}
// rt = (rs1 < rs2) using the LT condition. rt is cleared before the compare
// unless it aliases one of the operands, in which case it is cleared after.
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
// rt = (rs1 < rs2) using the CC condition. rt is cleared before the compare
// unless it aliases one of the operands, in which case it is cleared after.
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1685void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1686{
1687 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1688 assert(u1!=rt);
1689 assert(u2!=rt);
1690 emit_cmp(l1,l2);
1691 emit_movimm(0,rt);
1692 emit_sbcs(u1,u2,HOST_TEMPREG);
1693 emit_cmovl_imm(1,rt);
1694}
1695void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1696{
1697 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1698 assert(u1!=rt);
1699 assert(u2!=rt);
1700 emit_cmp(l1,l2);
1701 emit_movimm(0,rt);
1702 emit_sbcs(u1,u2,HOST_TEMPREG);
1703 emit_cmovb_imm(1,rt);
1704}
1705
1706void emit_call(int a)
1707{
1708 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1709 u_int offset=genjmp(a);
1710 output_w32(0xeb000000|offset);
1711}
1712void emit_jmp(int a)
1713{
1714 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1715 u_int offset=genjmp(a);
1716 output_w32(0xea000000|offset);
1717}
1718void emit_jne(int a)
1719{
1720 assem_debug("bne %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x1a000000|offset);
1723}
1724void emit_jeq(int a)
1725{
1726 assem_debug("beq %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x0a000000|offset);
1729}
1730void emit_js(int a)
1731{
1732 assem_debug("bmi %x\n",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0x4a000000|offset);
1735}
1736void emit_jns(int a)
1737{
1738 assem_debug("bpl %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0x5a000000|offset);
1741}
1742void emit_jl(int a)
1743{
1744 assem_debug("blt %x\n",a);
1745 u_int offset=genjmp(a);
1746 output_w32(0xba000000|offset);
1747}
1748void emit_jge(int a)
1749{
1750 assem_debug("bge %x\n",a);
1751 u_int offset=genjmp(a);
1752 output_w32(0xaa000000|offset);
1753}
1754void emit_jno(int a)
1755{
1756 assem_debug("bvc %x\n",a);
1757 u_int offset=genjmp(a);
1758 output_w32(0x7a000000|offset);
1759}
1760void emit_jc(int a)
1761{
1762 assem_debug("bcs %x\n",a);
1763 u_int offset=genjmp(a);
1764 output_w32(0x2a000000|offset);
1765}
1766void emit_jcc(int a)
1767{
1768 assem_debug("bcc %x\n",a);
1769 u_int offset=genjmp(a);
1770 output_w32(0x3a000000|offset);
1771}
1772
// Not implemented in this assembler: traces the request, then traps.
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0); /* unsupported */
}
// Not implemented in this assembler: traces the request, then traps.
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); /* unsupported */
}
// Not implemented in this assembler: traces the request, then traps.
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); /* unsupported */
}
1788void emit_pushreg(u_int r)
1789{
1790 assem_debug("push %%%s\n",regname[r]);
1791 assert(0);
1792}
1793void emit_popreg(u_int r)
1794{
1795 assem_debug("pop %%%s\n",regname[r]);
1796 assert(0);
1797}
1798void emit_callreg(u_int r)
1799{
c6c3b1b3 1800 assert(r<15);
1801 assem_debug("blx %s\n",regname[r]);
1802 output_w32(0xe12fff30|r);
57871462 1803}
1804void emit_jmpreg(u_int r)
1805{
1806 assem_debug("mov pc,%s\n",regname[r]);
1807 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1808}
1809
1810void emit_readword_indexed(int offset, int rs, int rt)
1811{
1812 assert(offset>-4096&&offset<4096);
1813 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1814 if(offset>=0) {
1815 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1816 }else{
1817 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1818 }
1819}
1820void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1821{
1822 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1823 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1824}
c6c3b1b3 1825void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1826{
1827 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1828 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1829}
1830void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1831{
1832 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1833 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1834}
1835void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1836{
1837 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1838 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1839}
1840void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1841{
1842 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1843 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1844}
1845void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1846{
1847 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1848 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1849}
// Word load with optional map register: map<0 uses the plain offset form,
// otherwise addr must be 0 and the [rs,map,lsl #2] form is emitted.
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
// Load two consecutive words into rh (skipped when rh<0) and rl.
// Without a map register the words come from addr and addr+4; with one,
// map is incremented by 1 between the loads (map is modified) and rh must
// not be the same register as rs.
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1870void emit_movsbl_indexed(int offset, int rs, int rt)
1871{
1872 assert(offset>-256&&offset<256);
1873 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1874 if(offset>=0) {
1875 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1876 }else{
1877 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1878 }
1879}
1880void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1881{
1882 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1883 else {
1884 if(addr==0) {
1885 emit_shlimm(map,2,map);
1886 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1887 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1888 }else{
1889 assert(addr>-256&&addr<256);
1890 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1891 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1892 emit_movsbl_indexed(addr, rt, rt);
1893 }
1894 }
1895}
1896void emit_movswl_indexed(int offset, int rs, int rt)
1897{
1898 assert(offset>-256&&offset<256);
1899 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1900 if(offset>=0) {
1901 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1902 }else{
1903 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1904 }
1905}
1906void emit_movzbl_indexed(int offset, int rs, int rt)
1907{
1908 assert(offset>-4096&&offset<4096);
1909 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1910 if(offset>=0) {
1911 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1912 }else{
1913 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1914 }
1915}
1916void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1917{
1918 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1919 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1920}
// Unsigned byte load with optional map register: map<0 uses the plain
// offset form; otherwise the [base,map,lsl #2] form, with a non-zero addr
// first added to rs into rt, which then serves as the base.
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr==0) {
    emit_movzbl_dualindexedx4(rs, map, rt);
    return;
  }
  emit_addimm(rs,addr,rt);
  emit_movzbl_dualindexedx4(rt, map, rt);
}
1933void emit_movzwl_indexed(int offset, int rs, int rt)
1934{
1935 assert(offset>-256&&offset<256);
1936 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1937 if(offset>=0) {
1938 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1939 }else{
1940 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1941 }
1942}
054175e9 1943static void emit_ldrd(int offset, int rs, int rt)
1944{
1945 assert(offset>-256&&offset<256);
1946 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1947 if(offset>=0) {
1948 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1949 }else{
1950 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1951 }
1952}
57871462 1953void emit_readword(int addr, int rt)
1954{
1955 u_int offset = addr-(u_int)&dynarec_local;
1956 assert(offset<4096);
1957 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1958 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1959}
1960void emit_movsbl(int addr, int rt)
1961{
1962 u_int offset = addr-(u_int)&dynarec_local;
1963 assert(offset<256);
1964 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1965 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1966}
1967void emit_movswl(int addr, int rt)
1968{
1969 u_int offset = addr-(u_int)&dynarec_local;
1970 assert(offset<256);
1971 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1972 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1973}
1974void emit_movzbl(int addr, int rt)
1975{
1976 u_int offset = addr-(u_int)&dynarec_local;
1977 assert(offset<4096);
1978 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1979 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1980}
1981void emit_movzwl(int addr, int rt)
1982{
1983 u_int offset = addr-(u_int)&dynarec_local;
1984 assert(offset<256);
1985 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1986 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1987}
1988void emit_movzwl_reg(int rs, int rt)
1989{
1990 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1991 assert(0);
1992}
1993
1994void emit_xchg(int rs, int rt)
1995{
1996 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1997 assert(0);
1998}
1999void emit_writeword_indexed(int rt, int offset, int rs)
2000{
2001 assert(offset>-4096&&offset<4096);
2002 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
2003 if(offset>=0) {
2004 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
2005 }else{
2006 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2007 }
2008}
2009void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2010{
2011 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2012 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2013}
// Word store with optional map register: map<0 uses the plain offset form,
// otherwise addr must be 0 and the [rs,map,lsl #2] form is emitted.
// temp is not used by this function.
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
// Store the register pair rh/rl as two consecutive words.
//  - map<0: plain offset stores at addr (skipped when rh<0) and addr+4.
//  - map>=0: rh is required. When temp differs from rs, temp = map+1 indexes
//    the second word; otherwise rs itself is advanced by 4 (rs is modified).
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int have_temp=(temp!=rs);
  if(have_temp) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(have_temp) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  } else {
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2038void emit_writehword_indexed(int rt, int offset, int rs)
2039{
2040 assert(offset>-256&&offset<256);
2041 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2042 if(offset>=0) {
2043 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2044 }else{
2045 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2046 }
2047}
2048void emit_writebyte_indexed(int rt, int offset, int rs)
2049{
2050 assert(offset>-4096&&offset<4096);
2051 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2052 if(offset>=0) {
2053 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2054 }else{
2055 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2056 }
2057}
2058void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2059{
2060 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2061 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2062}
// Byte store with optional map register: map<0 uses the plain offset form;
// otherwise the [base,map,lsl #2] form, with a non-zero addr first added to
// rs into temp, which then serves as the base.
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr==0) {
    emit_writebyte_dualindexedx4(rt, rs, map);
    return;
  }
  emit_addimm(rs,addr,temp);
  emit_writebyte_dualindexedx4(rt, temp, map);
}
b96d3df7 2075void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2076{
2077 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2078 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2079}
2080void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2081{
2082 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2083 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2084}
2085void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2086{
2087 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2088 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2089}
57871462 2090void emit_writeword(int rt, int addr)
2091{
2092 u_int offset = addr-(u_int)&dynarec_local;
2093 assert(offset<4096);
2094 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2095 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2096}
2097void emit_writehword(int rt, int addr)
2098{
2099 u_int offset = addr-(u_int)&dynarec_local;
2100 assert(offset<256);
2101 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2102 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2103}
2104void emit_writebyte(int rt, int addr)
2105{
2106 u_int offset = addr-(u_int)&dynarec_local;
2107 assert(offset<4096);
74426039 2108 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2109 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2110}
// Not implemented in this assembler: traces the request, then traps.
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0); /* unsupported */
}
// Not implemented in this assembler: traces the request, then traps.
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0); /* unsupported */
}
2121
2122void emit_mul(int rs)
2123{
2124 assem_debug("mul %%%s\n",regname[rs]);
2125 assert(0);
2126}
2127void emit_imul(int rs)
2128{
2129 assem_debug("imul %%%s\n",regname[rs]);
2130 assert(0);
2131}
2132void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2133{
2134 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2135 assert(rs1<16);
2136 assert(rs2<16);
2137 assert(hi<16);
2138 assert(lo<16);
2139 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2140}
2141void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2142{
2143 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2144 assert(rs1<16);
2145 assert(rs2<16);
2146 assert(hi<16);
2147 assert(lo<16);
2148 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2149}
2150
2151void emit_div(int rs)
2152{
2153 assem_debug("div %%%s\n",regname[rs]);
2154 assert(0);
2155}
2156void emit_idiv(int rs)
2157{
2158 assem_debug("idiv %%%s\n",regname[rs]);
2159 assert(0);
2160}
// Not implemented for ARM: logs the request and then always asserts.
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2166
2167void emit_clz(int rs,int rt)
2168{
2169 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2170 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2171}
2172
2173void emit_subcs(int rs1,int rs2,int rt)
2174{
2175 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2176 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2177}
2178
2179void emit_shrcc_imm(int rs,u_int imm,int rt)
2180{
2181 assert(imm>0);
2182 assert(imm<32);
2183 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2184 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2185}
2186
b1be1eee 2187void emit_shrne_imm(int rs,u_int imm,int rt)
2188{
2189 assert(imm>0);
2190 assert(imm<32);
2191 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2192 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2193}
2194
57871462 2195void emit_negmi(int rs, int rt)
2196{
2197 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2198 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2199}
2200
2201void emit_negsmi(int rs, int rt)
2202{
2203 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2204 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2205}
2206
2207void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2208{
2209 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2210 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2211}
2212
2213void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2214{
2215 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2216 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2217}
2218
2219void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2223}
2224
2225void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2229}
2230
2231void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2235}
2236
2237void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2241}
2242
2243void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2247}
2248
2249void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2250{
2251 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2252 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2253}
2254
2255void emit_teq(int rs, int rt)
2256{
2257 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2258 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2259}
2260
2261void emit_rsbimm(int rs, int imm, int rt)
2262{
2263 u_int armval;
cfbd3c6e 2264 genimm_checked(imm,&armval);
57871462 2265 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2266 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2267}
2268
2269// Load 2 immediates optimizing for small code size
2270void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2271{
2272 emit_movimm(imm1,rt1);
2273 u_int armval;
2274 if(genimm(imm2-imm1,&armval)) {
2275 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2276 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2277 }else if(genimm(imm1-imm2,&armval)) {
2278 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2279 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2280 }
2281 else emit_movimm(imm2,rt2);
2282}
2283
2284// Conditionally select one of two immediates, optimizing for small code size
2285// This will only be called if HAVE_CMOV_IMM is defined
2286void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2287{
2288 u_int armval;
2289 if(genimm(imm2-imm1,&armval)) {
2290 emit_movimm(imm1,rt);
2291 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2292 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2293 }else if(genimm(imm1-imm2,&armval)) {
2294 emit_movimm(imm1,rt);
2295 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2296 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2297 }
2298 else {
665f33e1 2299 #ifndef HAVE_ARMV7
57871462 2300 emit_movimm(imm1,rt);
2301 add_literal((int)out,imm2);
2302 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2303 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2304 #else
2305 emit_movw(imm1&0x0000FFFF,rt);
2306 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2307 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2308 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2309 }
2310 emit_movt(imm1&0xFFFF0000,rt);
2311 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2312 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2313 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2314 }
2315 #endif
2316 }
2317}
2318
// special case for checking invalid_code
// Absolute-address form; not implemented for ARM, so it always asserts.
void emit_cmpmem_indexedsr12_imm(int addr, int r, int imm)
{
  assert(0);
}
2324
2325// special case for checking invalid_code
2326void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2327{
2328 assert(imm<128&&imm>=0);
2329 assert(r>=0&&r<16);
2330 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2331 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2332 emit_cmpimm(HOST_TEMPREG,imm);
2333}
2334
2335// special case for tlb mapping
2336void emit_addsr12(int rs1,int rs2,int rt)
2337{
2338 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2339 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2340}
2341
0bbd1454 2342void emit_callne(int a)
2343{
2344 assem_debug("blne %x\n",a);
2345 u_int offset=genjmp(a);
2346 output_w32(0x1b000000|offset);
2347}
2348
// Used to preload hash table entries
// NOTE(review): the bytes written here (0x0F 0x18 + modrm + 32-bit
// address) look like an x86 prefetch encoding rather than an ARM
// instruction -- presumably a leftover that is never called on ARM;
// confirm before relying on it.  See emit_prefetchreg for the ARM form.
void emit_prefetch(void *addr)
{
  const int target=(int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2358void emit_prefetchreg(int r)
2359{
2360 assem_debug("pld %s\n",regname[r]);
2361 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2362}
2363
2364// Special case for mini_ht
2365void emit_ldreq_indexed(int rs, u_int offset, int rt)
2366{
2367 assert(offset<4096);
2368 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2369 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2370}
2371
2372void emit_flds(int r,int sr)
2373{
2374 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2375 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2376}
2377
2378void emit_vldr(int r,int vr)
2379{
2380 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2381 output_w32(0xed900b00|(vr<<12)|(r<<16));
2382}
2383
2384void emit_fsts(int sr,int r)
2385{
2386 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2387 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2388}
2389
2390void emit_vstr(int vr,int r)
2391{
2392 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2393 output_w32(0xed800b00|(vr<<12)|(r<<16));
2394}
2395
2396void emit_ftosizs(int s,int d)
2397{
2398 assem_debug("ftosizs s%d,s%d\n",d,s);
2399 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2400}
2401
2402void emit_ftosizd(int s,int d)
2403{
2404 assem_debug("ftosizd s%d,d%d\n",d,s);
2405 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2406}
2407
2408void emit_fsitos(int s,int d)
2409{
2410 assem_debug("fsitos s%d,s%d\n",d,s);
2411 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2412}
2413
2414void emit_fsitod(int s,int d)
2415{
2416 assem_debug("fsitod d%d,s%d\n",d,s);
2417 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2418}
2419
2420void emit_fcvtds(int s,int d)
2421{
2422 assem_debug("fcvtds d%d,s%d\n",d,s);
2423 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2424}
2425
2426void emit_fcvtsd(int s,int d)
2427{
2428 assem_debug("fcvtsd s%d,d%d\n",d,s);
2429 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2430}
2431
// Emit "fsqrts s<d>,s<s>": single-precision square root of s<s> into s<d>.
// The debug text previously printed the destination as a d-register even
// though the emitted encoding is the single-precision form.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2437
// Emit "fsqrtd d<d>,d<s>": double-precision square root of d<s> into d<d>.
// The debug text previously printed the destination as an s-register even
// though the emitted encoding is the double-precision form.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2443
// Emit "fabss s<d>,s<s>": single-precision absolute value of s<s> into s<d>.
// The debug text previously printed the destination as a d-register even
// though the emitted encoding is the single-precision form.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2449
// Emit "fabsd d<d>,d<s>": double-precision absolute value of d<s> into d<d>.
// The debug text previously printed the destination as an s-register even
// though the emitted encoding is the double-precision form.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2455
// Emit "fnegs s<d>,s<s>": single-precision negation of s<s> into s<d>.
// The debug text previously printed the destination as a d-register even
// though the emitted encoding is the single-precision form.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2461
// Emit "fnegd d<d>,d<s>": double-precision negation of d<s> into d<d>.
// The debug text previously printed the destination as an s-register even
// though the emitted encoding is the double-precision form.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2467
2468void emit_fadds(int s1,int s2,int d)
2469{
2470 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2471 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2472}
2473
2474void emit_faddd(int s1,int s2,int d)
2475{
2476 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2477 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2478}
2479
2480void emit_fsubs(int s1,int s2,int d)
2481{
2482 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2483 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2484}
2485
2486void emit_fsubd(int s1,int s2,int d)
2487{
2488 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2489 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2490}
2491
2492void emit_fmuls(int s1,int s2,int d)
2493{
2494 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2495 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2496}
2497
2498void emit_fmuld(int s1,int s2,int d)
2499{
2500 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2501 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2502}
2503
2504void emit_fdivs(int s1,int s2,int d)
2505{
2506 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2507 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2508}
2509
2510void emit_fdivd(int s1,int s2,int d)
2511{
2512 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2513 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2514}
2515
2516void emit_fcmps(int x,int y)
2517{
2518 assem_debug("fcmps s14, s15\n");
2519 output_w32(0xeeb47a67);
2520}
2521
2522void emit_fcmpd(int x,int y)
2523{
2524 assem_debug("fcmpd d6, d7\n");
2525 output_w32(0xeeb46b47);
2526}
2527
2528void emit_fmstat()
2529{
2530 assem_debug("fmstat\n");
2531 output_w32(0xeef1fa10);
2532}
2533
2534void emit_bicne_imm(int rs,int imm,int rt)
2535{
2536 u_int armval;
cfbd3c6e 2537 genimm_checked(imm,&armval);
57871462 2538 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2539 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2540}
2541
2542void emit_biccs_imm(int rs,int imm,int rt)
2543{
2544 u_int armval;
cfbd3c6e 2545 genimm_checked(imm,&armval);
57871462 2546 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2547 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2548}
2549
2550void emit_bicvc_imm(int rs,int imm,int rt)
2551{
2552 u_int armval;
cfbd3c6e 2553 genimm_checked(imm,&armval);
57871462 2554 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2555 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2556}
2557
2558void emit_bichi_imm(int rs,int imm,int rt)
2559{
2560 u_int armval;
cfbd3c6e 2561 genimm_checked(imm,&armval);
57871462 2562 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2563 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2564}
2565
2566void emit_orrvs_imm(int rs,int imm,int rt)
2567{
2568 u_int armval;
cfbd3c6e 2569 genimm_checked(imm,&armval);
57871462 2570 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2571 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2572}
2573
b9b61529 2574void emit_orrne_imm(int rs,int imm,int rt)
2575{
2576 u_int armval;
cfbd3c6e 2577 genimm_checked(imm,&armval);
b9b61529 2578 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2579 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2580}
2581
2582void emit_andne_imm(int rs,int imm,int rt)
2583{
2584 u_int armval;
cfbd3c6e 2585 genimm_checked(imm,&armval);
b9b61529 2586 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2587 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2588}
2589
665f33e1 2590void emit_addpl_imm(int rs,int imm,int rt)
2591{
2592 u_int armval;
2593 genimm_checked(imm,&armval);
2594 assem_debug("addpl %s,%s,#%d\n",regname[rt],regname[rs],imm);
2595 output_w32(0x52800000|rd_rn_rm(rt,rs,0)|armval);
2596}
2597
57871462 2598void emit_jno_unlikely(int a)
2599{
2600 //emit_jno(a);
2601 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2602 output_w32(0x72800000|rd_rn_rm(15,15,0));
2603}
2604
054175e9 2605static void save_regs_all(u_int reglist)
57871462 2606{
054175e9 2607 int i;
57871462 2608 if(!reglist) return;
2609 assem_debug("stmia fp,{");
054175e9 2610 for(i=0;i<16;i++)
2611 if(reglist&(1<<i))
2612 assem_debug("r%d,",i);
57871462 2613 assem_debug("}\n");
2614 output_w32(0xe88b0000|reglist);
2615}
054175e9 2616static void restore_regs_all(u_int reglist)
57871462 2617{
054175e9 2618 int i;
57871462 2619 if(!reglist) return;
2620 assem_debug("ldmia fp,{");
054175e9 2621 for(i=0;i<16;i++)
2622 if(reglist&(1<<i))
2623 assem_debug("r%d,",i);
57871462 2624 assem_debug("}\n");
2625 output_w32(0xe89b0000|reglist);
2626}
054175e9 2627// Save registers before function call
2628static void save_regs(u_int reglist)
2629{
2630 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2631 save_regs_all(reglist);
2632}
2633// Restore registers after function call
2634static void restore_regs(u_int reglist)
2635{
2636 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2637 restore_regs_all(reglist);
2638}
57871462 2639
2640// Write back consts using r14 so we don't disturb the other registers
2641void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2642{
2643 int hr;
2644 for(hr=0;hr<HOST_REGS;hr++) {
2645 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2646 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2647 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2648 int value=constmap[i][hr];
2649 if(value==0) {
2650 emit_zeroreg(HOST_TEMPREG);
2651 }
2652 else {
2653 emit_movimm(value,HOST_TEMPREG);
2654 }
2655 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2656#ifndef FORCE32
57871462 2657 if((i_is32>>i_regmap[hr])&1) {
2658 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2659 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2660 }
24385cae 2661#endif
57871462 2662 }
2663 }
2664 }
2665 }
2666}
2667
2668/* Stubs/epilogue */
2669
2670void literal_pool(int n)
2671{
2672 if(!literalcount) return;
2673 if(n) {
2674 if((int)out-literals[0][0]<4096-n) return;
2675 }
2676 u_int *ptr;
2677 int i;
2678 for(i=0;i<literalcount;i++)
2679 {
77750690 2680 u_int l_addr=(u_int)out;
2681 int j;
2682 for(j=0;j<i;j++) {
2683 if(literals[j][1]==literals[i][1]) {
2684 //printf("dup %08x\n",literals[i][1]);
2685 l_addr=literals[j][0];
2686 break;
2687 }
2688 }
57871462 2689 ptr=(u_int *)literals[i][0];
77750690 2690 u_int offset=l_addr-(u_int)ptr-8;
57871462 2691 assert(offset<4096);
2692 assert(!(offset&3));
2693 *ptr|=offset;
77750690 2694 if(l_addr==(u_int)out) {
2695 literals[i][0]=l_addr; // remember for dupes
2696 output_w32(literals[i][1]);
2697 }
57871462 2698 }
2699 literalcount=0;
2700}
2701
2702void literal_pool_jumpover(int n)
2703{
2704 if(!literalcount) return;
2705 if(n) {
2706 if((int)out-literals[0][0]<4096-n) return;
2707 }
2708 int jaddr=(int)out;
2709 emit_jmp(0);
2710 literal_pool(0);
2711 set_jump_target(jaddr,(int)out);
2712}
2713
c67af2ac 2714emit_extjump2(u_int addr, int target, int linker)
57871462 2715{
2716 u_char *ptr=(u_char *)addr;
2717 assert((ptr[3]&0x0e)==0xa);
2718 emit_loadlp(target,0);
2719 emit_loadlp(addr,1);
24385cae 2720 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2721 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2722//DEBUG >
2723#ifdef DEBUG_CYCLE_COUNT
2724 emit_readword((int)&last_count,ECX);
2725 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2726 emit_readword((int)&next_interupt,ECX);
2727 emit_writeword(HOST_CCREG,(int)&Count);
2728 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2729 emit_writeword(ECX,(int)&last_count);
2730#endif
2731//DEBUG <
2732 emit_jmp(linker);
2733}
2734
2735emit_extjump(int addr, int target)
2736{
2737 emit_extjump2(addr, target, (int)dyna_linker);
2738}
2739emit_extjump_ds(int addr, int target)
2740{
2741 emit_extjump2(addr, target, (int)dyna_linker_ds);
2742}
2743
13e35c04 2744// put rt_val into rt, potentially making use of rs with value rs_val
2745static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2746{
8575a877 2747 u_int armval;
2748 int diff;
2749 if(genimm(rt_val,&armval)) {
2750 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2751 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2752 return;
2753 }
2754 if(genimm(~rt_val,&armval)) {
2755 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2756 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2757 return;
2758 }
2759 diff=rt_val-rs_val;
2760 if(genimm(diff,&armval)) {
2761 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2762 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2763 return;
2764 }else if(genimm(-diff,&armval)) {
2765 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2766 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2767 return;
2768 }
2769 emit_movimm(rt_val,rt);
2770}
2771
// return 1 if above function can do its job cheaply
// i.e. when v2-v1 or v1-v2, after stripping trailing pairs of zero bits,
// fits in 8 bits (the shape of an add/sub immediate), or v1 == v2.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int forward=v2-v1;   // candidate for "add"
  u_int backward=v1-v2;  // candidate for "sub"
  if(forward==0) return 1;
  while(forward!=0&&(forward&3)==0)
    forward>>=2;
  if(forward<0x100) return 1;
  while(backward!=0&&(backward&3)==0)
    backward>>=2;
  return backward<0x100;
}
cbbab9cd 2787
// Move the values in host registers a0 and a1 into r0 and r1 (the first
// two argument registers).  A negative register number means "nothing to
// pass" for that slot.
// trashes r2
static void pass_args(int a0, int a1)
{
  if(a0==1&&a1==0) {
    // must swap
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a0!=0&&a1==0) {
    // r0 is a source: copy it to r1 before r0 gets overwritten
    emit_mov(a1,1);
    if(a0>=0)
      emit_mov(a0,0);
    return;
  }
  // No conflict: r0 first, then r1, skipping moves that would be no-ops.
  if(a0>0)
    emit_mov(a0,0);
  if(a1>=0&&a1!=1)
    emit_mov(a1,1);
}
2804
b1be1eee 2805static void mov_loadtype_adj(int type,int rs,int rt)
2806{
2807 switch(type) {
2808 case LOADB_STUB: emit_signextend8(rs,rt); break;
2809 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2810 case LOADH_STUB: emit_signextend16(rs,rt); break;
2811 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2812 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2813 default: assert(0);
2814 }
2815}
2816
2817#ifdef PCSX
2818#include "pcsxmem.h"
2819#include "pcsxmem_inline.c"
2820#endif
2821
// Emit the out-of-line slow path for load stub n.
// Fields of stubs[n] read here:
//   [0] access type (LOADB_STUB ...)     [1] branch location to retarget here
//   [2] address to return to             [3] instruction index
//   [4] host register with the address   [5] struct regstat * for the insn
//   [6] cycle adjustment                 [7] live host register list
do_readstub(int n)
{
  assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
  literal_pool(256);
  // The branch recorded in the stub now lands on the code emitted below.
  set_jump_target(stubs[n][1],(int)out);
  int type=stubs[n][0];
  int i=stubs[n][3];
  int rs=stubs[n][4];
  struct regstat *i_regs=(struct regstat *)stubs[n][5];
  u_int reglist=stubs[n][7];
  signed char *i_regmap=i_regs->regmap;
  int addr=get_reg(i_regmap,AGEN1+(i&1));
  int rth,rt;
  int ds;
  // Coprocessor and unaligned loads go through FTEMP rather than rt1[i].
  if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
    rth=get_reg(i_regmap,FTEMP|64);
    rt=get_reg(i_regmap,FTEMP);
  }else{
    rth=get_reg(i_regmap,rt1[i]|64);
    rt=get_reg(i_regmap,rt1[i]);
  }
  assert(rs>=0);
#ifdef PCSX
  int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
  reglist|=(1<<rs);
  // Look for a scratch register among r0-r9/r12 (mask 0x13ff) that is not live.
  for(r=0;r<=12;r++) {
    if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
      temp=r; break;
    }
  }
  // The destination is about to be overwritten, so it need not be preserved.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  if(temp==-1) {
    // Nothing free: spill first, then use r0 (or r2 when rs is r0).
    save_regs(reglist);
    regs_saved=1;
    temp=(rs==0)?2:0;
  }
  // Prefer r1 as the second scratch when it is saved or not live and is
  // neither temp nor rs; otherwise stay with HOST_TEMPREG.
  if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
    temp2=1;
  // temp2 = mem_rtab entry for page rs>>12, then shifted left by one with
  // flags set.  Presumably the bit shifted out (carry) is the "is a
  // handler" marker tested in get_direct_memhandler -- TODO confirm.
  emit_readword((int)&mem_rtab,temp);
  emit_shrimm(rs,12,temp2);
  emit_readword_dualindexedx4(temp,temp2,temp2);
  emit_lsls_imm(temp2,1,temp2);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // Carry-clear conditional load straight from [temp2 + rs].
    switch(type) {
      case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
      case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
      case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
      case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
      case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
    }
  }
  if(regs_saved) {
    restore_jump=(int)out;
    emit_jcc(0); // jump to reg restore
  }
  else
    emit_jcc(stubs[n][2]); // return address

  // From here on: call the read handler for this access size.
  if(!regs_saved)
    save_regs(reglist);
  int handler=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    handler=(int)jump_handler_read8;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    handler=(int)jump_handler_read16;
  if(type==LOADW_STUB)
    handler=(int)jump_handler_read32;
  assert(handler!=0);
  // r0 = address, r1 = shifted table entry, r2 = adjusted cycle count.
  pass_args(rs,temp2);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0)
    emit_loadreg(CCREG,2);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
  emit_call(handler);
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    // Result comes back in r0; extend/move it into rt.
    mov_loadtype_adj(type,0,rt);
  }
  if(restore_jump)
    set_jump_target(restore_jump,(int)out);
  restore_regs(reglist);
  emit_jmp(stubs[n][2]); // return address
#else // !PCSX
  if(addr<0) addr=rt;
  if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
  assert(addr>=0);
  // Pick the handler table for this access size.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  // (This guard is always true here: we are already in the !PCSX branch.)
#ifndef PCSX
  // ds: the register state passed in is not regs[i] itself.
  ds=i_regs!=&regs[i];
  int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
  u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
  if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
  wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
  if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
#endif
  // r0 = handler table, r1 = address>>16, r2 = adjusted cycle count.
  emit_shrimm(rs,16,1);
  int cc=get_reg(i_regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  emit_movimm(ftable,0);
  emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
#ifndef PCSX
  // r3 = guest PC of the instruction, with flag bits in the low two bits.
  emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
#endif
  //emit_readword((int)&last_count,temp);
  //emit_add(cc,temp,cc);
  //emit_writeword(cc,(int)&Count);
  //emit_mov(15,14);
  emit_call((int)&indirect_jump_indexed);
  //emit_callreg(rs);
  //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  //if((cc=get_reg(regmap,CCREG))>=0) {
  //  emit_loadreg(CCREG,cc);
  //}
  // Fetch the result from readmem_dword with the width/sign of the access.
  if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
    assert(rt>=0);
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
  emit_jmp(stubs[n][2]); // return address
#endif // !PCSX
}
2985
c6c3b1b3 2986#ifdef PCSX
2987// return memhandler, or get directly accessable address and return 0
2988u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2989{
2990 u_int l1,l2=0;
2991 l1=((u_int *)table)[addr>>12];
2992 if((l1&(1<<31))==0) {
2993 u_int v=l1<<1;
2994 *addr_host=v+addr;
2995 return 0;
2996 }
2997 else {
2998 l1<<=1;
2999 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
3000 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 3001 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 3002 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
3003 else
3004 l2=((u_int *)l1)[(addr&0xfff)/4];
3005 if((l2&(1<<31))==0) {
3006 u_int v=l2<<1;
3007 *addr_host=v+(addr&0xfff);
3008 return 0;
3009 }
3010 return l2<<1;
3011 }
3012}
3013#endif
3014
// Emit an inline read for a load whose address `addr` is known at
// translation time.
//   type     access type (LOADB_STUB ...)
//   i        instruction index
//   regmap   host register map in effect
//   target   guest register receiving the result (0 = result discarded)
//   adj      cycle adjustment for this point in the block
//   reglist  live host registers to preserve around any call
inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
{
  int rs=get_reg(regmap,target);
  int rth=get_reg(regmap,target|64);
  int rt=get_reg(regmap,target);
  // No host register for the target: fall back to the register mapped to -1.
  if(rs<0) rs=get_reg(regmap,-1);
  assert(rs>=0);
#ifdef PCSX
  u_int handler,host_addr=0,is_dynamic,far_call=0;
  int cc=get_reg(regmap,CCREG);
  // Done if pcsx_direct_read() reports that it handled the access.
  if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
    return;
  handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
  if (handler==0) {
    // Directly addressable memory: load from host_addr unless the result
    // is unused.
    if(rt<0||rt1[i]==0)
      return;
    if(addr!=host_addr)
      emit_movimm_from(addr,rs,host_addr,rs);
    switch(type) {
      case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
      case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
      case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
      case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
      case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
      default: assert(0);
    }
    return;
  }
  // Handlers flagged dynamic by pcsxmem_is_handler_dynamic() are not
  // called directly; the generic jump_handler_read* routine is used instead.
  is_dynamic=pcsxmem_is_handler_dynamic(addr);
  if(is_dynamic) {
    if(type==LOADB_STUB||type==LOADBU_STUB)
      handler=(int)jump_handler_read8;
    if(type==LOADH_STUB||type==LOADHU_STUB)
      handler=(int)jump_handler_read16;
    if(type==LOADW_STUB)
      handler=(int)jump_handler_read32;
  }

  // call a memhandler
  // The destination register is overwritten anyway, so don't preserve it.
  if(rt>=0&&rt1[i]!=0)
    reglist&=~(1<<rt);
  save_regs(reglist);
  // r0 = address
  if(target==0)
    emit_movimm(addr,0);
  else if(rs!=0)
    emit_mov(rs,0);
  // A direct call only reaches +/-32MB from the call site.
  int offset=(int)handler-(int)out-8;
  if(offset<-33554432||offset>=33554432) {
    // unreachable memhandler, a plugin func perhaps
    emit_movimm(handler,12);
    far_call=1;
  }
  if(cc<0)
    emit_loadreg(CCREG,2);
  if(is_dynamic) {
    // r1 = shifted mem_rtab entry for the page, r2 = adjusted cycle count
    emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
  }
  else {
    // Static handler: store last_count + adjusted cycle count to Count
    // before the call.
    emit_readword((int)&last_count,3);
    emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
    emit_add(2,3,2);
    emit_writeword(2,(int)&Count);
  }

  if(far_call)
    emit_callreg(12);
  else
    emit_call(handler);

  // Result comes back in r0; extend/move it into rt.
  if(rt>=0&&rt1[i]!=0) {
    switch(type) {
      case LOADB_STUB: emit_signextend8(0,rt); break;
      case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
      case LOADH_STUB: emit_signextend16(0,rt); break;
      case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
      case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
      default: assert(0);
    }
  }
  restore_regs(reglist);
#else // if !PCSX
  // Pick the handler table for this access size.
  int ftable=0;
  if(type==LOADB_STUB||type==LOADBU_STUB)
    ftable=(int)readmemb;
  if(type==LOADH_STUB||type==LOADHU_STUB)
    ftable=(int)readmemh;
  if(type==LOADW_STUB)
    ftable=(int)readmem;
#ifndef FORCE32
  if(type==LOADD_STUB)
    ftable=(int)readmemd;
#endif
  assert(ftable!=0);
  if(target==0)
    emit_movimm(addr,rs);
  emit_writeword(rs,(int)&address);
  //emit_pusha();
  save_regs(reglist);
  // (This guard is always true here: we are already in the !PCSX branch.)
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Theoretically we can have a pagefault here, if the TLB has never
    // been enabled and the address is outside the range 80000000..BFFFFFFF
    // Write out the registers so the pagefault can be handled.  This is
    // a very rare case and likely represents a bug.
    int ds=regmap!=regs[i].regmap;
    if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
    if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
    else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
  }
#endif
  //emit_shrimm(rs,16,1);
  int cc=get_reg(regmap,CCREG);
  if(cc<0) {
    emit_loadreg(CCREG,2);
  }
  //emit_movimm(ftable,0);
  // The address is constant, so the handler is looked up at translation
  // time: r0 = handler, r2 = adjusted cycle count.
  emit_movimm(((u_int *)ftable)[addr>>16],0);
  //emit_readword((int)&last_count,12);
  emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
#ifndef PCSX
  if((signed int)addr>=(signed int)0xC0000000) {
    // Pagefault address
    int ds=regmap!=regs[i].regmap;
    emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
  }
#endif
  //emit_add(12,2,2);
  //emit_writeword(2,(int)&Count);
  //emit_call(((u_int *)ftable)[addr>>16]);
  emit_call((int)&indirect_jump);
#ifndef PCSX
  // We really shouldn't need to update the count here,
  // but not doing so causes random crashes...
  emit_readword((int)&Count,HOST_TEMPREG);
  emit_readword((int)&next_interupt,2);
  emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
  emit_writeword(2,(int)&last_count);
  emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
  if(cc<0) {
    emit_storereg(CCREG,HOST_TEMPREG);
  }
#endif
  //emit_popa();
  restore_regs(reglist);
  // Fetch the result from readmem_dword with the width/sign of the access.
  if(rt>=0) {
    if(type==LOADB_STUB)
      emit_movsbl((int)&readmem_dword,rt);
    if(type==LOADBU_STUB)
      emit_movzbl((int)&readmem_dword,rt);
    if(type==LOADH_STUB)
      emit_movswl((int)&readmem_dword,rt);
    if(type==LOADHU_STUB)
      emit_movzwl((int)&readmem_dword,rt);
    if(type==LOADW_STUB)
      emit_readword((int)&readmem_dword,rt);
    if(type==LOADD_STUB) {
      emit_readword((int)&readmem_dword,rt);
      if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
    }
  }
#endif // !PCSX
}
3178
3179do_writestub(int n)
3180{
3181 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3182 literal_pool(256);
3183 set_jump_target(stubs[n][1],(int)out);
3184 int type=stubs[n][0];
3185 int i=stubs[n][3];
3186 int rs=stubs[n][4];
3187 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3188 u_int reglist=stubs[n][7];
3189 signed char *i_regmap=i_regs->regmap;
3190 int addr=get_reg(i_regmap,AGEN1+(i&1));
3191 int rth,rt,r;
3192 int ds;
b9b61529 3193 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3194 rth=get_reg(i_regmap,FTEMP|64);
3195 rt=get_reg(i_regmap,r=FTEMP);
3196 }else{
3197 rth=get_reg(i_regmap,rs2[i]|64);
3198 rt=get_reg(i_regmap,r=rs2[i]);
3199 }
3200 assert(rs>=0);
3201 assert(rt>=0);
b96d3df7 3202#ifdef PCSX
3203 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3204 int reglist2=reglist|(1<<rs)|(1<<rt);
3205 for(rtmp=0;rtmp<=12;rtmp++) {
3206 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3207 temp=rtmp; break;
3208 }
3209 }
3210 if(temp==-1) {
3211 save_regs(reglist);
3212 regs_saved=1;
3213 for(rtmp=0;rtmp<=3;rtmp++)
3214 if(rtmp!=rs&&rtmp!=rt)
3215 {temp=rtmp;break;}
3216 }
3217 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3218 temp2=3;
3219 emit_readword((int)&mem_wtab,temp);
3220 emit_shrimm(rs,12,temp2);
3221 emit_readword_dualindexedx4(temp,temp2,temp2);
3222 emit_lsls_imm(temp2,1,temp2);
3223 switch(type) {
3224 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3225 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3226 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3227 default: assert(0);
3228 }
3229 if(regs_saved) {
3230 restore_jump=(int)out;
3231 emit_jcc(0); // jump to reg restore
3232 }
3233 else
3234 emit_jcc(stubs[n][2]); // return address (invcode check)
3235
3236 if(!regs_saved)
3237 save_regs(reglist);
3238 int handler=0;
3239 switch(type) {
3240 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3241 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3242 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3243 }
3244 assert(handler!=0);
3245 pass_args(rs,rt);
3246 if(temp2!=3)
3247 emit_mov(temp2,3);
3248 int cc=get_reg(i_regmap,CCREG);
3249 if(cc<0)
3250 emit_loadreg(CCREG,2);
2573466a 3251 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3252 // returns new cycle_count
3253 emit_call(handler);
2573466a 3254 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3255 if(cc<0)
3256 emit_storereg(CCREG,2);
3257 if(restore_jump)
3258 set_jump_target(restore_jump,(int)out);
3259 restore_regs(reglist);
3260 ra=stubs[n][2];
b96d3df7 3261 emit_jmp(ra);
3262#else // if !PCSX
57871462 3263 if(addr<0) addr=get_reg(i_regmap,-1);
3264 assert(addr>=0);
3265 int ftable=0;
3266 if(type==STOREB_STUB)
3267 ftable=(int)writememb;
3268 if(type==STOREH_STUB)
3269 ftable=(int)writememh;
3270 if(type==STOREW_STUB)
3271 ftable=(int)writemem;
24385cae 3272#ifndef FORCE32
57871462 3273 if(type==STORED_STUB)
3274 ftable=(int)writememd;
24385cae 3275#endif
3276 assert(ftable!=0);
57871462 3277 emit_writeword(rs,(int)&address);
3278 //emit_shrimm(rs,16,rs);
3279 //emit_movmem_indexedx4(ftable,rs,rs);
3280 if(type==STOREB_STUB)
3281 emit_writebyte(rt,(int)&byte);
3282 if(type==STOREH_STUB)
3283 emit_writehword(rt,(int)&hword);
3284 if(type==STOREW_STUB)
3285 emit_writeword(rt,(int)&word);
3286 if(type==STORED_STUB) {
3d624f89 3287#ifndef FORCE32
57871462 3288 emit_writeword(rt,(int)&dword);
3289 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3290#else
3291 printf("STORED_STUB\n");
3292#endif
57871462 3293 }
3294 //emit_pusha();
3295 save_regs(reglist);
97a238a6 3296#ifndef PCSX
57871462 3297 ds=i_regs!=&regs[i];
3298 int real_rs=get_reg(i_regmap,rs1[i]);
3299 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3300 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3301 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3302 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3303#endif
57871462 3304 emit_shrimm(rs,16,1);
3305 int cc=get_reg(i_regmap,CCREG);
3306 if(cc<0) {
3307 emit_loadreg(CCREG,2);
3308 }
3309 emit_movimm(ftable,0);
3310 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3311#ifndef PCSX
57871462 3312 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3313#endif
57871462 3314 //emit_readword((int)&last_count,temp);
3315 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3316 //emit_add(cc,temp,cc);
3317 //emit_writeword(cc,(int)&Count);
3318 emit_call((int)&indirect_jump_indexed);
3319 //emit_callreg(rs);
3320 emit_readword((int)&Count,HOST_TEMPREG);
3321 emit_readword((int)&next_interupt,2);
3322 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3323 emit_writeword(2,(int)&last_count);
3324 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3325 if(cc<0) {
3326 emit_storereg(CCREG,HOST_TEMPREG);
3327 }
3328 //emit_popa();
3329 restore_regs(reglist);
3330 //if((cc=get_reg(regmap,CCREG))>=0) {
3331 // emit_loadreg(CCREG,cc);
3332 //}
3333 emit_jmp(stubs[n][2]); // return address
b96d3df7 3334#endif // !PCSX
57871462 3335}
3336
3337inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3338{
3339 int rs=get_reg(regmap,-1);
3340 int rth=get_reg(regmap,target|64);
3341 int rt=get_reg(regmap,target);
3342 assert(rs>=0);
3343 assert(rt>=0);
cbbab9cd 3344#ifdef PCSX
b96d3df7 3345 u_int handler,host_addr=0;
b96d3df7 3346 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3347 if (handler==0) {
13e35c04 3348 if(addr!=host_addr)
3349 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3350 switch(type) {
3351 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3352 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3353 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3354 default: assert(0);
3355 }
3356 return;
3357 }
3358
3359 // call a memhandler
3360 save_regs(reglist);
13e35c04 3361 pass_args(rs,rt);
b96d3df7 3362 int cc=get_reg(regmap,CCREG);
3363 if(cc<0)
3364 emit_loadreg(CCREG,2);
2573466a 3365 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3366 emit_movimm(handler,3);
3367 // returns new cycle_count
3368 emit_call((int)jump_handler_write_h);
2573466a 3369 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3370 if(cc<0)
3371 emit_storereg(CCREG,2);
3372 restore_regs(reglist);
3373#else // if !pcsx
57871462 3374 int ftable=0;
3375 if(type==STOREB_STUB)
3376 ftable=(int)writememb;
3377 if(type==STOREH_STUB)
3378 ftable=(int)writememh;
3379 if(type==STOREW_STUB)
3380 ftable=(int)writemem;
24385cae 3381#ifndef FORCE32
57871462 3382 if(type==STORED_STUB)
3383 ftable=(int)writememd;
24385cae 3384#endif
3385 assert(ftable!=0);
57871462 3386 emit_writeword(rs,(int)&address);
3387 //emit_shrimm(rs,16,rs);
3388 //emit_movmem_indexedx4(ftable,rs,rs);
3389 if(type==STOREB_STUB)
3390 emit_writebyte(rt,(int)&byte);
3391 if(type==STOREH_STUB)
3392 emit_writehword(rt,(int)&hword);
3393 if(type==STOREW_STUB)
3394 emit_writeword(rt,(int)&word);
3395 if(type==STORED_STUB) {
3d624f89 3396#ifndef FORCE32
57871462 3397 emit_writeword(rt,(int)&dword);
3398 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3399#else
3400 printf("STORED_STUB\n");
3401#endif
57871462 3402 }
3403 //emit_pusha();
3404 save_regs(reglist);
0c1fe38b 3405#ifndef PCSX
3406 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3407 if((signed int)addr>=(signed int)0xC0000000) {
3408 // Theoretically we can have a pagefault here, if the TLB has never
3409 // been enabled and the address is outside the range 80000000..BFFFFFFF
3410 // Write out the registers so the pagefault can be handled. This is
3411 // a very rare case and likely represents a bug.
3412 int ds=regmap!=regs[i].regmap;
3413 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3414 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3415 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3416 }
3417#endif
57871462 3418 //emit_shrimm(rs,16,1);
3419 int cc=get_reg(regmap,CCREG);
3420 if(cc<0) {
3421 emit_loadreg(CCREG,2);
3422 }
3423 //emit_movimm(ftable,0);
3424 emit_movimm(((u_int *)ftable)[addr>>16],0);
3425 //emit_readword((int)&last_count,12);
2573466a 3426 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3427#ifndef PCSX
57871462 3428 if((signed int)addr>=(signed int)0xC0000000) {
3429 // Pagefault address
3430 int ds=regmap!=regs[i].regmap;
3431 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3432 }
f51dc36c 3433#endif
57871462 3434 //emit_add(12,2,2);
3435 //emit_writeword(2,(int)&Count);
3436 //emit_call(((u_int *)ftable)[addr>>16]);
3437 emit_call((int)&indirect_jump);
3438 emit_readword((int)&Count,HOST_TEMPREG);
3439 emit_readword((int)&next_interupt,2);
2573466a 3440 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3441 emit_writeword(2,(int)&last_count);
3442 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3443 if(cc<0) {
3444 emit_storereg(CCREG,HOST_TEMPREG);
3445 }
3446 //emit_popa();
3447 restore_regs(reglist);
b96d3df7 3448#endif
57871462 3449}
3450
3451do_unalignedwritestub(int n)
3452{
b7918751 3453 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3454 literal_pool(256);
57871462 3455 set_jump_target(stubs[n][1],(int)out);
b7918751 3456
3457 int i=stubs[n][3];
3458 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3459 int addr=stubs[n][5];
3460 u_int reglist=stubs[n][7];
3461 signed char *i_regmap=i_regs->regmap;
3462 int temp2=get_reg(i_regmap,FTEMP);
3463 int rt;
3464 int ds, real_rs;
3465 rt=get_reg(i_regmap,rs2[i]);
3466 assert(rt>=0);
3467 assert(addr>=0);
3468 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3469 reglist|=(1<<addr);
3470 reglist&=~(1<<temp2);
3471
b96d3df7 3472#if 1
3473 // don't bother with it and call write handler
3474 save_regs(reglist);
3475 pass_args(addr,rt);
3476 int cc=get_reg(i_regmap,CCREG);
3477 if(cc<0)
3478 emit_loadreg(CCREG,2);
2573466a 3479 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3480 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3481 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3482 if(cc<0)
3483 emit_storereg(CCREG,2);
3484 restore_regs(reglist);
3485 emit_jmp(stubs[n][2]); // return address
3486#else
b7918751 3487 emit_andimm(addr,0xfffffffc,temp2);
3488 emit_writeword(temp2,(int)&address);
3489
3490 save_regs(reglist);
97a238a6 3491#ifndef PCSX
b7918751 3492 ds=i_regs!=&regs[i];
3493 real_rs=get_reg(i_regmap,rs1[i]);
3494 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3495 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3496 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3497 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3498#endif
b7918751 3499 emit_shrimm(addr,16,1);
3500 int cc=get_reg(i_regmap,CCREG);
3501 if(cc<0) {
3502 emit_loadreg(CCREG,2);
3503 }
3504 emit_movimm((u_int)readmem,0);
3505 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3506#ifndef PCSX
3507 // pagefault address
3508 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3509#endif
b7918751 3510 emit_call((int)&indirect_jump_indexed);
3511 restore_regs(reglist);
3512
3513 emit_readword((int)&readmem_dword,temp2);
3514 int temp=addr; //hmh
3515 emit_shlimm(addr,3,temp);
3516 emit_andimm(temp,24,temp);
3517#ifdef BIG_ENDIAN_MIPS
3518 if (opcode[i]==0x2e) // SWR
3519#else
3520 if (opcode[i]==0x2a) // SWL
3521#endif
3522 emit_xorimm(temp,24,temp);
3523 emit_movimm(-1,HOST_TEMPREG);
55439448 3524 if (opcode[i]==0x2a) { // SWL
b7918751 3525 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3526 emit_orrshr(rt,temp,temp2);
3527 }else{
3528 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3529 emit_orrshl(rt,temp,temp2);
3530 }
3531 emit_readword((int)&address,addr);
3532 emit_writeword(temp2,(int)&word);
3533 //save_regs(reglist); // don't need to, no state changes
3534 emit_shrimm(addr,16,1);
3535 emit_movimm((u_int)writemem,0);
3536 //emit_call((int)&indirect_jump_indexed);
3537 emit_mov(15,14);
3538 emit_readword_dualindexedx4(0,1,15);
3539 emit_readword((int)&Count,HOST_TEMPREG);
3540 emit_readword((int)&next_interupt,2);
3541 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3542 emit_writeword(2,(int)&last_count);
3543 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3544 if(cc<0) {
3545 emit_storereg(CCREG,HOST_TEMPREG);
3546 }
3547 restore_regs(reglist);
57871462 3548 emit_jmp(stubs[n][2]); // return address
b96d3df7 3549#endif
57871462 3550}
3551
/*
 * Debug helper: print the eight integer arguments to stdout in the order
 * a, b, c, d, ebp, esi, edi, followed in parentheses by the int located
 * just below 'edi' in memory.  Note that 'esp' is accepted but not printed.
 *
 * NOTE(review): (&edi)[-1] indexes outside the 'edi' object, which the C
 * standard does not define; the parameter names suggest this was carried
 * over from an x86 backend - confirm before relying on the value.
 */
3552void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3553{
3554 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
3555}
3556
3557do_invstub(int n)
3558{
3559 literal_pool(20);
3560 u_int reglist=stubs[n][3];
3561 set_jump_target(stubs[n][1],(int)out);
3562 save_regs(reglist);
3563 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3564 emit_call((int)&invalidate_addr);
3565 restore_regs(reglist);
3566 emit_jmp(stubs[n][2]); // return address
3567}
3568
3569int do_dirty_stub(int i)
3570{
3571 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3572 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3573 #ifdef PCSX
3574 addr=(u_int)source;
3575 #endif
57871462 3576 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3577 #ifndef HAVE_ARMV7
ac545b3a 3578 emit_loadlp(addr,1);
57871462 3579 emit_loadlp((int)copy,2);
3580 emit_loadlp(slen*4,3);
3581 #else
ac545b3a 3582 emit_movw(addr&0x0000FFFF,1);
57871462 3583 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3584 emit_movt(addr&0xFFFF0000,1);
57871462 3585 emit_movt(((u_int)copy)&0xFFFF0000,2);
3586 emit_movw(slen*4,3);
3587 #endif
3588 emit_movimm(start+i*4,0);
3589 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3590 int entry=(int)out;
3591 load_regs_entry(i);
3592 if(entry==(int)out) entry=instr_addr[i];
3593 emit_jmp(instr_addr[i]);
3594 return entry;
3595}
3596
3597void do_dirty_stub_ds()
3598{
3599 // Careful about the code output here, verify_dirty needs to parse it.
665f33e1 3600 #ifndef HAVE_ARMV7
57871462 3601 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3602 emit_loadlp((int)copy,2);
3603 emit_loadlp(slen*4,3);
3604 #else
3605 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3606 emit_movw(((u_int)copy)&0x0000FFFF,2);
3607 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3608 emit_movt(((u_int)copy)&0xFFFF0000,2);
3609 emit_movw(slen*4,3);
3610 #endif
3611 emit_movimm(start+1,0);
3612 emit_call((int)&verify_code_ds);
3613}
3614
3615do_cop1stub(int n)
3616{
3617 literal_pool(256);
3618 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3619 set_jump_target(stubs[n][1],(int)out);
3620 int i=stubs[n][3];
3d624f89 3621// int rs=stubs[n][4];
57871462 3622 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3623 int ds=stubs[n][6];
3624 if(!ds) {
3625 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3626 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3627 }
3628 //else {printf("fp exception in delay slot\n");}
3629 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3630 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3631 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3632 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3633 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3634}
3635
63cb0298 3636#ifndef DISABLE_TLB
3637
57871462 3638/* TLB */
3639
3640int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3641{
3642 if(c) {
3643 if((signed int)addr>=(signed int)0xC0000000) {
3644 // address_generation already loaded the const
3645 emit_readword_dualindexedx4(FP,map,map);
3646 }
3647 else
3648 return -1; // No mapping
3649 }
3650 else {
3651 assert(s!=map);
3652 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3653 emit_addsr12(map,s,map);
3654 // Schedule this while we wait on the load
3655 //if(x) emit_xorimm(s,x,ar);
3656 if(shift>=0) emit_shlimm(s,3,shift);
3657 if(~a) emit_andimm(s,a,ar);
3658 emit_readword_dualindexedx4(FP,map,map);
3659 }
3660 return map;
3661}
3662int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3663{
3664 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3665 emit_test(map,map);
3666 *jaddr=(int)out;
3667 emit_js(0);
3668 }
3669 return map;
3670}
3671
3672int gen_tlb_addr_r(int ar, int map) {
3673 if(map>=0) {
3674 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3675 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3676 }
3677}
3678
3679int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3680{
3681 if(c) {
3682 if(addr<0x80800000||addr>=0xC0000000) {
3683 // address_generation already loaded the const
3684 emit_readword_dualindexedx4(FP,map,map);
3685 }
3686 else
3687 return -1; // No mapping
3688 }
3689 else {
3690 assert(s!=map);
3691 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3692 emit_addsr12(map,s,map);
3693 // Schedule this while we wait on the load
3694 //if(x) emit_xorimm(s,x,ar);
3695 emit_readword_dualindexedx4(FP,map,map);
3696 }
3697 return map;
3698}
3699int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3700{
3701 if(!c||addr<0x80800000||addr>=0xC0000000) {
3702 emit_testimm(map,0x40000000);
3703 *jaddr=(int)out;
3704 emit_jne(0);
3705 }
3706}
3707
3708int gen_tlb_addr_w(int ar, int map) {
3709 if(map>=0) {
3710 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3711 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3712 }
3713}
3714
3715// Generate the address of the memory_map entry, relative to dynarec_local
3716generate_map_const(u_int addr,int reg) {
3717 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3718 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3719}
3720
63cb0298 3721#else
3722
/*
 * DISABLE_TLB build: the TLB helpers become no-ops that return 0.  The
 * parameter lists are deliberately left unspecified so that call sites
 * passing the full argument lists still compile.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3729
3730#endif // DISABLE_TLB
3731
57871462 3732/* Special assem */
3733
/*
 * Emit ARM code for the register-amount shift at instruction index i.
 * opcode2 values up to 0x07 are the 32-bit forms (SLLV/SRLV/SRAV); anything
 * else is handled as a 64-bit form (DSLLV/DSRLV/DSRAV).
 *
 * The value to shift comes from rs1[i], the shift amount from rs2[i], and
 * the result goes to rt1[i].  Nothing is emitted when rt1[i] is 0 or when
 * the low half of the target has no host register.
 */
3734void shift_assemble_arm(int i,struct regstat *i_regs)
3735{
3736 if(rt1[i]) {
3737 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3738 {
3739 signed char s,t,shift;
3740 t=get_reg(i_regs->regmap,rt1[i]);
3741 s=get_reg(i_regs->regmap,rs1[i]);
3742 shift=get_reg(i_regs->regmap,rs2[i]);
3743 if(t>=0){
// Source register index 0: the result is simply zero.
3744 if(rs1[i]==0)
3745 {
3746 emit_zeroreg(t);
3747 }
// Shift-amount register index 0: the result is a plain copy.
3748 else if(rs2[i]==0)
3749 {
3750 assert(s>=0);
3751 if(s!=t) emit_mov(s,t);
3752 }
3753 else
3754 {
// Only the low five bits of the amount are used.
3755 emit_andimm(shift,31,HOST_TEMPREG);
3756 if(opcode2[i]==4) // SLLV
3757 {
3758 emit_shl(s,HOST_TEMPREG,t);
3759 }
3760 if(opcode2[i]==6) // SRLV
3761 {
3762 emit_shr(s,HOST_TEMPREG,t);
3763 }
3764 if(opcode2[i]==7) // SRAV
3765 {
3766 emit_sar(s,HOST_TEMPREG,t);
3767 }
3768 }
3769 }
3770 } else { // DSLLV/DSRLV/DSRAV
// 64-bit values occupy two host registers: the low half is mapped under the
// register number, the high half under (register number | 64).
3771 signed char sh,sl,th,tl,shift;
3772 th=get_reg(i_regs->regmap,rt1[i]|64);
3773 tl=get_reg(i_regs->regmap,rt1[i]);
3774 sh=get_reg(i_regs->regmap,rs1[i]|64);
3775 sl=get_reg(i_regs->regmap,rs1[i]);
3776 shift=get_reg(i_regs->regmap,rs2[i]);
3777 if(tl>=0){
3778 if(rs1[i]==0)
3779 {
3780 emit_zeroreg(tl);
3781 if(th>=0) emit_zeroreg(th);
3782 }
3783 else if(rs2[i]==0)
3784 {
3785 assert(sl>=0);
3786 if(sl!=tl) emit_mov(sl,tl);
3787 if(th>=0&&sh!=th) emit_mov(sh,th);
3788 }
3789 else
3790 {
3791 // FIXME: What if shift==tl ?
3792 assert(shift!=tl);
3793 int temp=get_reg(i_regs->regmap,-1);
// real_th remembers whether the target really has a high-half register;
// th itself may be redirected to the temporary below.
3794 int real_th=th;
3795 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3796 assert(sl>=0);
3797 assert(sh>=0);
// Each variant first shifts by (amount & 31), then tests bit 5 of the
// amount (value 32) and uses conditional moves to fix up the halves.
3798 emit_andimm(shift,31,HOST_TEMPREG);
3799 if(opcode2[i]==0x14) // DSLLV
3800 {
3801 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3802 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// NOTE(review): unlike the neighbouring lines this call is not guarded by
// th>=0, so th may be negative here - confirm that is intended.
3803 emit_orrshr(sl,HOST_TEMPREG,th);
3804 emit_andimm(shift,31,HOST_TEMPREG);
3805 emit_testimm(shift,32);
3806 emit_shl(sl,HOST_TEMPREG,tl);
3807 if(th>=0) emit_cmovne_reg(tl,th);
3808 emit_cmovne_imm(0,tl);
3809 }
3810 if(opcode2[i]==0x16) // DSRLV
3811 {
3812 assert(th>=0);
3813 emit_shr(sl,HOST_TEMPREG,tl);
3814 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3815 emit_orrshl(sh,HOST_TEMPREG,tl);
3816 emit_andimm(shift,31,HOST_TEMPREG);
3817 emit_testimm(shift,32);
3818 emit_shr(sh,HOST_TEMPREG,th);
3819 emit_cmovne_reg(th,tl);
3820 if(real_th>=0) emit_cmovne_imm(0,th);
3821 }
3822 if(opcode2[i]==0x17) // DSRAV
3823 {
3824 assert(th>=0);
3825 emit_shr(sl,HOST_TEMPREG,tl);
3826 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3827 if(real_th>=0) {
3828 assert(temp>=0);
// temp receives th arithmetically shifted right by 31; it is moved into th
// further down when bit 5 of the amount is set.
3829 emit_sarimm(th,31,temp);
3830 }
3831 emit_orrshl(sh,HOST_TEMPREG,tl);
3832 emit_andimm(shift,31,HOST_TEMPREG);
3833 emit_testimm(shift,32);
3834 emit_sar(sh,HOST_TEMPREG,th);
3835 emit_cmovne_reg(th,tl);
3836 if(real_th>=0) emit_cmovne_reg(temp,th);
3837 }
3838 }
3839 }
3840 }
3841 }
3842}
ffb0b9e0 3843
3844#ifdef PCSX
3845static void speculate_mov(int rs,int rt)
3846{
3847 if(rt!=0) {
3848 smrv_strong_next|=1<<rt;
3849 smrv[rt]=smrv[rs];
3850 }
3851}
3852
3853static void speculate_mov_weak(int rs,int rt)
3854{
3855 if(rt!=0) {
3856 smrv_weak_next|=1<<rt;
3857 smrv[rt]=smrv[rs];
3858 }
3859}
3860
3861static void speculate_register_values(int i)
3862{
3863 if(i==0) {
3864 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3865 // gp,sp are likely to stay the same throughout the block
3866 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3867 smrv_weak_next=~smrv_strong_next;
3868 //printf(" llr %08x\n", smrv[4]);
3869 }
3870 smrv_strong=smrv_strong_next;
3871 smrv_weak=smrv_weak_next;
3872 switch(itype[i]) {
3873 case ALU:
3874 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3875 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3876 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3877 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3878 else {
3879 smrv_strong_next&=~(1<<rt1[i]);
3880 smrv_weak_next&=~(1<<rt1[i]);
3881 }
3882 break;
3883 case SHIFTIMM:
3884 smrv_strong_next&=~(1<<rt1[i]);
3885 smrv_weak_next&=~(1<<rt1[i]);
3886 // fallthrough
3887 case IMM16:
3888 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3889 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3890 if(hr>=0) {
3891 if(get_final_value(hr,i,&value))
3892 smrv[rt1[i]]=value;
3893 else smrv[rt1[i]]=constmap[i][hr];
3894 smrv_strong_next|=1<<rt1[i];
3895 }
3896 }
3897 else {
3898 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3899 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3900 }
3901 break;
3902 case LOAD:
3903 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3904 // special case for BIOS
3905 smrv[rt1[i]]=0xa0000000;
3906 smrv_strong_next|=1<<rt1[i];
3907 break;
3908 }
3909 // fallthrough
3910 case SHIFT:
3911 case LOADLR:
3912 case MOV:
3913 smrv_strong_next&=~(1<<rt1[i]);
3914 smrv_weak_next&=~(1<<rt1[i]);
3915 break;
3916 case COP0:
3917 case COP2:
3918 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3919 smrv_strong_next&=~(1<<rt1[i]);
3920 smrv_weak_next&=~(1<<rt1[i]);
3921 }
3922 break;
3923 case C2LS:
3924 if (opcode[i]==0x32) { // LWC2
3925 smrv_strong_next&=~(1<<rt1[i]);
3926 smrv_weak_next&=~(1<<rt1[i]);
3927 }
3928 break;
3929 }
3930#if 0
3931 int r=4;
3932 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3933 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3934#endif
3935}
3936
/* Memory classes returned by get_ptr_mem_type(). */
enum {
  MTYPE_8000 = 0, /* default class; also used when nothing else matches */
  MTYPE_8020 = 1, /* RAM mirror at 0x80200000..0x807fffff */
  MTYPE_0000 = 2, /* RAM mirror below 0x00200000 */
  MTYPE_A000 = 3, /* RAM mirror at 0xa0000000..0xa01fffff */
  MTYPE_1F80 = 4  /* scratchpad at 0x1f800000..0x1f800fff */
};
3944
3945static int get_ptr_mem_type(u_int a)
3946{
3947 if(a < 0x00200000) {
3948 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3949 // return wrong, must use memhandler for BIOS self-test to pass
3950 // 007 does similar stuff from a00 mirror, weird stuff
3951 return MTYPE_8000;
3952 return MTYPE_0000;
3953 }
3954 if(0x1f800000 <= a && a < 0x1f801000)
3955 return MTYPE_1F80;
3956 if(0x80200000 <= a && a < 0x80800000)
3957 return MTYPE_8020;
3958 if(0xa0000000 <= a && a < 0xa0200000)
3959 return MTYPE_A000;
3960 return MTYPE_8000;
3961}
3962#endif
3963
3964static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3965{
3966 int jaddr,type=0;
3967
3968#ifdef PCSX
3969 int mr=rs1[i];
3970 if(((smrv_strong|smrv_weak)>>mr)&1) {
3971 type=get_ptr_mem_type(smrv[mr]);
3972 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3973 }
3974 else {
3975 // use the mirror we are running on
3976 type=get_ptr_mem_type(start);
3977 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3978 }
3979
3980 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3981 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3982 addr=*addr_reg_override=HOST_TEMPREG;
3983 type=0;
3984 }
3985 else if(type==MTYPE_0000) { // RAM 0 mirror
3986 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3987 addr=*addr_reg_override=HOST_TEMPREG;
3988 type=0;
3989 }
3990 else if(type==MTYPE_A000) { // RAM A mirror
3991 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3992 addr=*addr_reg_override=HOST_TEMPREG;
3993 type=0;
3994 }
3995 else if(type==MTYPE_1F80) { // scratchpad
3996 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3997 emit_cmpimm(HOST_TEMPREG,0x1000);
3998 jaddr=(int)out;
3999 emit_jc(0);
4000 }
4001#endif
4002
4003 if(type==0)
4004 {
4005 emit_cmpimm(addr,RAM_SIZE);
4006 jaddr=(int)out;
4007 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
4008 // Hint to branch predictor that the branch is unlikely to be taken
4009 if(rs1[i]>=28)
4010 emit_jno_unlikely(0);
4011 else
4012 #endif
4013 emit_jno(0);
a327ad27 4014 if(ram_offset!=0) {
4015 emit_addimm(addr,ram_offset,HOST_TEMPREG);
4016 addr=*addr_reg_override=HOST_TEMPREG;
4017 }
ffb0b9e0 4018 }
4019
4020 return jaddr;
4021}
4022
57871462 4023#define shift_assemble shift_assemble_arm
4024
/*
 * Emit ARM code for the unaligned loads at instruction index i:
 * LWL/LWR (opcode 0x22/0x26) and LDL/LDR (opcode 0x1A/0x1B).
 *
 * The containing aligned word (or doubleword) is loaded into the FTEMP
 * register(s), either inline, through an out-of-line stub registered with
 * add_stub(), or with inline_readstub() when the address is a constant
 * that is not a direct memory target.  The loaded data is then shifted
 * and merged into the target register(s) according to the low address bits.
 */
4025void loadlr_assemble_arm(int i,struct regstat *i_regs)
4026{
4027 int s,th,tl,temp,temp2,addr,map=-1;
4028 int offset;
4029 int jaddr=0;
af4ee1fe 4030 int memtarget=0,c=0;
ffb0b9e0 4031 int fastload_reg_override=0;
57871462 4032 u_int hr,reglist=0;
4033 th=get_reg(i_regs->regmap,rt1[i]|64);
4034 tl=get_reg(i_regs->regmap,rt1[i]);
4035 s=get_reg(i_regs->regmap,rs1[i]);
4036 temp=get_reg(i_regs->regmap,-1);
4037 temp2=get_reg(i_regs->regmap,FTEMP);
4038 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4039 assert(addr<0);
4040 offset=imm[i];
// reglist collects every host register that currently holds a mapping,
// plus the temporary; it is handed to the stubs as the set to preserve.
4041 for(hr=0;hr<HOST_REGS;hr++) {
4042 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4043 }
4044 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only assigned below),
// so the "||c" term cannot be true here - confirm whether that is intended.
4045 if(offset||s<0||c) addr=temp2;
4046 else addr=s;
4047 if(s>=0) {
// c: the base register held a known constant on entry to this instruction.
4048 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4049 if(c) {
4050 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4051 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4052 }
57871462 4053 }
535d208a 4054 if(!using_tlb) {
4055 if(!c) {
4056 #ifdef RAM_OFFSET
4057 map=get_reg(i_regs->regmap,ROREG);
4058 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4059 #endif
// temp = address*8 (bit offset, masked later); temp2 = aligned address.
4060 emit_shlimm(addr,3,temp);
4061 if (opcode[i]==0x22||opcode[i]==0x26) {
4062 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4063 }else{
535d208a 4064 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4065 }
ffb0b9e0 4066 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4067 }
4068 else {
// Constant address: the bit offset is computed at compile time.
a327ad27 4069 if(ram_offset&&memtarget) {
4070 emit_addimm(temp2,ram_offset,HOST_TEMPREG);
4071 fastload_reg_override=HOST_TEMPREG;
4072 }
535d208a 4073 if (opcode[i]==0x22||opcode[i]==0x26) {
4074 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4075 }else{
4076 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4077 }
57871462 4078 }
535d208a 4079 }else{ // using tlb
// a is the alignment mask passed to do_tlb_r (all ones for a constant).
4080 int a;
4081 if(c) {
4082 a=-1;
4083 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4084 a=0xFFFFFFFC; // LWL/LWR
4085 }else{
4086 a=0xFFFFFFF8; // LDL/LDR
4087 }
4088 map=get_reg(i_regs->regmap,TLREG);
4089 assert(map>=0);
ea3d2e6e 4090 reglist&=~(1<<map);
535d208a 4091 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4092 if(c) {
4093 if (opcode[i]==0x22||opcode[i]==0x26) {
4094 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4095 }else{
4096 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4097 }
535d208a 4098 }
4099 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4100 }
4101 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4102 if(!c||memtarget) {
// Inline load of the aligned word; a stub is registered when a range
// check branch was emitted above.
ffb0b9e0 4103 int a=temp2;
4104 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4105 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4106 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4107 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4108 }
4109 else
4110 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4111 if(rt1[i]) {
4112 assert(tl>=0);
// Merge: shift the loaded word by the bit offset, clear the bits of the
// target that the shifted word covers, then OR the two together.
57871462 4113 emit_andimm(temp,24,temp);
2002a1db 4114#ifdef BIG_ENDIAN_MIPS
4115 if (opcode[i]==0x26) // LWR
4116#else
4117 if (opcode[i]==0x22) // LWL
4118#endif
4119 emit_xorimm(temp,24,temp);
57871462 4120 emit_movimm(-1,HOST_TEMPREG);
4121 if (opcode[i]==0x26) {
4122 emit_shr(temp2,temp,temp2);
4123 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4124 }else{
4125 emit_shl(temp2,temp,temp2);
4126 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4127 }
4128 emit_or(temp2,tl,tl);
57871462 4129 }
535d208a 4130 //emit_storereg(rt1[i],tl); // DEBUG
4131 }
4132 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4133 // FIXME: little endian, fastload_reg_override
535d208a 4134 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4135 if(!c||memtarget) {
4136 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4137 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4138 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4139 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4140 }
4141 else
4142 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4143 if(rt1[i]) {
4144 assert(th>=0);
4145 assert(tl>=0);
// Bit 5 of the bit offset (value 32) is tested first; the conditional
// (eq/ne) operations below act on that result.
57871462 4146 emit_testimm(temp,32);
4147 emit_andimm(temp,24,temp);
4148 if (opcode[i]==0x1A) { // LDL
4149 emit_rsbimm(temp,32,HOST_TEMPREG);
4150 emit_shl(temp2h,temp,temp2h);
4151 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4152 emit_movimm(-1,HOST_TEMPREG);
4153 emit_shl(temp2,temp,temp2);
4154 emit_cmove_reg(temp2h,th);
4155 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4156 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4157 emit_orreq(temp2,tl,tl);
4158 emit_orrne(temp2,th,th);
4159 }
4160 if (opcode[i]==0x1B) { // LDR
4161 emit_xorimm(temp,24,temp);
4162 emit_rsbimm(temp,32,HOST_TEMPREG);
4163 emit_shr(temp2,temp,temp2);
4164 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4165 emit_movimm(-1,HOST_TEMPREG);
4166 emit_shr(temp2h,temp,temp2h);
4167 emit_cmovne_reg(temp2,tl);
4168 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4169 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4170 emit_orrne(temp2h,th,th);
4171 emit_orreq(temp2h,tl,tl);
4172 }
4173 }
4174 }
4175}
4176#define loadlr_assemble loadlr_assemble_arm
4177
4178void cop0_assemble(int i,struct regstat *i_regs)
4179{
4180 if(opcode2[i]==0) // MFC0
4181 {
4182 signed char t=get_reg(i_regs->regmap,rt1[i]);
4183 char copr=(source[i]>>11)&0x1f;
4184 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4185 if(t>=0&&rt1[i]!=0) {
7139f3c8 4186#ifdef MUPEN64
57871462 4187 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4188 emit_movimm((source[i]>>11)&0x1f,1);
4189 emit_writeword(0,(int)&PC);
4190 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4191 if(copr==9) {
4192 emit_readword((int)&last_count,ECX);
4193 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4194 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4195 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4196 emit_writeword(HOST_CCREG,(int)&Count);
4197 }
4198 emit_call((int)MFC0);
4199 emit_readword((int)&readmem_dword,t);
7139f3c8 4200#else
4201 emit_readword((int)&reg_cop0+copr*4,t);
4202#endif
57871462 4203 }
4204 }
4205 else if(opcode2[i]==4) // MTC0
4206 {
4207 signed char s=get_reg(i_regs->regmap,rs1[i]);
4208 char copr=(source[i]>>11)&0x1f;
4209 assert(s>=0);
63cb0298 4210#ifdef MUPEN64
57871462 4211 emit_writeword(s,(int)&readmem_dword);
4212 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4213 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4214 emit_movimm((source[i]>>11)&0x1f,1);
4215 emit_writeword(0,(int)&PC);
4216 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4217#else
4218 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4219#endif
4220 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4221 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4222 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4223 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4224 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4225 emit_writeword(HOST_CCREG,(int)&Count);
4226 }
4227 // What a mess. The status register (12) can enable interrupts,
4228 // so needs a special case to handle a pending interrupt.
4229 // The interrupt must be taken immediately, because a subsequent
4230 // instruction might disable interrupts again.
7139f3c8 4231 if(copr==12||copr==13) {
fca1aef2 4232#ifdef PCSX
4233 if (is_delayslot) {
4234 // burn cycles to cause cc_interrupt, which will
4235 // reschedule next_interupt. Relies on CCREG from above.
4236 assem_debug("MTC0 DS %d\n", copr);
4237 emit_writeword(HOST_CCREG,(int)&last_count);
4238 emit_movimm(0,HOST_CCREG);
4239 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4240 emit_loadreg(rs1[i],1);
fca1aef2 4241 emit_movimm(copr,0);
4242 emit_call((int)pcsx_mtc0_ds);
042c7287 4243 emit_loadreg(rs1[i],s);
fca1aef2 4244 return;
4245 }
4246#endif
63cb0298 4247 emit_movimm(start+i*4+4,HOST_TEMPREG);
4248 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4249 emit_movimm(0,HOST_TEMPREG);
4250 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4251 }
4252 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4253 //else
fca1aef2 4254#ifdef PCSX
caeefe31 4255 if(s==HOST_CCREG)
4256 emit_loadreg(rs1[i],1);
4257 else if(s!=1)
63cb0298 4258 emit_mov(s,1);
fca1aef2 4259 emit_movimm(copr,0);
4260 emit_call((int)pcsx_mtc0);
4261#else
57871462 4262 emit_call((int)MTC0);
fca1aef2 4263#endif
7139f3c8 4264 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4265 emit_readword((int)&Count,HOST_CCREG);
042c7287 4266 emit_readword((int)&next_interupt,HOST_TEMPREG);
2573466a 4267 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
042c7287 4268 emit_sub(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
4269 emit_writeword(HOST_TEMPREG,(int)&last_count);
57871462 4270 emit_storereg(CCREG,HOST_CCREG);
4271 }
7139f3c8 4272 if(copr==12||copr==13) {
57871462 4273 assert(!is_delayslot);
4274 emit_readword((int)&pending_exception,14);
042c7287 4275 emit_test(14,14);
4276 emit_jne((int)&do_interrupt);
57871462 4277 }
4278 emit_loadreg(rs1[i],s);
4279 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4280 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
57871462 4281 cop1_usable=0;
4282 }
4283 else
4284 {
4285 assert(opcode2[i]==0x10);
3d624f89 4286#ifndef DISABLE_TLB
57871462 4287 if((source[i]&0x3f)==0x01) // TLBR
4288 emit_call((int)TLBR);
4289 if((source[i]&0x3f)==0x02) // TLBWI
4290 emit_call((int)TLBWI_new);
4291 if((source[i]&0x3f)==0x06) { // TLBWR
4292 // The TLB entry written by TLBWR is dependent on the count,
4293 // so update the cycle count
4294 emit_readword((int)&last_count,ECX);
4295 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4296 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4297 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4298 emit_writeword(HOST_CCREG,(int)&Count);
4299 emit_call((int)TLBWR_new);
4300 }
4301 if((source[i]&0x3f)==0x08) // TLBP
4302 emit_call((int)TLBP);
3d624f89 4303#endif
576bbd8f 4304#ifdef PCSX
4305 if((source[i]&0x3f)==0x10) // RFE
4306 {
4307 emit_readword((int)&Status,0);
4308 emit_andimm(0,0x3c,1);
4309 emit_andimm(0,~0xf,0);
4310 emit_orrshr_imm(1,2,0);
4311 emit_writeword(0,(int)&Status);
4312 }
4313#else
57871462 4314 if((source[i]&0x3f)==0x18) // ERET
4315 {
4316 int count=ccadj[i];
4317 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4318 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4319 emit_jmp((int)jump_eret);
4320 }
576bbd8f 4321#endif
57871462 4322 }
4323}
4324
b9b61529 4325static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4326{
4327 switch (copr) {
4328 case 1:
4329 case 3:
4330 case 5:
4331 case 8:
4332 case 9:
4333 case 10:
4334 case 11:
4335 emit_readword((int)&reg_cop2d[copr],tl);
4336 emit_signextend16(tl,tl);
4337 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4338 break;
4339 case 7:
4340 case 16:
4341 case 17:
4342 case 18:
4343 case 19:
4344 emit_readword((int)&reg_cop2d[copr],tl);
4345 emit_andimm(tl,0xffff,tl);
4346 emit_writeword(tl,(int)&reg_cop2d[copr]);
4347 break;
4348 case 15:
4349 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4350 emit_writeword(tl,(int)&reg_cop2d[copr]);
4351 break;
4352 case 28:
b9b61529 4353 case 29:
4354 emit_readword((int)&reg_cop2d[9],temp);
4355 emit_testimm(temp,0x8000); // do we need this?
4356 emit_andimm(temp,0xf80,temp);
4357 emit_andne_imm(temp,0,temp);
f70d384d 4358 emit_shrimm(temp,7,tl);
b9b61529 4359 emit_readword((int)&reg_cop2d[10],temp);
4360 emit_testimm(temp,0x8000);
4361 emit_andimm(temp,0xf80,temp);
4362 emit_andne_imm(temp,0,temp);
f70d384d 4363 emit_orrshr_imm(temp,2,tl);
b9b61529 4364 emit_readword((int)&reg_cop2d[11],temp);
4365 emit_testimm(temp,0x8000);
4366 emit_andimm(temp,0xf80,temp);
4367 emit_andne_imm(temp,0,temp);
f70d384d 4368 emit_orrshl_imm(temp,3,tl);
b9b61529 4369 emit_writeword(tl,(int)&reg_cop2d[copr]);
4370 break;
4371 default:
4372 emit_readword((int)&reg_cop2d[copr],tl);
4373 break;
4374 }
4375}
4376
4377static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4378{
4379 switch (copr) {
4380 case 15:
4381 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4382 emit_writeword(sl,(int)&reg_cop2d[copr]);
4383 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4384 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4385 emit_writeword(sl,(int)&reg_cop2d[14]);
4386 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4387 break;
4388 case 28:
4389 emit_andimm(sl,0x001f,temp);
f70d384d 4390 emit_shlimm(temp,7,temp);
b9b61529 4391 emit_writeword(temp,(int)&reg_cop2d[9]);
4392 emit_andimm(sl,0x03e0,temp);
f70d384d 4393 emit_shlimm(temp,2,temp);
b9b61529 4394 emit_writeword(temp,(int)&reg_cop2d[10]);
4395 emit_andimm(sl,0x7c00,temp);
f70d384d 4396 emit_shrimm(temp,3,temp);
b9b61529 4397 emit_writeword(temp,(int)&reg_cop2d[11]);
4398 emit_writeword(sl,(int)&reg_cop2d[28]);
4399 break;
4400 case 30:
4401 emit_movs(sl,temp);
4402 emit_mvnmi(temp,temp);
665f33e1 4403#ifdef HAVE_ARMV5
b9b61529 4404 emit_clz(temp,temp);
665f33e1 4405#else
4406 emit_movs(temp,HOST_TEMPREG);
4407 emit_movimm(0,temp);
4408 emit_jeq((int)out+4*4);
4409 emit_addpl_imm(temp,1,temp);
4410 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
4411 emit_jns((int)out-2*4);
4412#endif
b9b61529 4413 emit_writeword(sl,(int)&reg_cop2d[30]);
4414 emit_writeword(temp,(int)&reg_cop2d[31]);
4415 break;
b9b61529 4416 case 31:
4417 break;
4418 default:
4419 emit_writeword(sl,(int)&reg_cop2d[copr]);
4420 break;
4421 }
4422}
4423
4424void cop2_assemble(int i,struct regstat *i_regs)
4425{
4426 u_int copr=(source[i]>>11)&0x1f;
4427 signed char temp=get_reg(i_regs->regmap,-1);
4428 if (opcode2[i]==0) { // MFC2
4429 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4430 if(tl>=0&&rt1[i]!=0)
b9b61529 4431 cop2_get_dreg(copr,tl,temp);
4432 }
4433 else if (opcode2[i]==4) { // MTC2
4434 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4435 cop2_put_dreg(copr,sl,temp);
4436 }
4437 else if (opcode2[i]==2) // CFC2
4438 {
4439 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4440 if(tl>=0&&rt1[i]!=0)
b9b61529 4441 emit_readword((int)&reg_cop2c[copr],tl);
4442 }
4443 else if (opcode2[i]==6) // CTC2
4444 {
4445 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4446 switch(copr) {
4447 case 4:
4448 case 12:
4449 case 20:
4450 case 26:
4451 case 27:
4452 case 29:
4453 case 30:
4454 emit_signextend16(sl,temp);
4455 break;
4456 case 31:
4457 //value = value & 0x7ffff000;
4458 //if (value & 0x7f87e000) value |= 0x80000000;
4459 emit_shrimm(sl,12,temp);
4460 emit_shlimm(temp,12,temp);
4461 emit_testimm(temp,0x7f000000);
4462 emit_testeqimm(temp,0x00870000);
4463 emit_testeqimm(temp,0x0000e000);
4464 emit_orrne_imm(temp,0x80000000,temp);
4465 break;
4466 default:
4467 temp=sl;
4468 break;
4469 }
4470 emit_writeword(temp,(int)&reg_cop2c[copr]);
4471 assert(sl>=0);
4472 }
4473}
4474
054175e9 4475static void c2op_prologue(u_int op,u_int reglist)
4476{
4477 save_regs_all(reglist);
82ed88eb 4478#ifdef PCNT
4479 emit_movimm(op,0);
4480 emit_call((int)pcnt_gte_start);
4481#endif
054175e9 4482 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4483}
4484
4485static void c2op_epilogue(u_int op,u_int reglist)
4486{
82ed88eb 4487#ifdef PCNT
4488 emit_movimm(op,0);
4489 emit_call((int)pcnt_gte_end);
4490#endif
054175e9 4491 restore_regs_all(reglist);
4492}
4493
6c0eefaf 4494static void c2op_call_MACtoIR(int lm,int need_flags)
4495{
4496 if(need_flags)
4497 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4498 else
4499 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4500}
4501
4502static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4503{
4504 emit_call((int)func);
4505 // func is C code and trashes r0
4506 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4507 if(need_flags||need_ir)
4508 c2op_call_MACtoIR(lm,need_flags);
4509 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4510}
4511
054175e9 4512static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4513{
4514 signed char temp=get_reg(i_regs->regmap,-1);
4515 u_int c2op=source[i]&0x3f;
6c0eefaf 4516 u_int hr,reglist_full=0,reglist;
054175e9 4517 int need_flags,need_ir;
b9b61529 4518 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4519 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4520 }
6c0eefaf 4521 reglist=reglist_full&0x100f;
b9b61529 4522
4523 if (gte_handlers[c2op]!=NULL) {
bedfea38 4524 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4525 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
cbbd8dd7 4526 assem_debug("gte op %08x, unneeded %016llx, need_flags %d, need_ir %d\n",
4527 source[i],gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4528 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4529 need_flags=0;
6c0eefaf 4530 int shift = (source[i] >> 19) & 1;
4531 int lm = (source[i] >> 10) & 1;
054175e9 4532 switch(c2op) {
19776aef 4533#ifndef DRC_DBG
054175e9 4534 case GTE_MVMVA: {
82336ba3 4535#ifdef HAVE_ARMV5
054175e9 4536 int v = (source[i] >> 15) & 3;
4537 int cv = (source[i] >> 13) & 3;
4538 int mx = (source[i] >> 17) & 3;
6c0eefaf 4539 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4540 c2op_prologue(c2op,reglist);
4541 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4542 if(v<3)
4543 emit_ldrd(v*8,0,4);
4544 else {
4545 emit_movzwl_indexed(9*4,0,4); // gteIR
4546 emit_movzwl_indexed(10*4,0,6);
4547 emit_movzwl_indexed(11*4,0,5);
4548 emit_orrshl_imm(6,16,4);
4549 }
4550 if(mx<3)
4551 emit_addimm(0,32*4+mx*8*4,6);
4552 else
4553 emit_readword((int)&zeromem_ptr,6);
4554 if(cv<3)
4555 emit_addimm(0,32*4+(cv*8+5)*4,7);
4556 else
4557 emit_readword((int)&zeromem_ptr,7);
4558#ifdef __ARM_NEON__
4559 emit_movimm(source[i],1); // opcode
4560 emit_call((int)gteMVMVA_part_neon);
4561 if(need_flags) {
4562 emit_movimm(lm,1);
4563 emit_call((int)gteMACtoIR_flags_neon);
4564 }
4565#else
4566 if(cv==3&&shift)
4567 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4568 else {
4569 emit_movimm(shift,1);
4570 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4571 }
6c0eefaf 4572 if(need_flags||need_ir)
4573 c2op_call_MACtoIR(lm,need_flags);
82336ba3 4574#endif
4575#else /* if not HAVE_ARMV5 */
4576 c2op_prologue(c2op,reglist);
4577 emit_movimm(source[i],1); // opcode
4578 emit_writeword(1,(int)&psxRegs.code);
4579 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
054175e9 4580#endif
4581 break;
4582 }
6c0eefaf 4583 case GTE_OP:
4584 c2op_prologue(c2op,reglist);
4585 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4586 if(need_flags||need_ir) {
4587 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4588 c2op_call_MACtoIR(lm,need_flags);
4589 }
4590 break;
4591 case GTE_DPCS:
4592 c2op_prologue(c2op,reglist);
4593 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4594 break;
4595 case GTE_INTPL:
4596 c2op_prologue(c2op,reglist);
4597 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4598 break;
4599 case GTE_SQR:
4600 c2op_prologue(c2op,reglist);
4601 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4602 if(need_flags||need_ir) {
4603 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4604 c2op_call_MACtoIR(lm,need_flags);
4605 }
4606 break;
4607 case GTE_DCPL:
4608 c2op_prologue(c2op,reglist);
4609 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4610 break;
4611 case GTE_GPF:
4612 c2op_prologue(c2op,reglist);
4613 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4614 break;
4615 case GTE_GPL:
4616 c2op_prologue(c2op,reglist);
4617 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4618 break;
19776aef 4619#endif
054175e9 4620 default:
054175e9 4621 c2op_prologue(c2op,reglist);
19776aef 4622#ifdef DRC_DBG
4623 emit_movimm(source[i],1); // opcode
4624 emit_writeword(1,(int)&psxRegs.code);
4625#endif
054175e9 4626 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4627 break;
4628 }
4629 c2op_epilogue(c2op,reglist);
4630 }
b9b61529 4631}
4632
4633void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4634{
4635 // XXX: should just just do the exception instead
4636 if(!cop1_usable) {
4637 int jaddr=(int)out;
4638 emit_jmp(0);
4639 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4640 cop1_usable=1;
4641 }
4642}
4643
57871462 4644void cop1_assemble(int i,struct regstat *i_regs)
4645{
3d624f89 4646#ifndef DISABLE_COP1
57871462 4647 // Check cop1 unusable
4648 if(!cop1_usable) {
4649 signed char rs=get_reg(i_regs->regmap,CSREG);
4650 assert(rs>=0);
4651 emit_testimm(rs,0x20000000);
4652 int jaddr=(int)out;
4653 emit_jeq(0);
4654 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4655 cop1_usable=1;
4656 }
4657 if (opcode2[i]==0) { // MFC1
4658 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4659 if(tl>=0) {
4660 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4661 emit_readword_indexed(0,tl,tl);
4662 }
4663 }
4664 else if (opcode2[i]==1) { // DMFC1
4665 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4666 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4667 if(tl>=0) {
4668 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4669 if(th>=0) emit_readword_indexed(4,tl,th);
4670 emit_readword_indexed(0,tl,tl);
4671 }
4672 }
4673 else if (opcode2[i]==4) { // MTC1
4674 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4675 signed char temp=get_reg(i_regs->regmap,-1);
4676 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4677 emit_writeword_indexed(sl,0,temp);
4678 }
4679 else if (opcode2[i]==5) { // DMTC1
4680 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4681 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4682 signed char temp=get_reg(i_regs->regmap,-1);
4683 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4684 emit_writeword_indexed(sh,4,temp);
4685 emit_writeword_indexed(sl,0,temp);
4686 }
4687 else if (opcode2[i]==2) // CFC1
4688 {
4689 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4690 if(tl>=0) {
4691 u_int copr=(source[i]>>11)&0x1f;
4692 if(copr==0) emit_readword((int)&FCR0,tl);
4693 if(copr==31) emit_readword((int)&FCR31,tl);
4694 }
4695 }
4696 else if (opcode2[i]==6) // CTC1
4697 {
4698 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4699 u_int copr=(source[i]>>11)&0x1f;
4700 assert(sl>=0);
4701 if(copr==31)
4702 {
4703 emit_writeword(sl,(int)&FCR31);
4704 // Set the rounding mode
4705 //FIXME
4706 //char temp=get_reg(i_regs->regmap,-1);
4707 //emit_andimm(sl,3,temp);
4708 //emit_fldcw_indexed((int)&rounding_modes,temp);
4709 }
4710 }
3d624f89 4711#else
4712 cop1_unusable(i, i_regs);
4713#endif
57871462 4714}
4715
4716void fconv_assemble_arm(int i,struct regstat *i_regs)
4717{
3d624f89 4718#ifndef DISABLE_COP1
57871462 4719 signed char temp=get_reg(i_regs->regmap,-1);
4720 assert(temp>=0);
4721 // Check cop1 unusable
4722 if(!cop1_usable) {
4723 signed char rs=get_reg(i_regs->regmap,CSREG);
4724 assert(rs>=0);
4725 emit_testimm(rs,0x20000000);
4726 int jaddr=(int)out;
4727 emit_jeq(0);
4728 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4729 cop1_usable=1;
4730 }
4731
4732 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4733 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4734 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4735 emit_flds(temp,15);
4736 emit_ftosizs(15,15); // float->int, truncate
4737 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4738 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4739 emit_fsts(15,temp);
4740 return;
4741 }
4742 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4743 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4744 emit_vldr(temp,7);
4745 emit_ftosizd(7,13); // double->int, truncate
4746 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4747 emit_fsts(13,temp);
4748 return;
4749 }
4750
4751 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4752 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4753 emit_flds(temp,13);
4754 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4755 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4756 emit_fsitos(13,15);
4757 emit_fsts(15,temp);
4758 return;
4759 }
4760 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4761 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4762 emit_flds(temp,13);
4763 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4764 emit_fsitod(13,7);
4765 emit_vstr(7,temp);
4766 return;
4767 }
4768
4769 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4770 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4771 emit_flds(temp,13);
4772 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4773 emit_fcvtds(13,7);
4774 emit_vstr(7,temp);
4775 return;
4776 }
4777 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4778 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4779 emit_vldr(temp,7);
4780 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4781 emit_fcvtsd(7,13);
4782 emit_fsts(13,temp);
4783 return;
4784 }
4785 #endif
4786
4787 // C emulation code
4788
4789 u_int hr,reglist=0;
4790 for(hr=0;hr<HOST_REGS;hr++) {
4791 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4792 }
4793 save_regs(reglist);
4794
4795 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4796 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4797 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4798 emit_call((int)cvt_s_w);
4799 }
4800 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4801 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4802 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4803 emit_call((int)cvt_d_w);
4804 }
4805 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4806 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4807 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4808 emit_call((int)cvt_s_l);
4809 }
4810 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4811 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4812 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4813 emit_call((int)cvt_d_l);
4814 }
4815
4816 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4817 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4818 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4819 emit_call((int)cvt_d_s);
4820 }
4821 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4822 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4823 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4824 emit_call((int)cvt_w_s);
4825 }
4826 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4827 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4828 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4829 emit_call((int)cvt_l_s);
4830 }
4831
4832 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4833 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4834 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4835 emit_call((int)cvt_s_d);
4836 }
4837 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4838 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4839 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4840 emit_call((int)cvt_w_d);
4841 }
4842 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4843 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4844 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4845 emit_call((int)cvt_l_d);
4846 }
4847
4848 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4849 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4850 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4851 emit_call((int)round_l_s);
4852 }
4853 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4854 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4855 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4856 emit_call((int)trunc_l_s);
4857 }
4858 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4859 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4860 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4861 emit_call((int)ceil_l_s);
4862 }
4863 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4864 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4865 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4866 emit_call((int)floor_l_s);
4867 }
4868 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4869 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4870 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4871 emit_call((int)round_w_s);
4872 }
4873 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4874 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4875 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4876 emit_call((int)trunc_w_s);
4877 }
4878 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4879 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4880 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4881 emit_call((int)ceil_w_s);
4882 }
4883 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4884 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4885 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4886 emit_call((int)floor_w_s);
4887 }
4888
4889 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4890 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4891 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4892 emit_call((int)round_l_d);
4893 }
4894 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4895 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4896 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4897 emit_call((int)trunc_l_d);
4898 }
4899 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4900 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4901 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4902 emit_call((int)ceil_l_d);
4903 }
4904 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4905 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4906 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4907 emit_call((int)floor_l_d);
4908 }
4909 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4910 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4911 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4912 emit_call((int)round_w_d);
4913 }
4914 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4915 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4916 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4917 emit_call((int)trunc_w_d);
4918 }
4919 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4920 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4921 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4922 emit_call((int)ceil_w_d);
4923 }
4924 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4925 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4926 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4927 emit_call((int)floor_w_d);
4928 }
4929
4930 restore_regs(reglist);
3d624f89 4931#else
4932 cop1_unusable(i, i_regs);
4933#endif
57871462 4934}
4935#define fconv_assemble fconv_assemble_arm
4936
4937void fcomp_assemble(int i,struct regstat *i_regs)
4938{
3d624f89 4939#ifndef DISABLE_COP1
57871462 4940 signed char fs=get_reg(i_regs->regmap,FSREG);
4941 signed char temp=get_reg(i_regs->regmap,-1);
4942 assert(temp>=0);
4943 // Check cop1 unusable
4944 if(!cop1_usable) {
4945 signed char cs=get_reg(i_regs->regmap,CSREG);
4946 assert(cs>=0);
4947 emit_testimm(cs,0x20000000);
4948 int jaddr=(int)out;
4949 emit_jeq(0);
4950 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4951 cop1_usable=1;
4952 }
4953
4954 if((source[i]&0x3f)==0x30) {
4955 emit_andimm(fs,~0x800000,fs);
4956 return;
4957 }
4958
4959 if((source[i]&0x3e)==0x38) {
4960 // sf/ngle - these should throw exceptions for NaNs
4961 emit_andimm(fs,~0x800000,fs);
4962 return;
4963 }
4964
4965 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4966 if(opcode2[i]==0x10) {
4967 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4968 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4969 emit_orimm(fs,0x800000,fs);
4970 emit_flds(temp,14);
4971 emit_flds(HOST_TEMPREG,15);
4972 emit_fcmps(14,15);
4973 emit_fmstat();
4974 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4975 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4976 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4977 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4978 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4979 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4980 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4981 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4982 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4983 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4984 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4985 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4986 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4987 return;
4988 }
4989 if(opcode2[i]==0x11) {
4990 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4991 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4992 emit_orimm(fs,0x800000,fs);
4993 emit_vldr(temp,6);
4994 emit_vldr(HOST_TEMPREG,7);
4995 emit_fcmpd(6,7);
4996 emit_fmstat();
4997 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4998 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4999 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
5000 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
5001 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
5002 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
5003 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
5004 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
5005 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
5006 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
5007 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
5008 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
5009 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
5010 return;
5011 }
5012 #endif
5013
5014 // C only
5015
5016 u_int hr,reglist=0;
5017 for(hr=0;hr<HOST_REGS;hr++) {
5018 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5019 }
5020 reglist&=~(1<<fs);
5021 save_regs(reglist);
5022 if(opcode2[i]==0x10) {
5023 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5024 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5025 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
5026 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
5027 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
5028 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
5029 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
5030 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
5031 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
5032 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
5033 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
5034 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
5035 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
5036 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5037 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5038 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5039 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5040 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5041 }
5042 if(opcode2[i]==0x11) {
5043 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5044 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5045 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5046 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5047 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5048 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5049 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5050 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5051 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5052 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5053 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5054 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5055 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5056 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5057 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5058 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5059 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5060 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5061 }
5062 restore_regs(reglist);
5063 emit_loadreg(FSREG,fs);
3d624f89 5064#else
5065 cop1_unusable(i, i_regs);
5066#endif
57871462 5067}
5068
5069void float_assemble(int i,struct regstat *i_regs)
5070{
3d624f89 5071#ifndef DISABLE_COP1
57871462 5072 signed char temp=get_reg(i_regs->regmap,-1);
5073 assert(temp>=0);
5074 // Check cop1 unusable
5075 if(!cop1_usable) {
5076 signed char cs=get_reg(i_regs->regmap,CSREG);
5077 assert(cs>=0);
5078 emit_testimm(cs,0x20000000);
5079 int jaddr=(int)out;
5080 emit_jeq(0);
5081 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5082 cop1_usable=1;
5083 }
5084
5085 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5086 if((source[i]&0x3f)==6) // mov
5087 {
5088 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5089 if(opcode2[i]==0x10) {
5090 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5091 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5092 emit_readword_indexed(0,temp,temp);
5093 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5094 }
5095 if(opcode2[i]==0x11) {
5096 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5097 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5098 emit_vldr(temp,7);
5099 emit_vstr(7,HOST_TEMPREG);
5100 }
5101 }
5102 return;
5103 }
5104
5105 if((source[i]&0x3f)>3)
5106 {
5107 if(opcode2[i]==0x10) {
5108 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5109 emit_flds(temp,15);
5110 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5111 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5112 }
5113 if((source[i]&0x3f)==4) // sqrt
5114 emit_fsqrts(15,15);
5115 if((source[i]&0x3f)==5) // abs
5116 emit_fabss(15,15);
5117 if((source[i]&0x3f)==7) // neg
5118 emit_fnegs(15,15);
5119 emit_fsts(15,temp);
5120 }
5121 if(opcode2[i]==0x11) {
5122 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5123 emit_vldr(temp,7);
5124 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5125 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5126 }
5127 if((source[i]&0x3f)==4) // sqrt
5128 emit_fsqrtd(7,7);
5129 if((source[i]&0x3f)==5) // abs
5130 emit_fabsd(7,7);
5131 if((source[i]&0x3f)==7) // neg
5132 emit_fnegd(7,7);
5133 emit_vstr(7,temp);
5134 }
5135 return;
5136 }
5137 if((source[i]&0x3f)<4)
5138 {
5139 if(opcode2[i]==0x10) {
5140 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5141 }
5142 if(opcode2[i]==0x11) {
5143 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5144 }
5145 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5146 if(opcode2[i]==0x10) {
5147 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5148 emit_flds(temp,15);
5149 emit_flds(HOST_TEMPREG,13);
5150 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5151 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5152 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5153 }
5154 }
5155 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5156 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5157 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5158 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5159 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5160 emit_fsts(15,HOST_TEMPREG);
5161 }else{
5162 emit_fsts(15,temp);
5163 }
5164 }
5165 else if(opcode2[i]==0x11) {
5166 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5167 emit_vldr(temp,7);
5168 emit_vldr(HOST_TEMPREG,6);
5169 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5170 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5171 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5172 }
5173 }
5174 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5175 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5176 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5177 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5178 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5179 emit_vstr(7,HOST_TEMPREG);
5180 }else{
5181 emit_vstr(7,temp);
5182 }
5183 }
5184 }
5185 else {
5186 if(opcode2[i]==0x10) {
5187 emit_flds(temp,15);
5188 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5189 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5190 }
5191 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5192 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5193 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5194 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5195 emit_fsts(15,temp);
5196 }
5197 else if(opcode2[i]==0x11) {
5198 emit_vldr(temp,7);
5199 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5200 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5201 }
5202 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5203 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5204 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5205 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5206 emit_vstr(7,temp);
5207 }
5208 }
5209 return;
5210 }
5211 #endif
5212
5213 u_int hr,reglist=0;
5214 for(hr=0;hr<HOST_REGS;hr++) {
5215 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5216 }
5217 if(opcode2[i]==0x10) { // Single precision
5218 save_regs(reglist);
5219 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5220 if((source[i]&0x3f)<4) {
5221 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5222 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5223 }else{
5224 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5225 }
5226 switch(source[i]&0x3f)
5227 {
5228 case 0x00: emit_call((int)add_s);break;
5229 case 0x01: emit_call((int)sub_s);break;
5230 case 0x02: emit_call((int)mul_s);break;
5231 case 0x03: emit_call((int)div_s);break;
5232 case 0x04: emit_call((int)sqrt_s);break;
5233 case 0x05: emit_call((int)abs_s);break;
5234 case 0x06: emit_call((int)mov_s);break;
5235 case 0x07: emit_call((int)neg_s);break;
5236 }
5237 restore_regs(reglist);
5238 }
5239 if(opcode2[i]==0x11) { // Double precision
5240 save_regs(reglist);
5241 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5242 if((source[i]&0x3f)<4) {
5243 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5244 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5245 }else{
5246 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5247 }
5248 switch(source[i]&0x3f)
5249 {
5250 case 0x00: emit_call((int)add_d);break;
5251 case 0x01: emit_call((int)sub_d);break;
5252 case 0x02: emit_call((int)mul_d);break;
5253 case 0x03: emit_call((int)div_d);break;
5254 case 0x04: emit_call((int)sqrt_d);break;
5255 case 0x05: emit_call((int)abs_d);break;
5256 case 0x06: emit_call((int)mov_d);break;
5257 case 0x07: emit_call((int)neg_d);break;
5258 }
5259 restore_regs(reglist);
5260 }
3d624f89 5261#else
5262 cop1_unusable(i, i_regs);
5263#endif
57871462 5264}
5265
5266void multdiv_assemble_arm(int i,struct regstat *i_regs)
5267{
5268 // case 0x18: MULT
5269 // case 0x19: MULTU
5270 // case 0x1A: DIV
5271 // case 0x1B: DIVU
5272 // case 0x1C: DMULT
5273 // case 0x1D: DMULTU
5274 // case 0x1E: DDIV
5275 // case 0x1F: DDIVU
5276 if(rs1[i]&&rs2[i])
5277 {
5278 if((opcode2[i]&4)==0) // 32-bit
5279 {
5280 if(opcode2[i]==0x18) // MULT
5281 {
5282 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5283 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5284 signed char hi=get_reg(i_regs->regmap,HIREG);
5285 signed char lo=get_reg(i_regs->regmap,LOREG);
5286 assert(m1>=0);
5287 assert(m2>=0);
5288 assert(hi>=0);
5289 assert(lo>=0);
5290 emit_smull(m1,m2,hi,lo);
5291 }
5292 if(opcode2[i]==0x19) // MULTU
5293 {
5294 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5295 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5296 signed char hi=get_reg(i_regs->regmap,HIREG);
5297 signed char lo=get_reg(i_regs->regmap,LOREG);
5298 assert(m1>=0);
5299 assert(m2>=0);
5300 assert(hi>=0);
5301 assert(lo>=0);
5302 emit_umull(m1,m2,hi,lo);
5303 }
5304 if(opcode2[i]==0x1A) // DIV
5305 {
5306 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5307 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5308 assert(d1>=0);
5309 assert(d2>=0);
5310 signed char quotient=get_reg(i_regs->regmap,LOREG);
5311 signed char remainder=get_reg(i_regs->regmap,HIREG);
5312 assert(quotient>=0);
5313 assert(remainder>=0);
5314 emit_movs(d1,remainder);
44a80f6a 5315 emit_movimm(0xffffffff,quotient);
5316 emit_negmi(quotient,quotient); // .. quotient and ..
5317 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5318 emit_movs(d2,HOST_TEMPREG);
5319 emit_jeq((int)out+52); // Division by zero
82336ba3 5320 emit_negsmi(HOST_TEMPREG,HOST_TEMPREG);
665f33e1 5321#ifdef HAVE_ARMV5
57871462 5322 emit_clz(HOST_TEMPREG,quotient);
5323 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
665f33e1 5324#else
5325 emit_movimm(0,quotient);
5326 emit_addpl_imm(quotient,1,quotient);
5327 emit_lslpls_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5328 emit_jns((int)out-2*4);
5329#endif
57871462 5330 emit_orimm(quotient,1<<31,quotient);
5331 emit_shr(quotient,quotient,quotient);
5332 emit_cmp(remainder,HOST_TEMPREG);
5333 emit_subcs(remainder,HOST_TEMPREG,remainder);
5334 emit_adcs(quotient,quotient,quotient);
5335 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5336 emit_jcc((int)out-16); // -4
5337 emit_teq(d1,d2);
5338 emit_negmi(quotient,quotient);
5339 emit_test(d1,d1);
5340 emit_negmi(remainder,remainder);
5341 }
5342 if(opcode2[i]==0x1B) // DIVU
5343 {
5344 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5345 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5346 assert(d1>=0);
5347 assert(d2>=0);
5348 signed char quotient=get_reg(i_regs->regmap,LOREG);
5349 signed char remainder=get_reg(i_regs->regmap,HIREG);
5350 assert(quotient>=0);
5351 assert(remainder>=0);
44a80f6a 5352 emit_mov(d1,remainder);
5353 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5354 emit_test(d2,d2);
44a80f6a 5355 emit_jeq((int)out+40); // Division by zero
665f33e1 5356#ifdef HAVE_ARMV5
57871462 5357 emit_clz(d2,HOST_TEMPREG);
5358 emit_movimm(1<<31,quotient);
5359 emit_shl(d2,HOST_TEMPREG,d2);
665f33e1 5360#else
5361 emit_movimm(0,HOST_TEMPREG);
82336ba3 5362 emit_addpl_imm(HOST_TEMPREG,1,HOST_TEMPREG);
5363 emit_lslpls_imm(d2,1,d2);
665f33e1 5364 emit_jns((int)out-2*4);
5365 emit_movimm(1<<31,quotient);
5366#endif
57871462 5367 emit_shr(quotient,HOST_TEMPREG,quotient);
5368 emit_cmp(remainder,d2);
5369 emit_subcs(remainder,d2,remainder);
5370 emit_adcs(quotient,quotient,quotient);
5371 emit_shrcc_imm(d2,1,d2);
5372 emit_jcc((int)out-16); // -4
5373 }
5374 }
5375 else // 64-bit
4600ba03 5376#ifndef FORCE32
57871462 5377 {
5378 if(opcode2[i]==0x1C) // DMULT
5379 {
5380 assert(opcode2[i]!=0x1C);
5381 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5382 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5383 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5384 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5385 assert(m1h>=0);
5386 assert(m2h>=0);
5387 assert(m1l>=0);
5388 assert(m2l>=0);
5389 emit_pushreg(m2h);
5390 emit_pushreg(m2l);
5391 emit_pushreg(m1h);
5392 emit_pushreg(m1l);
5393 emit_call((int)&mult64);
5394 emit_popreg(m1l);
5395 emit_popreg(m1h);
5396 emit_popreg(m2l);
5397 emit_popreg(m2h);
5398 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5399 signed char hil=get_reg(i_regs->regmap,HIREG);
5400 if(hih>=0) emit_loadreg(HIREG|64,hih);
5401 if(hil>=0) emit_loadreg(HIREG,hil);
5402 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5403 signed char lol=get_reg(i_regs->regmap,LOREG);
5404 if(loh>=0) emit_loadreg(LOREG|64,loh);
5405 if(lol>=0) emit_loadreg(LOREG,lol);
5406 }
5407 if(opcode2[i]==0x1D) // DMULTU
5408 {
5409 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5410 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5411 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5412 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5413 assert(m1h>=0);
5414 assert(m2h>=0);
5415 assert(m1l>=0);
5416 assert(m2l>=0);
5417 save_regs(0x100f);
5418 if(m1l!=0) emit_mov(m1l,0);
5419 if(m1h==0) emit_readword((int)&dynarec_local,1);
5420 else if(m1h>1) emit_mov(m1h,1);
5421 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5422 else if(m2l>2) emit_mov(m2l,2);
5423 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5424 else if(m2h>3) emit_mov(m2h,3);
5425 emit_call((int)&multu64);
5426 restore_regs(0x100f);
5427 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5428 signed char hil=get_reg(i_regs->regmap,HIREG);
5429 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5430 signed char lol=get_reg(i_regs->regmap,LOREG);
5431 /*signed char temp=get_reg(i_regs->regmap,-1);
5432 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5433 signed char rl=get_reg(i_regs->regmap,HIREG);
5434 assert(m1h>=0);
5435 assert(m2h>=0);
5436 assert(m1l>=0);
5437 assert(m2l>=0);
5438 assert(temp>=0);
5439 //emit_mov(m1l,EAX);
5440 //emit_mul(m2l);
5441 emit_umull(rl,rh,m1l,m2l);
5442 emit_storereg(LOREG,rl);
5443 emit_mov(rh,temp);
5444 //emit_mov(m1h,EAX);
5445 //emit_mul(m2l);
5446 emit_umull(rl,rh,m1h,m2l);
5447 emit_adds(rl,temp,temp);
5448 emit_adcimm(rh,0,rh);
5449 emit_storereg(HIREG,rh);
5450 //emit_mov(m2h,EAX);
5451 //emit_mul(m1l);
5452 emit_umull(rl,rh,m1l,m2h);
5453 emit_adds(rl,temp,temp);
5454 emit_adcimm(rh,0,rh);
5455 emit_storereg(LOREG|64,temp);
5456 emit_mov(rh,temp);
5457 //emit_mov(m2h,EAX);
5458 //emit_mul(m1h);
5459 emit_umull(rl,rh,m1h,m2h);
5460 emit_adds(rl,temp,rl);
5461 emit_loadreg(HIREG,temp);
5462 emit_adcimm(rh,0,rh);
5463 emit_adds(rl,temp,rl);
5464 emit_adcimm(rh,0,rh);
5465 // DEBUG
5466 /*
5467 emit_pushreg(m2h);
5468 emit_pushreg(m2l);
5469 emit_pushreg(m1h);
5470 emit_pushreg(m1l);
5471 emit_call((int)&multu64);
5472 emit_popreg(m1l);
5473 emit_popreg(m1h);
5474 emit_popreg(m2l);
5475 emit_popreg(m2h);
5476 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5477 signed char hil=get_reg(i_regs->regmap,HIREG);
5478 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5479 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5480 */
5481 // Shouldn't be necessary
5482 //char loh=get_reg(i_regs->regmap,LOREG|64);
5483 //char lol=get_reg(i_regs->regmap,LOREG);
5484 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5485 //if(lol>=0) emit_loadreg(LOREG,lol);
5486 }
5487 if(opcode2[i]==0x1E) // DDIV
5488 {
5489 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5490 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5491 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5492 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5493 assert(d1h>=0);
5494 assert(d2h>=0);
5495 assert(d1l>=0);
5496 assert(d2l>=0);
5497 save_regs(0x100f);
5498 if(d1l!=0) emit_mov(d1l,0);
5499 if(d1h==0) emit_readword((int)&dynarec_local,1);
5500 else if(d1h>1) emit_mov(d1h,1);
5501 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5502 else if(d2l>2) emit_mov(d2l,2);
5503 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5504 else if(d2h>3) emit_mov(d2h,3);
5505 emit_call((int)&div64);
5506 restore_regs(0x100f);
5507 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5508 signed char hil=get_reg(i_regs->regmap,HIREG);
5509 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5510 signed char lol=get_reg(i_regs->regmap,LOREG);
5511 if(hih>=0) emit_loadreg(HIREG|64,hih);
5512 if(hil>=0) emit_loadreg(HIREG,hil);
5513 if(loh>=0) emit_loadreg(LOREG|64,loh);
5514 if(lol>=0) emit_loadreg(LOREG,lol);
5515 }
5516 if(opcode2[i]==0x1F) // DDIVU
5517 {
5518 //u_int hr,reglist=0;
5519 //for(hr=0;hr<HOST_REGS;hr++) {
5520 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5521 //}
5522 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5523 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5524 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5525 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5526 assert(d1h>=0);
5527 assert(d2h>=0);
5528 assert(d1l>=0);
5529 assert(d2l>=0);
5530 save_regs(0x100f);
5531 if(d1l!=0) emit_mov(d1l,0);
5532 if(d1h==0) emit_readword((int)&dynarec_local,1);
5533 else if(d1h>1) emit_mov(d1h,1);
5534 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5535 else if(d2l>2) emit_mov(d2l,2);
5536 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5537 else if(d2h>3) emit_mov(d2h,3);
5538 emit_call((int)&divu64);
5539 restore_regs(0x100f);
5540 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5541 signed char hil=get_reg(i_regs->regmap,HIREG);
5542 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5543 signed char lol=get_reg(i_regs->regmap,LOREG);
5544 if(hih>=0) emit_loadreg(HIREG|64,hih);
5545 if(hil>=0) emit_loadreg(HIREG,hil);
5546 if(loh>=0) emit_loadreg(LOREG|64,loh);
5547 if(lol>=0) emit_loadreg(LOREG,lol);
5548 }
5549 }
4600ba03 5550#else
5551 assert(0);
5552#endif
57871462 5553 }
5554 else
5555 {
5556 // Multiply by zero is zero.
5557 // MIPS does not have a divide by zero exception.
5558 // The result is undefined, we return zero.
5559 signed char hr=get_reg(i_regs->regmap,HIREG);
5560 signed char lr=get_reg(i_regs->regmap,LOREG);
5561 if(hr>=0) emit_zeroreg(hr);
5562 if(lr>=0) emit_zeroreg(lr);
5563 }
5564}
5565#define multdiv_assemble multdiv_assemble_arm
5566
// Hash-preload hook; deliberately emits nothing on ARM.
// On x86 this puts the value 0xf8 into the register.  On ARM the hash is
// computed with a single instruction (see do_rhash), so there is nothing
// to preload.
void do_preload_rhash(int r)
{
  (void)r; // unused on this target
}
5571
5572void do_preload_rhtbl(int ht) {
5573 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5574}
5575
// Emit the hash of the address in rs into rh: a single AND that keeps
// bits 3-7.  The result equals ((addr&0xFF)>>3)*8, the byte offset of the
// matching two-word entry in mini_ht.
void do_rhash(int rs,int rh)
{
  const int hash_mask=0xf8;
  emit_andimm(rs,hash_mask,rh);
}
5579
5580void do_miniht_load(int ht,int rh) {
5581 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5582 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5583}
5584
5585void do_miniht_jump(int rs,int rh,int ht) {
5586 emit_cmp(rh,rs);
5587 emit_ldreq_indexed(ht,4,15);
5588 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5589 emit_mov(rs,7);
5590 emit_jmp(jump_vaddr_reg[7]);
5591 #else
5592 emit_jmp(jump_vaddr_reg[rs]);
5593 #endif
5594}
5595
5596void do_miniht_insert(u_int return_address,int rt,int temp) {
665f33e1 5597 #ifndef HAVE_ARMV7
57871462 5598 emit_movimm(return_address,rt); // PC into link register
5599 add_to_linker((int)out,return_address,1);
5600 emit_pcreladdr(temp);
5601 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5602 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5603 #else
5604 emit_movw(return_address&0x0000FFFF,rt);
5605 add_to_linker((int)out,return_address,1);
5606 emit_pcreladdr(temp);
5607 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5608 emit_movt(return_address&0xFFFF0000,rt);
5609 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5610 #endif
5611}
5612
5613// Sign-extend to 64 bits and write out upper half of a register
5614// This is useful where we have a 32-bit value in a register, and want to
5615// keep it in a 32-bit register, but can't guarantee that it won't be read
5616// as a 64-bit value later.
5617void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5618{
24385cae 5619#ifndef FORCE32
57871462 5620 if(is32_pre==is32) return;
5621 int hr,reg;
5622 for(hr=0;hr<HOST_REGS;hr++) {
5623 if(hr!=EXCLUDE_REG) {
5624 //if(pre[hr]==entry[hr]) {
5625 if((reg=pre[hr])>=0) {
5626 if((dirty>>hr)&1) {
5627 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5628 emit_sarimm(hr,31,HOST_TEMPREG);
5629 emit_storereg(reg|64,HOST_TEMPREG);
5630 }
5631 }
5632 }
5633 //}
5634 }
5635 }
24385cae 5636#endif
57871462 5637}
5638
5639void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5640{
5641 //if(dirty_pre==dirty) return;
5642 int hr,reg,new_hr;
5643 for(hr=0;hr<HOST_REGS;hr++) {
5644 if(hr!=EXCLUDE_REG) {
5645 reg=pre[hr];
5646 if(((~u)>>(reg&63))&1) {
f776eb14 5647 if(reg>0) {
57871462 5648 if(((dirty_pre&~dirty)>>hr)&1) {
5649 if(reg>0&&reg<34) {
5650 emit_storereg(reg,hr);
5651 if( ((is32_pre&~uu)>>reg)&1 ) {
5652 emit_sarimm(hr,31,HOST_TEMPREG);
5653 emit_storereg(reg|64,HOST_TEMPREG);
5654 }
5655 }
5656 else if(reg>=64) {
5657 emit_storereg(reg,hr);
5658 }
5659 }
5660 }
57871462 5661 }
5662 }
5663 }
5664}
5665
5666
5667/* using strd could possibly help but you'd have to allocate registers in pairs
5668void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5669{
5670 int hr;
5671 int wrote=-1;
5672 for(hr=HOST_REGS-1;hr>=0;hr--) {
5673 if(hr!=EXCLUDE_REG) {
5674 if(pre[hr]!=entry[hr]) {
5675 if(pre[hr]>=0) {
5676 if((dirty>>hr)&1) {
5677 if(get_reg(entry,pre[hr])<0) {
5678 if(pre[hr]<64) {
5679 if(!((u>>pre[hr])&1)) {
5680 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5681 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5682 emit_sarimm(hr,31,hr+1);
5683 emit_strdreg(pre[hr],hr);
5684 }
5685 else
5686 emit_storereg(pre[hr],hr);
5687 }else{
5688 emit_storereg(pre[hr],hr);
5689 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5690 emit_sarimm(hr,31,hr);
5691 emit_storereg(pre[hr]|64,hr);
5692 }
5693 }
5694 }
5695 }else{
5696 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5697 emit_storereg(pre[hr],hr);
5698 }
5699 }
5700 wrote=hr;
5701 }
5702 }
5703 }
5704 }
5705 }
5706 }
5707 for(hr=0;hr<HOST_REGS;hr++) {
5708 if(hr!=EXCLUDE_REG) {
5709 if(pre[hr]!=entry[hr]) {
5710 if(pre[hr]>=0) {
5711 int nr;
5712 if((nr=get_reg(entry,pre[hr]))>=0) {
5713 emit_mov(hr,nr);
5714 }
5715 }
5716 }
5717 }
5718 }
5719}
5720#define wb_invalidate wb_invalidate_arm
5721*/
5722
dd3a91a1 5723// Clearing the cache is rather slow on ARM Linux, so mark the areas
5724// that need to be cleared, and then only clear these areas once.
5725void do_clear_cache()
5726{
5727 int i,j;
5728 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5729 {
5730 u_int bitmap=needs_clear_cache[i];
5731 if(bitmap) {
5732 u_int start,end;
5733 for(j=0;j<32;j++)
5734 {
5735 if(bitmap&(1<<j)) {
bdeade46 5736 start=(u_int)BASE_ADDR+i*131072+j*4096;
dd3a91a1 5737 end=start+4095;
5738 j++;
5739 while(j<32) {
5740 if(bitmap&(1<<j)) {
5741 end+=4096;
5742 j++;
5743 }else{
5744 __clear_cache((void *)start,(void *)end);
5745 break;
5746 }
5747 }
5748 }
5749 }
5750 needs_clear_cache[i]=0;
5751 }
5752 }
5753}
5754
57871462 5755// CPU-architecture-specific initialization
5756void arch_init() {
3d624f89 5757#ifndef DISABLE_COP1
57871462 5758 rounding_modes[0]=0x0<<22; // round
5759 rounding_modes[1]=0x3<<22; // trunc
5760 rounding_modes[2]=0x1<<22; // ceil
5761 rounding_modes[3]=0x2<<22; // floor
3d624f89 5762#endif
57871462 5763}
b9b61529 5764
5765// vim:shiftwidth=2:expandtab