drc: add some hack options
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
 *   Mupen64plus/PCSX - assem_arm.c                                        *
 *   Copyright (C) 2009-2011 Ari64                                         *
 *   Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas                     *
 *                                                                         *
 *   This program is free software; you can redistribute it and/or modify  *
 *   it under the terms of the GNU General Public License as published by  *
 *   the Free Software Foundation; either version 2 of the License, or     *
 *   (at your option) any later version.                                   *
 *                                                                         *
 *   This program is distributed in the hope that it will be useful,       *
 *   but WITHOUT ANY WARRANTY; without even the implied warranty of        *
 *   MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE.  See the         *
 *   GNU General Public License for more details.                          *
 *                                                                         *
 *   You should have received a copy of the GNU General Public License     *
 *   along with this program; if not, write to the                         *
 *   Free Software Foundation, Inc.,                                       *
 *   51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA.          *
 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
57871462 32extern int cycle_count;
33extern int last_count;
34extern int pcaddr;
35extern int pending_exception;
36extern int branch_target;
37extern uint64_t readmem_dword;
3d624f89 38#ifdef MUPEN64
57871462 39extern precomp_instr fake_pc;
3d624f89 40#endif
57871462 41extern void *dynarec_local;
42extern u_int memory_map[1048576];
43extern u_int mini_ht[32][2];
44extern u_int rounding_modes[4];
45
46void indirect_jump_indexed();
47void indirect_jump();
48void do_interrupt();
49void jump_vaddr_r0();
50void jump_vaddr_r1();
51void jump_vaddr_r2();
52void jump_vaddr_r3();
53void jump_vaddr_r4();
54void jump_vaddr_r5();
55void jump_vaddr_r6();
56void jump_vaddr_r7();
57void jump_vaddr_r8();
58void jump_vaddr_r9();
59void jump_vaddr_r10();
60void jump_vaddr_r12();
61
62const u_int jump_vaddr_reg[16] = {
63 (int)jump_vaddr_r0,
64 (int)jump_vaddr_r1,
65 (int)jump_vaddr_r2,
66 (int)jump_vaddr_r3,
67 (int)jump_vaddr_r4,
68 (int)jump_vaddr_r5,
69 (int)jump_vaddr_r6,
70 (int)jump_vaddr_r7,
71 (int)jump_vaddr_r8,
72 (int)jump_vaddr_r9,
73 (int)jump_vaddr_r10,
74 0,
75 (int)jump_vaddr_r12,
76 0,
77 0,
78 0};
79
0bbd1454 80void invalidate_addr_r0();
81void invalidate_addr_r1();
82void invalidate_addr_r2();
83void invalidate_addr_r3();
84void invalidate_addr_r4();
85void invalidate_addr_r5();
86void invalidate_addr_r6();
87void invalidate_addr_r7();
88void invalidate_addr_r8();
89void invalidate_addr_r9();
90void invalidate_addr_r10();
91void invalidate_addr_r12();
92
93const u_int invalidate_addr_reg[16] = {
94 (int)invalidate_addr_r0,
95 (int)invalidate_addr_r1,
96 (int)invalidate_addr_r2,
97 (int)invalidate_addr_r3,
98 (int)invalidate_addr_r4,
99 (int)invalidate_addr_r5,
100 (int)invalidate_addr_r6,
101 (int)invalidate_addr_r7,
102 (int)invalidate_addr_r8,
103 (int)invalidate_addr_r9,
104 (int)invalidate_addr_r10,
105 0,
106 (int)invalidate_addr_r12,
107 0,
108 0,
109 0};
110
57871462 111#include "fpu.h"
112
dd3a91a1 113unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
114
57871462 115/* Linker */
116
117void set_jump_target(int addr,u_int target)
118{
119 u_char *ptr=(u_char *)addr;
120 u_int *ptr2=(u_int *)ptr;
121 if(ptr[3]==0xe2) {
122 assert((target-(u_int)ptr2-8)<1024);
123 assert((addr&3)==0);
124 assert((target&3)==0);
125 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
126 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
127 }
128 else if(ptr[3]==0x72) {
129 // generated by emit_jno_unlikely
130 if((target-(u_int)ptr2-8)<1024) {
131 assert((addr&3)==0);
132 assert((target&3)==0);
133 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
134 }
135 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
139 }
140 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142 else {
143 assert((ptr[3]&0x0e)==0xa);
144 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146}
147
148// This optionally copies the instruction from the target of the branch into
149// the space before the branch. Works, but the difference in speed is
150// usually insignificant.
151void set_jump_target_fillslot(int addr,u_int target,int copy)
152{
153 u_char *ptr=(u_char *)addr;
154 u_int *ptr2=(u_int *)ptr;
155 assert(!copy||ptr2[-1]==0xe28dd000);
156 if(ptr[3]==0xe2) {
157 assert(!copy);
158 assert((target-(u_int)ptr2-8)<4096);
159 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
160 }
161 else {
162 assert((ptr[3]&0x0e)==0xa);
163 u_int target_insn=*(u_int *)target;
164 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
165 copy=0;
166 }
167 if((target_insn&0x0c100000)==0x04100000) { // Load
168 copy=0;
169 }
170 if(target_insn&0x08000000) {
171 copy=0;
172 }
173 if(copy) {
174 ptr2[-1]=target_insn;
175 target+=4;
176 }
177 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
178 }
179}
180
181/* Literal pool */
182add_literal(int addr,int val)
183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
f76eeef9 190void *kill_pointer(void *stub)
57871462 191{
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 198 return i_ptr;
57871462 199}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
57871462 205int get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *ptr=(int *)(stub+4);
f968d35d 209 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 210 u_int offset=*ptr&0xfff;
211 int **l_ptr=(void *)ptr+offset+8;
212 int *i_ptr=*l_ptr;
213 assert((*i_ptr&0x0f000000)==0x0a000000);
214 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
215}
216
217// Find the "clean" entry point from a "dirty" entry point
218// by skipping past the call to verify_code
219u_int get_clean_addr(int addr)
220{
221 int *ptr=(int *)addr;
222 #ifdef ARMv5_ONLY
223 ptr+=4;
224 #else
225 ptr+=6;
226 #endif
227 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
228 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
229 ptr++;
230 if((*ptr&0xFF000000)==0xea000000) {
231 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
232 }
233 return (u_int)ptr;
234}
235
236int verify_dirty(int addr)
237{
238 u_int *ptr=(u_int *)addr;
239 #ifdef ARMv5_ONLY
240 // get from literal pool
15776b68 241 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 242 u_int offset=*ptr&0xfff;
243 u_int *l_ptr=(void *)ptr+offset+8;
244 u_int source=l_ptr[0];
245 u_int copy=l_ptr[1];
246 u_int len=l_ptr[2];
247 ptr+=4;
248 #else
249 // ARMv7 movw/movt
250 assert((*ptr&0xFFF00000)==0xe3000000);
251 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
252 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
253 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
254 ptr+=6;
255 #endif
256 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
257 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 258#ifndef DISABLE_TLB
cfcba99a 259 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 260 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
261 unsigned int page=source>>12;
262 unsigned int map_value=memory_map[page];
263 if(map_value>=0x80000000) return 0;
264 while(page<((source+len-1)>>12)) {
265 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
266 }
267 source = source+(map_value<<2);
268 }
63cb0298 269#endif
57871462 270 //printf("verify_dirty: %x %x %x\n",source,copy,len);
271 return !memcmp((void *)source,(void *)copy,len);
272}
273
274// This doesn't necessarily find all clean entry points, just
275// guarantees that it's not dirty
276int isclean(int addr)
277{
278 #ifdef ARMv5_ONLY
279 int *ptr=((u_int *)addr)+4;
280 #else
281 int *ptr=((u_int *)addr)+6;
282 #endif
283 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
284 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
286 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
287 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
288 return 1;
289}
290
291void get_bounds(int addr,u_int *start,u_int *end)
292{
293 u_int *ptr=(u_int *)addr;
294 #ifdef ARMv5_ONLY
295 // get from literal pool
15776b68 296 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 297 u_int offset=*ptr&0xfff;
298 u_int *l_ptr=(void *)ptr+offset+8;
299 u_int source=l_ptr[0];
300 //u_int copy=l_ptr[1];
301 u_int len=l_ptr[2];
302 ptr+=4;
303 #else
304 // ARMv7 movw/movt
305 assert((*ptr&0xFFF00000)==0xe3000000);
306 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
307 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
308 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
309 ptr+=6;
310 #endif
311 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
312 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 313#ifndef DISABLE_TLB
cfcba99a 314 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 315 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
316 if(memory_map[source>>12]>=0x80000000) source = 0;
317 else source = source+(memory_map[source>>12]<<2);
318 }
63cb0298 319#endif
57871462 320 *start=source;
321 *end=source+len;
322}
323
324/* Register allocation */
325
326// Note: registers are allocated clean (unmodified state)
327// if you intend to modify the register, you must call dirty_reg().
328void alloc_reg(struct regstat *cur,int i,signed char reg)
329{
330 int r,hr;
331 int preferred_reg = (reg&7);
332 if(reg==CCREG) preferred_reg=HOST_CCREG;
333 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
334
335 // Don't allocate unused registers
336 if((cur->u>>reg)&1) return;
337
338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(cur->regmap[hr]==reg) return;
342 }
343
344 // Keep the same mapping if the register was already allocated in a loop
345 preferred_reg = loop_reg(i,reg,preferred_reg);
346
347 // Try to allocate the preferred register
348 if(cur->regmap[preferred_reg]==-1) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
354 r=cur->regmap[preferred_reg];
355 if(r<64&&((cur->u>>r)&1)) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 if(r>=64&&((cur->uu>>(r&63))&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367
368 // Clear any unneeded registers
369 // We try to keep the mapping consistent, if possible, because it
370 // makes branches easier (especially loops). So we try to allocate
371 // first (see above) before removing old mappings. If this is not
372 // possible then go ahead and clear out the registers that are no
373 // longer needed.
374 for(hr=0;hr<HOST_REGS;hr++)
375 {
376 r=cur->regmap[hr];
377 if(r>=0) {
378 if(r<64) {
379 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
380 }
381 else
382 {
383 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
384 }
385 }
386 }
387 // Try to allocate any available register, but prefer
388 // registers that have not been used recently.
389 if(i>0) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isconst&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 }
401 // Try to allocate any available register
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410
411 // Ok, now we have to evict someone
412 // Pick a register we hopefully won't need soon
413 u_char hsn[MAXREG+1];
414 memset(hsn,10,sizeof(hsn));
415 int j;
416 lsn(hsn,i,&preferred_reg);
417 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
418 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
419 if(i>0) {
420 // Don't evict the cycle count at entry points, otherwise the entry
421 // stub will have to write it.
422 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
423 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
424 for(j=10;j>=3;j--)
425 {
426 // Alloc preferred register if available
427 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 // Evict both parts of a 64-bit register
430 if((cur->regmap[hr]&63)==r) {
431 cur->regmap[hr]=-1;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 }
435 }
436 cur->regmap[preferred_reg]=reg;
437 return;
438 }
439 for(r=1;r<=MAXREG;r++)
440 {
441 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r+64) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 for(hr=0;hr<HOST_REGS;hr++) {
453 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
454 if(cur->regmap[hr]==r) {
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->isconst&=~(1<<hr);
458 return;
459 }
460 }
461 }
462 }
463 }
464 }
465 }
466 for(j=10;j>=0;j--)
467 {
468 for(r=1;r<=MAXREG;r++)
469 {
470 if(hsn[r]==j) {
471 for(hr=0;hr<HOST_REGS;hr++) {
472 if(cur->regmap[hr]==r+64) {
473 cur->regmap[hr]=reg;
474 cur->dirty&=~(1<<hr);
475 cur->isconst&=~(1<<hr);
476 return;
477 }
478 }
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 }
488 }
489 }
490 printf("This shouldn't happen (alloc_reg)");exit(1);
491}
492
493void alloc_reg64(struct regstat *cur,int i,signed char reg)
494{
495 int preferred_reg = 8+(reg&1);
496 int r,hr;
497
498 // allocate the lower 32 bits
499 alloc_reg(cur,i,reg);
500
501 // Don't allocate unused registers
502 if((cur->uu>>reg)&1) return;
503
504 // see if the upper half is already allocated
505 for(hr=0;hr<HOST_REGS;hr++)
506 {
507 if(cur->regmap[hr]==reg+64) return;
508 }
509
510 // Keep the same mapping if the register was already allocated in a loop
511 preferred_reg = loop_reg(i,reg,preferred_reg);
512
513 // Try to allocate the preferred register
514 if(cur->regmap[preferred_reg]==-1) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
520 r=cur->regmap[preferred_reg];
521 if(r<64&&((cur->u>>r)&1)) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 if(r>=64&&((cur->uu>>(r&63))&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533
534 // Clear any unneeded registers
535 // We try to keep the mapping consistent, if possible, because it
536 // makes branches easier (especially loops). So we try to allocate
537 // first (see above) before removing old mappings. If this is not
538 // possible then go ahead and clear out the registers that are no
539 // longer needed.
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
546 }
547 else
548 {
549 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
550 }
551 }
552 }
553 // Try to allocate any available register, but prefer
554 // registers that have not been used recently.
555 if(i>0) {
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
559 cur->regmap[hr]=reg|64;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 // Try to allocate any available register
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576
577 // Ok, now we have to evict someone
578 // Pick a register we hopefully won't need soon
579 u_char hsn[MAXREG+1];
580 memset(hsn,10,sizeof(hsn));
581 int j;
582 lsn(hsn,i,&preferred_reg);
583 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
584 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
585 if(i>0) {
586 // Don't evict the cycle count at entry points, otherwise the entry
587 // stub will have to write it.
588 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
589 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
590 for(j=10;j>=3;j--)
591 {
592 // Alloc preferred register if available
593 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 // Evict both parts of a 64-bit register
596 if((cur->regmap[hr]&63)==r) {
597 cur->regmap[hr]=-1;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 }
601 }
602 cur->regmap[preferred_reg]=reg|64;
603 return;
604 }
605 for(r=1;r<=MAXREG;r++)
606 {
607 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r+64) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 for(hr=0;hr<HOST_REGS;hr++) {
619 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
620 if(cur->regmap[hr]==r) {
621 cur->regmap[hr]=reg|64;
622 cur->dirty&=~(1<<hr);
623 cur->isconst&=~(1<<hr);
624 return;
625 }
626 }
627 }
628 }
629 }
630 }
631 }
632 for(j=10;j>=0;j--)
633 {
634 for(r=1;r<=MAXREG;r++)
635 {
636 if(hsn[r]==j) {
637 for(hr=0;hr<HOST_REGS;hr++) {
638 if(cur->regmap[hr]==r+64) {
639 cur->regmap[hr]=reg|64;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 return;
643 }
644 }
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 }
654 }
655 }
656 printf("This shouldn't happen");exit(1);
657}
658
659// Allocate a temporary register. This is done without regard to
660// dirty status or whether the register we request is on the unneeded list
661// Note: This will only allocate one register, even if called multiple times
662void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
663{
664 int r,hr;
665 int preferred_reg = -1;
666
667 // see if it's already allocated
668 for(hr=0;hr<HOST_REGS;hr++)
669 {
670 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
671 }
672
673 // Try to allocate any available register
674 for(hr=HOST_REGS-1;hr>=0;hr--) {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682
683 // Find an unneeded register
684 for(hr=HOST_REGS-1;hr>=0;hr--)
685 {
686 r=cur->regmap[hr];
687 if(r>=0) {
688 if(r<64) {
689 if((cur->u>>r)&1) {
690 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 else
699 {
700 if((cur->uu>>(r&63))&1) {
701 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
702 cur->regmap[hr]=reg;
703 cur->dirty&=~(1<<hr);
704 cur->isconst&=~(1<<hr);
705 return;
706 }
707 }
708 }
709 }
710 }
711
712 // Ok, now we have to evict someone
713 // Pick a register we hopefully won't need soon
714 // TODO: we might want to follow unconditional jumps here
715 // TODO: get rid of dupe code and make this into a function
716 u_char hsn[MAXREG+1];
717 memset(hsn,10,sizeof(hsn));
718 int j;
719 lsn(hsn,i,&preferred_reg);
720 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
721 if(i>0) {
722 // Don't evict the cycle count at entry points, otherwise the entry
723 // stub will have to write it.
724 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
725 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
726 for(j=10;j>=3;j--)
727 {
728 for(r=1;r<=MAXREG;r++)
729 {
730 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r+64) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 for(hr=0;hr<HOST_REGS;hr++) {
742 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
743 if(cur->regmap[hr]==r) {
744 cur->regmap[hr]=reg;
745 cur->dirty&=~(1<<hr);
746 cur->isconst&=~(1<<hr);
747 return;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
755 for(j=10;j>=0;j--)
756 {
757 for(r=1;r<=MAXREG;r++)
758 {
759 if(hsn[r]==j) {
760 for(hr=0;hr<HOST_REGS;hr++) {
761 if(cur->regmap[hr]==r+64) {
762 cur->regmap[hr]=reg;
763 cur->dirty&=~(1<<hr);
764 cur->isconst&=~(1<<hr);
765 return;
766 }
767 }
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 }
777 }
778 }
779 printf("This shouldn't happen");exit(1);
780}
781// Allocate a specific ARM register.
782void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
783{
784 int n;
f776eb14 785 int dirty=0;
57871462 786
787 // see if it's already allocated (and dealloc it)
788 for(n=0;n<HOST_REGS;n++)
789 {
f776eb14 790 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
791 dirty=(cur->dirty>>n)&1;
792 cur->regmap[n]=-1;
793 }
57871462 794 }
795
796 cur->regmap[hr]=reg;
797 cur->dirty&=~(1<<hr);
f776eb14 798 cur->dirty|=dirty<<hr;
57871462 799 cur->isconst&=~(1<<hr);
800}
801
802// Alloc cycle count into dedicated register
803alloc_cc(struct regstat *cur,int i)
804{
805 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
806}
807
808/* Special alloc */
809
810
811/* Assembler */
812
// Printable names of the 16 ARM registers, indexed by register number
// (used in the assem_debug listings).
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12", "sp", "lr", "pc"
};
830
831void output_byte(u_char byte)
832{
833 *(out++)=byte;
834}
835void output_modrm(u_char mod,u_char rm,u_char ext)
836{
837 assert(mod<4);
838 assert(rm<8);
839 assert(ext<8);
840 u_char byte=(mod<<6)|(ext<<3)|rm;
841 *(out++)=byte;
842}
843void output_sib(u_char scale,u_char index,u_char base)
844{
845 assert(scale<4);
846 assert(index<8);
847 assert(base<8);
848 u_char byte=(scale<<6)|(index<<3)|base;
849 *(out++)=byte;
850}
851void output_w32(u_int word)
852{
853 *((u_int *)out)=word;
854 out+=4;
855}
// Build the register fields of an instruction word:
// rn in bits 16-19, rd in bits 12-15, rm in bits 0-3.
u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
{
  assert(rd<16);
  assert(rn<16);
  assert(rm<16);
  u_int fields=rm;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Build the rn/rd fields plus an 8-bit immediate that is to be shifted left
// by 'shift' bits (shift must be even). The shift is stored in bits 8-11
// as ((32-shift)&30)>>1.
u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
{
  assert(rd<16);
  assert(rn<16);
  assert(imm<256);
  assert((shift&1)==0);
  u_int rot=(32-shift)&30;
  u_int fields=imm;
  fields|=rot<<7;
  fields|=rd<<12;
  fields|=rn<<16;
  return fields;
}
// Try to express 'imm' as an 8-bit value with an even rotation.
// On success returns 1 and stores the 12-bit field (rotation in bits 8-11,
// value in bits 0-7) in *encoded. Returns 0, with *encoded set to 0, when
// no rotation leaves a value below 256.
u_int genimm(u_int imm,u_int *encoded)
{
  int rot;
  *encoded=0;
  if(imm==0) return 1;
  for(rot=32;rot>0;rot-=2)
  {
    if(imm<256) {
      *encoded=((rot&30)<<7)|imm;
      return 1;
    }
    // rotate right by two bits and try again
    imm=(imm>>2)|(imm<<30);
  }
  return 0;
}
cfbd3c6e 886void genimm_checked(u_int imm,u_int *encoded)
887{
888 u_int ret=genimm(imm,encoded);
889 assert(ret);
890}
57871462 891u_int genjmp(u_int addr)
892{
893 int offset=addr-(int)out-8;
e80343e2 894 if(offset<-33554432||offset>=33554432) {
895 if (addr>2) {
896 printf("genjmp: out of range: %08x\n", offset);
897 exit(1);
898 }
899 return 0;
900 }
57871462 901 return ((u_int)offset>>2)&0xffffff;
902}
903
904void emit_mov(int rs,int rt)
905{
906 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
907 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
908}
909
910void emit_movs(int rs,int rt)
911{
912 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
913 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
914}
915
916void emit_add(int rs1,int rs2,int rt)
917{
918 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
919 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
920}
921
922void emit_adds(int rs1,int rs2,int rt)
923{
924 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
926}
927
928void emit_adcs(int rs1,int rs2,int rt)
929{
930 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_sbc(int rs1,int rs2,int rt)
935{
936 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_sbcs(int rs1,int rs2,int rt)
941{
942 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_neg(int rs, int rt)
947{
948 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
949 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
950}
951
952void emit_negs(int rs, int rt)
953{
954 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
955 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
956}
957
958void emit_sub(int rs1,int rs2,int rt)
959{
960 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
961 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
962}
963
964void emit_subs(int rs1,int rs2,int rt)
965{
966 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
967 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
968}
969
970void emit_zeroreg(int rt)
971{
972 assem_debug("mov %s,#0\n",regname[rt]);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
974}
975
790ee18e 976void emit_loadlp(u_int imm,u_int rt)
977{
978 add_literal((int)out,imm);
979 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
980 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
981}
982void emit_movw(u_int imm,u_int rt)
983{
984 assert(imm<65536);
985 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
986 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
987}
988void emit_movt(u_int imm,u_int rt)
989{
990 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
991 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
992}
993void emit_movimm(u_int imm,u_int rt)
994{
995 u_int armval;
996 if(genimm(imm,&armval)) {
997 assem_debug("mov %s,#%d\n",regname[rt],imm);
998 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
999 }else if(genimm(~imm,&armval)) {
1000 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1001 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1002 }else if(imm<65536) {
1003 #ifdef ARMv5_ONLY
1004 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1005 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1006 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1007 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1008 #else
1009 emit_movw(imm,rt);
1010 #endif
1011 }else{
1012 #ifdef ARMv5_ONLY
1013 emit_loadlp(imm,rt);
1014 #else
1015 emit_movw(imm&0x0000FFFF,rt);
1016 emit_movt(imm&0xFFFF0000,rt);
1017 #endif
1018 }
1019}
1020void emit_pcreladdr(u_int rt)
1021{
1022 assem_debug("add %s,pc,#?\n",regname[rt]);
1023 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1024}
1025
57871462 1026void emit_loadreg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit load in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
57871462 1035 if((r&63)==0)
1036 emit_zeroreg(hr);
1037 else {
3d624f89 1038 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1039 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1040 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1041 if(r==CCREG) addr=(int)&cycle_count;
1042 if(r==CSREG) addr=(int)&Status;
1043 if(r==FSREG) addr=(int)&FCR31;
1044 if(r==INVCP) addr=(int)&invc_ptr;
1045 u_int offset = addr-(u_int)&dynarec_local;
1046 assert(offset<4096);
1047 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1048 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1049 }
1050}
1051void emit_storereg(int r, int hr)
1052{
3d624f89 1053#ifdef FORCE32
1054 if(r&64) {
1055 printf("64bit store in 32bit mode!\n");
7f2607ea 1056 assert(0);
1057 return;
3d624f89 1058 }
1059#endif
1060 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1061 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1062 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1063 if(r==CCREG) addr=(int)&cycle_count;
1064 if(r==FSREG) addr=(int)&FCR31;
1065 u_int offset = addr-(u_int)&dynarec_local;
1066 assert(offset<4096);
1067 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1068 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1069}
1070
1071void emit_test(int rs, int rt)
1072{
1073 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1074 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1075}
1076
1077void emit_testimm(int rs,int imm)
1078{
1079 u_int armval;
5a05d80c 1080 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1081 genimm_checked(imm,&armval);
57871462 1082 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1083}
1084
b9b61529 1085void emit_testeqimm(int rs,int imm)
1086{
1087 u_int armval;
1088 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1089 genimm_checked(imm,&armval);
b9b61529 1090 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1091}
1092
57871462 1093void emit_not(int rs,int rt)
1094{
1095 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1096 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1097}
1098
b9b61529 1099void emit_mvnmi(int rs,int rt)
1100{
1101 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1102 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1103}
1104
57871462 1105void emit_and(u_int rs1,u_int rs2,u_int rt)
1106{
1107 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
1111void emit_or(u_int rs1,u_int rs2,u_int rt)
1112{
1113 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1115}
1116void emit_or_and_set_flags(int rs1,int rs2,int rt)
1117{
1118 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1120}
1121
f70d384d 1122void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1123{
1124 assert(rs<16);
1125 assert(rt<16);
1126 assert(imm<32);
1127 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1128 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1129}
1130
576bbd8f 1131void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1132{
1133 assert(rs<16);
1134 assert(rt<16);
1135 assert(imm<32);
1136 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1137 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1138}
1139
57871462 1140void emit_xor(u_int rs1,u_int rs2,u_int rt)
1141{
1142 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1143 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1144}
1145
57871462 1146void emit_addimm(u_int rs,int imm,u_int rt)
1147{
1148 assert(rs<16);
1149 assert(rt<16);
1150 if(imm!=0) {
57871462 1151 u_int armval;
1152 if(genimm(imm,&armval)) {
1153 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1154 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1155 }else if(genimm(-imm,&armval)) {
8a0a8423 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1157 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1158 }else if(imm<0) {
ffb0b9e0 1159 assert(imm>-65536);
57871462 1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1161 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1163 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
ffb0b9e0 1165 assert(imm<65536);
57871462 1166 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1167 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1168 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1169 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1170 }
1171 }
1172 else if(rs!=rt) emit_mov(rs,rt);
1173}
1174
1175void emit_addimm_and_set_flags(int imm,int rt)
1176{
1177 assert(imm>-65536&&imm<65536);
1178 u_int armval;
1179 if(genimm(imm,&armval)) {
1180 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1181 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1182 }else if(genimm(-imm,&armval)) {
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1184 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1185 }else if(imm<0) {
1186 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1187 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1188 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1189 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1190 }else{
1191 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1192 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1193 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1194 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1195 }
1196}
1197void emit_addimm_no_flags(u_int imm,u_int rt)
1198{
1199 emit_addimm(rt,imm,rt);
1200}
1201
1202void emit_addnop(u_int r)
1203{
1204 assert(r<16);
1205 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1206 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1207}
1208
1209void emit_adcimm(u_int rs,int imm,u_int rt)
1210{
1211 u_int armval;
cfbd3c6e 1212 genimm_checked(imm,&armval);
57871462 1213 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1215}
1216/*void emit_sbcimm(int imm,u_int rt)
1217{
1218 u_int armval;
cfbd3c6e 1219 genimm_checked(imm,&armval);
57871462 1220 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1221 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1222}*/
1223void emit_sbbimm(int imm,u_int rt)
1224{
1225 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1226 assert(rt<8);
1227 if(imm<128&&imm>=-128) {
1228 output_byte(0x83);
1229 output_modrm(3,rt,3);
1230 output_byte(imm);
1231 }
1232 else
1233 {
1234 output_byte(0x81);
1235 output_modrm(3,rt,3);
1236 output_w32(imm);
1237 }
1238}
1239void emit_rscimm(int rs,int imm,u_int rt)
1240{
1241 assert(0);
1242 u_int armval;
cfbd3c6e 1243 genimm_checked(imm,&armval);
57871462 1244 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1246}
1247
1248void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1249{
1250 // TODO: if(genimm(imm,&armval)) ...
1251 // else
1252 emit_movimm(imm,HOST_TEMPREG);
1253 emit_adds(HOST_TEMPREG,rsl,rtl);
1254 emit_adcimm(rsh,0,rth);
1255}
1256
1257void emit_sbb(int rs1,int rs2)
1258{
1259 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1260 output_byte(0x19);
1261 output_modrm(3,rs1,rs2);
1262}
1263
1264void emit_andimm(int rs,int imm,int rt)
1265{
1266 u_int armval;
790ee18e 1267 if(imm==0) {
1268 emit_zeroreg(rt);
1269 }else if(genimm(imm,&armval)) {
57871462 1270 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1271 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1272 }else if(genimm(~imm,&armval)) {
1273 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1275 }else if(imm==65535) {
1276 #ifdef ARMv5_ONLY
1277 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1278 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1279 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1280 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1281 #else
1282 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1283 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1284 #endif
1285 }else{
1286 assert(imm>0&&imm<65535);
1287 #ifdef ARMv5_ONLY
1288 assem_debug("mov r14,#%d\n",imm&0xFF00);
1289 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1290 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1291 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1292 #else
1293 emit_movw(imm,HOST_TEMPREG);
1294 #endif
1295 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1296 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1297 }
1298}
1299
1300void emit_orimm(int rs,int imm,int rt)
1301{
1302 u_int armval;
790ee18e 1303 if(imm==0) {
1304 if(rs!=rt) emit_mov(rs,rt);
1305 }else if(genimm(imm,&armval)) {
57871462 1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1308 }else{
1309 assert(imm>0&&imm<65536);
1310 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1311 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1312 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1313 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1314 }
1315}
1316
1317void emit_xorimm(int rs,int imm,int rt)
1318{
57871462 1319 u_int armval;
790ee18e 1320 if(imm==0) {
1321 if(rs!=rt) emit_mov(rs,rt);
1322 }else if(genimm(imm,&armval)) {
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1325 }else{
514ed0d9 1326 assert(imm>0&&imm<65536);
57871462 1327 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1328 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1329 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1330 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1331 }
1332}
1333
1334void emit_shlimm(int rs,u_int imm,int rt)
1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 //if(imm==1) ...
1339 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1341}
1342
c6c3b1b3 1343void emit_lsls_imm(int rs,int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349}
1350
57871462 1351void emit_shrimm(int rs,u_int imm,int rt)
1352{
1353 assert(imm>0);
1354 assert(imm<32);
1355 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1357}
1358
1359void emit_sarimm(int rs,u_int imm,int rt)
1360{
1361 assert(imm>0);
1362 assert(imm<32);
1363 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1364 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1365}
1366
1367void emit_rorimm(int rs,u_int imm,int rt)
1368{
1369 assert(imm>0);
1370 assert(imm<32);
1371 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1373}
1374
1375void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1376{
1377 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1378 assert(imm>0);
1379 assert(imm<32);
1380 //if(imm==1) ...
1381 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1382 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1383 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1384 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1385}
1386
1387void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1388{
1389 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1390 assert(imm>0);
1391 assert(imm<32);
1392 //if(imm==1) ...
1393 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1394 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1395 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1396 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1397}
1398
b9b61529 1399void emit_signextend16(int rs,int rt)
1400{
1401 #ifdef ARMv5_ONLY
1402 emit_shlimm(rs,16,rt);
1403 emit_sarimm(rt,16,rt);
1404 #else
1405 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1406 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1407 #endif
1408}
1409
c6c3b1b3 1410void emit_signextend8(int rs,int rt)
1411{
1412 #ifdef ARMv5_ONLY
1413 emit_shlimm(rs,24,rt);
1414 emit_sarimm(rt,24,rt);
1415 #else
1416 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1417 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1418 #endif
1419}
1420
57871462 1421void emit_shl(u_int rs,u_int shift,u_int rt)
1422{
1423 assert(rs<16);
1424 assert(rt<16);
1425 assert(shift<16);
1426 //if(imm==1) ...
1427 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1428 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1429}
1430void emit_shr(u_int rs,u_int shift,u_int rt)
1431{
1432 assert(rs<16);
1433 assert(rt<16);
1434 assert(shift<16);
1435 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1436 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1437}
1438void emit_sar(u_int rs,u_int shift,u_int rt)
1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1445}
1446void emit_shlcl(int r)
1447{
1448 assem_debug("shl %%%s,%%cl\n",regname[r]);
1449 assert(0);
1450}
1451void emit_shrcl(int r)
1452{
1453 assem_debug("shr %%%s,%%cl\n",regname[r]);
1454 assert(0);
1455}
1456void emit_sarcl(int r)
1457{
1458 assem_debug("sar %%%s,%%cl\n",regname[r]);
1459 assert(0);
1460}
1461
1462void emit_shldcl(int r1,int r2)
1463{
1464 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1465 assert(0);
1466}
1467void emit_shrdcl(int r1,int r2)
1468{
1469 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1470 assert(0);
1471}
1472void emit_orrshl(u_int rs,u_int shift,u_int rt)
1473{
1474 assert(rs<16);
1475 assert(rt<16);
1476 assert(shift<16);
1477 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1478 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1479}
1480void emit_orrshr(u_int rs,u_int shift,u_int rt)
1481{
1482 assert(rs<16);
1483 assert(rt<16);
1484 assert(shift<16);
1485 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1486 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1487}
1488
1489void emit_cmpimm(int rs,int imm)
1490{
1491 u_int armval;
1492 if(genimm(imm,&armval)) {
5a05d80c 1493 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1494 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1495 }else if(genimm(-imm,&armval)) {
5a05d80c 1496 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1497 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1498 }else if(imm>0) {
1499 assert(imm<65536);
1500 #ifdef ARMv5_ONLY
1501 emit_movimm(imm,HOST_TEMPREG);
1502 #else
1503 emit_movw(imm,HOST_TEMPREG);
1504 #endif
1505 assem_debug("cmp %s,r14\n",regname[rs]);
1506 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1507 }else{
1508 assert(imm>-65536);
1509 #ifdef ARMv5_ONLY
1510 emit_movimm(-imm,HOST_TEMPREG);
1511 #else
1512 emit_movw(-imm,HOST_TEMPREG);
1513 #endif
1514 assem_debug("cmn %s,r14\n",regname[rs]);
1515 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1516 }
1517}
1518
1519void emit_cmovne(u_int *addr,int rt)
1520{
1521 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1522 assert(0);
1523}
1524void emit_cmovl(u_int *addr,int rt)
1525{
1526 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1527 assert(0);
1528}
1529void emit_cmovs(u_int *addr,int rt)
1530{
1531 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1532 assert(0);
1533}
1534void emit_cmovne_imm(int imm,int rt)
1535{
1536 assem_debug("movne %s,#%d\n",regname[rt],imm);
1537 u_int armval;
cfbd3c6e 1538 genimm_checked(imm,&armval);
57871462 1539 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1540}
1541void emit_cmovl_imm(int imm,int rt)
1542{
1543 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1544 u_int armval;
cfbd3c6e 1545 genimm_checked(imm,&armval);
57871462 1546 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1547}
1548void emit_cmovb_imm(int imm,int rt)
1549{
1550 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1551 u_int armval;
cfbd3c6e 1552 genimm_checked(imm,&armval);
57871462 1553 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1554}
1555void emit_cmovs_imm(int imm,int rt)
1556{
1557 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1558 u_int armval;
cfbd3c6e 1559 genimm_checked(imm,&armval);
57871462 1560 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1561}
1562void emit_cmove_reg(int rs,int rt)
1563{
1564 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1566}
1567void emit_cmovne_reg(int rs,int rt)
1568{
1569 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1571}
1572void emit_cmovl_reg(int rs,int rt)
1573{
1574 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1575 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1576}
1577void emit_cmovs_reg(int rs,int rt)
1578{
1579 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1580 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1581}
1582
/* rt = (rs < imm) ? 1 : 0, signed 32-bit compare.
 * rt is cleared before the compare when it is a different register from rs,
 * and after it when rs and rt alias, so the compare always sees rs intact. */
void emit_slti32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/* rt = (rs < imm) ? 1 : 0 using the carry-clear condition after the compare
 * (unsigned variant of emit_slti32).
 * rt is cleared before the compare when it is a different register from rs,
 * and after it when rs and rt alias, so the compare always sees rs intact. */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int aliased=(rs==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/* Set-less-than-immediate on a 64-bit value held in rsh:rsl, result in rt.
 * The low-word result from emit_slti32() is computed first and then
 * overridden depending on the high word:
 *   - imm<0:  high word is compared with -1; NE forces 0, LT forces 1;
 *   - imm>=0: high word is tested against itself; NE forces 0, MI forces 1.
 * rsh must not be the same register as rt. */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
/* Unsigned set-less-than-immediate on a 64-bit value held in rsh:rsl,
 * result in rt.  The low-word result from emit_sltiu32() is computed first
 * and then overridden depending on the high word:
 *   - imm<0:  high word is compared with -1; NE forces 1;
 *   - imm>=0: high word is tested against itself; NE forces 0.
 * rsh must not be the same register as rt. */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1629
1630void emit_cmp(int rs,int rt)
1631{
1632 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1633 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1634}
/* rt = (rs > 0) ? 1 : 0, signed.
 * Sequence: compare rs with 1, load 1 into rt, then replace it with 0 when
 * the compare found rs less than 1. */
void emit_set_gz32(int rs, int rt)
{
  const int one=1;
  emit_cmpimm(rs,one);
  emit_movimm(one,rt);
  emit_cmovl_imm(0,rt);
}
/* rt = (rs != 0) ? 1 : 0.
 * When rs and rt differ, a flag-setting move copies rs to rt (leaving 0 in
 * the zero case); when they alias, rs is tested in place.  A conditional
 * move then writes 1 on NE. */
void emit_set_nz32(int rs, int rt)
{
  if(rs==rt) emit_test(rs,rs);
  else emit_movs(rs,rt);
  emit_cmovne_imm(1,rt);
}
/* rt = (rsh:rsl > 0) ? 1 : 0 for a signed 64-bit value.
 * Starts from the low-word result of emit_set_gz32(), then tests the high
 * word: NE forces 1 and MI (negative) forces 0. */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  emit_set_gz32(rsl,rt);
  emit_test(rsh,rsh);
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
/* rt = (rsh:rsl != 0) ? 1 : 0.
 * ORs both halves into rt with flags set, then writes 1 on NE. */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  emit_or_and_set_flags(rsh,rsl,rt);
  emit_cmovne_imm(1,rt);
}
/* rt = (rs1 < rs2) ? 1 : 0, signed.
 * rt is cleared before the compare when it is distinct from both sources,
 * and after the compare when it aliases either of them. */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/* rt = 1 when "cmp rs1,rs2" leaves the carry clear (CC), else 0.
 * rt is cleared before the compare when it is distinct from both sources,
 * and after the compare when it aliases either of them. */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  const int aliased=(rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1679void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1680{
1681 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1682 assert(u1!=rt);
1683 assert(u2!=rt);
1684 emit_cmp(l1,l2);
1685 emit_movimm(0,rt);
1686 emit_sbcs(u1,u2,HOST_TEMPREG);
1687 emit_cmovl_imm(1,rt);
1688}
1689void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1690{
1691 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1692 assert(u1!=rt);
1693 assert(u2!=rt);
1694 emit_cmp(l1,l2);
1695 emit_movimm(0,rt);
1696 emit_sbcs(u1,u2,HOST_TEMPREG);
1697 emit_cmovb_imm(1,rt);
1698}
1699
1700void emit_call(int a)
1701{
1702 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1703 u_int offset=genjmp(a);
1704 output_w32(0xeb000000|offset);
1705}
1706void emit_jmp(int a)
1707{
1708 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1709 u_int offset=genjmp(a);
1710 output_w32(0xea000000|offset);
1711}
1712void emit_jne(int a)
1713{
1714 assem_debug("bne %x\n",a);
1715 u_int offset=genjmp(a);
1716 output_w32(0x1a000000|offset);
1717}
1718void emit_jeq(int a)
1719{
1720 assem_debug("beq %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x0a000000|offset);
1723}
1724void emit_js(int a)
1725{
1726 assem_debug("bmi %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x4a000000|offset);
1729}
1730void emit_jns(int a)
1731{
1732 assem_debug("bpl %x\n",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0x5a000000|offset);
1735}
1736void emit_jl(int a)
1737{
1738 assem_debug("blt %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0xba000000|offset);
1741}
1742void emit_jge(int a)
1743{
1744 assem_debug("bge %x\n",a);
1745 u_int offset=genjmp(a);
1746 output_w32(0xaa000000|offset);
1747}
1748void emit_jno(int a)
1749{
1750 assem_debug("bvc %x\n",a);
1751 u_int offset=genjmp(a);
1752 output_w32(0x7a000000|offset);
1753}
1754void emit_jc(int a)
1755{
1756 assem_debug("bcs %x\n",a);
1757 u_int offset=genjmp(a);
1758 output_w32(0x2a000000|offset);
1759}
1760void emit_jcc(int a)
1761{
1762 assem_debug("bcc %x\n",a);
1763 u_int offset=genjmp(a);
1764 output_w32(0x3a000000|offset);
1765}
1766
/* Unimplemented stub: logs an x86-style "push $imm" and then asserts
 * unconditionally; no code is emitted. */
void emit_pushimm(int imm)
{
  const int value=imm;
  assem_debug("push $%x\n",value);
  assert(0);
}
/* Unimplemented stub: logs "pusha" and then asserts unconditionally;
 * no code is emitted. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0);
}
/* Unimplemented stub: logs "popa" and then asserts unconditionally;
 * no code is emitted. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0);
}
1782void emit_pushreg(u_int r)
1783{
1784 assem_debug("push %%%s\n",regname[r]);
1785 assert(0);
1786}
1787void emit_popreg(u_int r)
1788{
1789 assem_debug("pop %%%s\n",regname[r]);
1790 assert(0);
1791}
1792void emit_callreg(u_int r)
1793{
c6c3b1b3 1794 assert(r<15);
1795 assem_debug("blx %s\n",regname[r]);
1796 output_w32(0xe12fff30|r);
57871462 1797}
1798void emit_jmpreg(u_int r)
1799{
1800 assem_debug("mov pc,%s\n",regname[r]);
1801 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1802}
1803
1804void emit_readword_indexed(int offset, int rs, int rt)
1805{
1806 assert(offset>-4096&&offset<4096);
1807 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1808 if(offset>=0) {
1809 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1810 }else{
1811 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1812 }
1813}
1814void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1818}
c6c3b1b3 1819void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1833}
1834void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1835{
1836 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1837 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1838}
1839void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1843}
/* Word load with an optional map register.
 * map<0: plain "ldr rt,[rs,#addr]".
 * map>=0: addr must be 0 and the load uses rs + map*4. */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/* Doubleword load into rh (word at addr) and rl (word at addr+4), with an
 * optional map register.  The high-word load is skipped when rh<0.
 * In the mapped case map is incremented by one between the two loads (one
 * map unit is four bytes after the x4 scaling), so map is modified; rh must
 * not be the same register as rs. */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  const int want_high=(rh>=0);
  if(map<0) {
    if(want_high) emit_readword_indexed(addr, rs, rh);
    emit_readword_indexed(addr+4, rs, rl);
    return;
  }
  assert(rh!=rs);
  if(want_high) emit_readword_indexed_tlb(addr, rs, map, rh);
  emit_addimm(map,1,map);
  emit_readword_indexed_tlb(addr, rs, map, rl);
}
1864void emit_movsbl_indexed(int offset, int rs, int rt)
1865{
1866 assert(offset>-256&&offset<256);
1867 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1868 if(offset>=0) {
1869 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1870 }else{
1871 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1872 }
1873}
1874void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1875{
1876 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1877 else {
1878 if(addr==0) {
1879 emit_shlimm(map,2,map);
1880 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1882 }else{
1883 assert(addr>-256&&addr<256);
1884 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1885 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1886 emit_movsbl_indexed(addr, rt, rt);
1887 }
1888 }
1889}
1890void emit_movswl_indexed(int offset, int rs, int rt)
1891{
1892 assert(offset>-256&&offset<256);
1893 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1894 if(offset>=0) {
1895 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1896 }else{
1897 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1898 }
1899}
1900void emit_movzbl_indexed(int offset, int rs, int rt)
1901{
1902 assert(offset>-4096&&offset<4096);
1903 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1906 }else{
1907 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1908 }
1909}
1910void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1911{
1912 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1914}
/* Zero-extending byte load with an optional map register.
 * map<0: plain indexed load.
 * map>=0: loads from (rs+addr) + map*4; when addr is non-zero the sum
 * rs+addr is first formed in rt. */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
    return;
  }
  emit_movzbl_dualindexedx4(rs, map, rt);
}
1927void emit_movzwl_indexed(int offset, int rs, int rt)
1928{
1929 assert(offset>-256&&offset<256);
1930 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1933 }else{
1934 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1935 }
1936}
054175e9 1937static void emit_ldrd(int offset, int rs, int rt)
1938{
1939 assert(offset>-256&&offset<256);
1940 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1941 if(offset>=0) {
1942 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1943 }else{
1944 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1945 }
1946}
57871462 1947void emit_readword(int addr, int rt)
1948{
1949 u_int offset = addr-(u_int)&dynarec_local;
1950 assert(offset<4096);
1951 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1952 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1953}
1954void emit_movsbl(int addr, int rt)
1955{
1956 u_int offset = addr-(u_int)&dynarec_local;
1957 assert(offset<256);
1958 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1959 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1960}
1961void emit_movswl(int addr, int rt)
1962{
1963 u_int offset = addr-(u_int)&dynarec_local;
1964 assert(offset<256);
1965 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1966 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1967}
1968void emit_movzbl(int addr, int rt)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<4096);
1972 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1973 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1974}
1975void emit_movzwl(int addr, int rt)
1976{
1977 u_int offset = addr-(u_int)&dynarec_local;
1978 assert(offset<256);
1979 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1980 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1981}
1982void emit_movzwl_reg(int rs, int rt)
1983{
1984 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1985 assert(0);
1986}
1987
1988void emit_xchg(int rs, int rt)
1989{
1990 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1991 assert(0);
1992}
1993void emit_writeword_indexed(int rt, int offset, int rs)
1994{
1995 assert(offset>-4096&&offset<4096);
1996 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1997 if(offset>=0) {
1998 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1999 }else{
2000 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2001 }
2002}
2003void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2004{
2005 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2006 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2007}
/* Word store with an optional map register.
 * map<0: plain "str rt,[rs,#addr]".
 * map>=0: addr must be 0 and the store goes to rs + map*4.
 * The temp argument is not used by this function. */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/* Doubleword store of rh (to addr) and rl (to addr+4) with an optional map
 * register.
 * map<0: two plain indexed stores; the high store is skipped when rh<0.
 * map>=0: rh must be valid.  When temp is a different register from rs,
 *   temp = map+1 is prepared and used as the map for the low word;
 *   otherwise rs itself is advanced by 4 before the low-word store. */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_usable=(temp!=rs);
  if(temp_usable) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
    return;
  }
  emit_addimm(rs,4,rs);
  emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
}
2032void emit_writehword_indexed(int rt, int offset, int rs)
2033{
2034 assert(offset>-256&&offset<256);
2035 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2036 if(offset>=0) {
2037 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2038 }else{
2039 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2040 }
2041}
2042void emit_writebyte_indexed(int rt, int offset, int rs)
2043{
2044 assert(offset>-4096&&offset<4096);
2045 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2046 if(offset>=0) {
2047 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2048 }else{
2049 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2050 }
2051}
2052void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2053{
2054 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2055 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2056}
/* Byte store with an optional map register.
 * map<0: plain indexed store.
 * map>=0: stores to (rs+addr) + map*4; when addr is non-zero the sum
 * rs+addr is first formed in temp. */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
    return;
  }
  emit_writebyte_dualindexedx4(rt, rs, map);
}
b96d3df7 2069void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2070{
2071 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2072 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2073}
2074void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2075{
2076 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2077 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2078}
2079void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2080{
2081 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2083}
57871462 2084void emit_writeword(int rt, int addr)
2085{
2086 u_int offset = addr-(u_int)&dynarec_local;
2087 assert(offset<4096);
2088 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2089 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2090}
2091void emit_writehword(int rt, int addr)
2092{
2093 u_int offset = addr-(u_int)&dynarec_local;
2094 assert(offset<256);
2095 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2096 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2097}
2098void emit_writebyte(int rt, int addr)
2099{
2100 u_int offset = addr-(u_int)&dynarec_local;
2101 assert(offset<4096);
74426039 2102 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2103 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2104}
/* Unimplemented stub: logs an x86-style "movl $imm,addr" and then asserts
 * unconditionally; no code is emitted. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0);
}
/* Unimplemented stub: logs an x86-style "movb $imm,addr" and then asserts
 * unconditionally; no code is emitted. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0);
}
2115
2116void emit_mul(int rs)
2117{
2118 assem_debug("mul %%%s\n",regname[rs]);
2119 assert(0);
2120}
2121void emit_imul(int rs)
2122{
2123 assem_debug("imul %%%s\n",regname[rs]);
2124 assert(0);
2125}
2126void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2127{
2128 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2129 assert(rs1<16);
2130 assert(rs2<16);
2131 assert(hi<16);
2132 assert(lo<16);
2133 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2134}
2135void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2136{
2137 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2138 assert(rs1<16);
2139 assert(rs2<16);
2140 assert(hi<16);
2141 assert(lo<16);
2142 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2143}
2144
2145void emit_div(int rs)
2146{
2147 assem_debug("div %%%s\n",regname[rs]);
2148 assert(0);
2149}
2150void emit_idiv(int rs)
2151{
2152 assem_debug("idiv %%%s\n",regname[rs]);
2153 assert(0);
2154}
// Not implemented in this backend: logs the request and then always
// fails assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2160
2161void emit_clz(int rs,int rt)
2162{
2163 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2164 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2165}
2166
2167void emit_subcs(int rs1,int rs2,int rt)
2168{
2169 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2170 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2171}
2172
2173void emit_shrcc_imm(int rs,u_int imm,int rt)
2174{
2175 assert(imm>0);
2176 assert(imm<32);
2177 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2178 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2179}
2180
b1be1eee 2181void emit_shrne_imm(int rs,u_int imm,int rt)
2182{
2183 assert(imm>0);
2184 assert(imm<32);
2185 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2186 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2187}
2188
57871462 2189void emit_negmi(int rs, int rt)
2190{
2191 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2192 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2193}
2194
2195void emit_negsmi(int rs, int rt)
2196{
2197 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2198 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2199}
2200
2201void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2202{
2203 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2204 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2205}
2206
2207void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2208{
2209 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2210 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2211}
2212
2213void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2214{
2215 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2217}
2218
2219void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2223}
2224
2225void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2229}
2230
2231void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2235}
2236
2237void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2241}
2242
2243void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2247}
2248
2249void emit_teq(int rs, int rt)
2250{
2251 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2252 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2253}
2254
2255void emit_rsbimm(int rs, int imm, int rt)
2256{
2257 u_int armval;
cfbd3c6e 2258 genimm_checked(imm,&armval);
57871462 2259 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2260 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2261}
2262
2263// Load 2 immediates optimizing for small code size
2264void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2265{
2266 emit_movimm(imm1,rt1);
2267 u_int armval;
2268 if(genimm(imm2-imm1,&armval)) {
2269 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2270 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2271 }else if(genimm(imm1-imm2,&armval)) {
2272 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2273 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2274 }
2275 else emit_movimm(imm2,rt2);
2276}
2277
2278// Conditionally select one of two immediates, optimizing for small code size
2279// This will only be called if HAVE_CMOV_IMM is defined
2280void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2281{
2282 u_int armval;
2283 if(genimm(imm2-imm1,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2286 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2287 }else if(genimm(imm1-imm2,&armval)) {
2288 emit_movimm(imm1,rt);
2289 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2290 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2291 }
2292 else {
2293 #ifdef ARMv5_ONLY
2294 emit_movimm(imm1,rt);
2295 add_literal((int)out,imm2);
2296 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2297 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2298 #else
2299 emit_movw(imm1&0x0000FFFF,rt);
2300 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2301 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2302 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2303 }
2304 emit_movt(imm1&0xFFFF0000,rt);
2305 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2306 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2307 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2308 }
2309 #endif
2310 }
2311}
2312
// Special case for checking invalid_code.
// Not implemented in this backend: always fails assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2318
2319// special case for checking invalid_code
2320void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2321{
2322 assert(imm<128&&imm>=0);
2323 assert(r>=0&&r<16);
2324 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2325 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2326 emit_cmpimm(HOST_TEMPREG,imm);
2327}
2328
2329// special case for tlb mapping
2330void emit_addsr12(int rs1,int rs2,int rt)
2331{
2332 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2333 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2334}
2335
0bbd1454 2336void emit_callne(int a)
2337{
2338 assem_debug("blne %x\n",a);
2339 u_int offset=genjmp(a);
2340 output_w32(0x1b000000|offset);
2341}
2342
// Used to preload hash table entries.
// NOTE(review): the byte sequence emitted here (0x0F 0x18 + modrm + 32-bit
// address) looks like an x86 prefetch encoding carried over from another
// backend rather than an ARM instruction - confirm whether this is ever
// called on ARM.
void emit_prefetch(void *addr)
{
  const int target = (int)addr;
  assem_debug("prefetch %x\n",target);
  output_byte(0x0F);
  output_byte(0x18);
  output_modrm(0,5,1);
  output_w32(target);
}
2352void emit_prefetchreg(int r)
2353{
2354 assem_debug("pld %s\n",regname[r]);
2355 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2356}
2357
2358// Special case for mini_ht
2359void emit_ldreq_indexed(int rs, u_int offset, int rt)
2360{
2361 assert(offset<4096);
2362 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2363 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2364}
2365
2366void emit_flds(int r,int sr)
2367{
2368 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2369 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2370}
2371
2372void emit_vldr(int r,int vr)
2373{
2374 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2375 output_w32(0xed900b00|(vr<<12)|(r<<16));
2376}
2377
2378void emit_fsts(int sr,int r)
2379{
2380 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2381 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2382}
2383
2384void emit_vstr(int vr,int r)
2385{
2386 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2387 output_w32(0xed800b00|(vr<<12)|(r<<16));
2388}
2389
2390void emit_ftosizs(int s,int d)
2391{
2392 assem_debug("ftosizs s%d,s%d\n",d,s);
2393 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2394}
2395
2396void emit_ftosizd(int s,int d)
2397{
2398 assem_debug("ftosizd s%d,d%d\n",d,s);
2399 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2400}
2401
2402void emit_fsitos(int s,int d)
2403{
2404 assem_debug("fsitos s%d,s%d\n",d,s);
2405 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2406}
2407
2408void emit_fsitod(int s,int d)
2409{
2410 assem_debug("fsitod d%d,s%d\n",d,s);
2411 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2412}
2413
2414void emit_fcvtds(int s,int d)
2415{
2416 assem_debug("fcvtds d%d,s%d\n",d,s);
2417 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2418}
2419
2420void emit_fcvtsd(int s,int d)
2421{
2422 assem_debug("fcvtsd s%d,d%d\n",d,s);
2423 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2424}
2425
// Emit "fsqrts s<d>,s<s>": single-precision square root of s<s> into s<d>.
// Both operands are encoded as S registers, so the debug text labels the
// destination "s" (it previously printed "d").
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2431
// Emit "fsqrtd d<d>,d<s>": double-precision square root of d<s> into d<d>.
// Both operands are encoded as D registers, so the debug text labels the
// destination "d" (it previously printed "s").
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2437
// Emit "fabss s<d>,s<s>": single-precision absolute value of s<s> into s<d>.
// Both operands are encoded as S registers, so the debug text labels the
// destination "s" (it previously printed "d").
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2443
// Emit "fabsd d<d>,d<s>": double-precision absolute value of d<s> into d<d>.
// Both operands are encoded as D registers, so the debug text labels the
// destination "d" (it previously printed "s").
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2449
// Emit "fnegs s<d>,s<s>": single-precision negation of s<s> into s<d>.
// Both operands are encoded as S registers, so the debug text labels the
// destination "s" (it previously printed "d").
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2455
// Emit "fnegd d<d>,d<s>": double-precision negation of d<s> into d<d>.
// Both operands are encoded as D registers, so the debug text labels the
// destination "d" (it previously printed "s").
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2461
2462void emit_fadds(int s1,int s2,int d)
2463{
2464 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2465 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2466}
2467
2468void emit_faddd(int s1,int s2,int d)
2469{
2470 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2471 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2472}
2473
2474void emit_fsubs(int s1,int s2,int d)
2475{
2476 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2477 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2478}
2479
2480void emit_fsubd(int s1,int s2,int d)
2481{
2482 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2483 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2484}
2485
2486void emit_fmuls(int s1,int s2,int d)
2487{
2488 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2489 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2490}
2491
2492void emit_fmuld(int s1,int s2,int d)
2493{
2494 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2495 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2496}
2497
2498void emit_fdivs(int s1,int s2,int d)
2499{
2500 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2501 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2502}
2503
2504void emit_fdivd(int s1,int s2,int d)
2505{
2506 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2507 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2508}
2509
2510void emit_fcmps(int x,int y)
2511{
2512 assem_debug("fcmps s14, s15\n");
2513 output_w32(0xeeb47a67);
2514}
2515
2516void emit_fcmpd(int x,int y)
2517{
2518 assem_debug("fcmpd d6, d7\n");
2519 output_w32(0xeeb46b47);
2520}
2521
2522void emit_fmstat()
2523{
2524 assem_debug("fmstat\n");
2525 output_w32(0xeef1fa10);
2526}
2527
2528void emit_bicne_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
cfbd3c6e 2531 genimm_checked(imm,&armval);
57871462 2532 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
2536void emit_biccs_imm(int rs,int imm,int rt)
2537{
2538 u_int armval;
cfbd3c6e 2539 genimm_checked(imm,&armval);
57871462 2540 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2541 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2542}
2543
2544void emit_bicvc_imm(int rs,int imm,int rt)
2545{
2546 u_int armval;
cfbd3c6e 2547 genimm_checked(imm,&armval);
57871462 2548 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2549 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2550}
2551
2552void emit_bichi_imm(int rs,int imm,int rt)
2553{
2554 u_int armval;
cfbd3c6e 2555 genimm_checked(imm,&armval);
57871462 2556 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2557 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2558}
2559
2560void emit_orrvs_imm(int rs,int imm,int rt)
2561{
2562 u_int armval;
cfbd3c6e 2563 genimm_checked(imm,&armval);
57871462 2564 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2565 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2566}
2567
b9b61529 2568void emit_orrne_imm(int rs,int imm,int rt)
2569{
2570 u_int armval;
cfbd3c6e 2571 genimm_checked(imm,&armval);
b9b61529 2572 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2573 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2574}
2575
2576void emit_andne_imm(int rs,int imm,int rt)
2577{
2578 u_int armval;
cfbd3c6e 2579 genimm_checked(imm,&armval);
b9b61529 2580 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2581 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2582}
2583
57871462 2584void emit_jno_unlikely(int a)
2585{
2586 //emit_jno(a);
2587 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2588 output_w32(0x72800000|rd_rn_rm(15,15,0));
2589}
2590
054175e9 2591static void save_regs_all(u_int reglist)
57871462 2592{
054175e9 2593 int i;
57871462 2594 if(!reglist) return;
2595 assem_debug("stmia fp,{");
054175e9 2596 for(i=0;i<16;i++)
2597 if(reglist&(1<<i))
2598 assem_debug("r%d,",i);
57871462 2599 assem_debug("}\n");
2600 output_w32(0xe88b0000|reglist);
2601}
054175e9 2602static void restore_regs_all(u_int reglist)
57871462 2603{
054175e9 2604 int i;
57871462 2605 if(!reglist) return;
2606 assem_debug("ldmia fp,{");
054175e9 2607 for(i=0;i<16;i++)
2608 if(reglist&(1<<i))
2609 assem_debug("r%d,",i);
57871462 2610 assem_debug("}\n");
2611 output_w32(0xe89b0000|reglist);
2612}
054175e9 2613// Save registers before function call
2614static void save_regs(u_int reglist)
2615{
2616 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2617 save_regs_all(reglist);
2618}
2619// Restore registers after function call
2620static void restore_regs(u_int reglist)
2621{
2622 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2623 restore_regs_all(reglist);
2624}
57871462 2625
2626// Write back consts using r14 so we don't disturb the other registers
2627void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2628{
2629 int hr;
2630 for(hr=0;hr<HOST_REGS;hr++) {
2631 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2632 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2633 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2634 int value=constmap[i][hr];
2635 if(value==0) {
2636 emit_zeroreg(HOST_TEMPREG);
2637 }
2638 else {
2639 emit_movimm(value,HOST_TEMPREG);
2640 }
2641 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2642#ifndef FORCE32
57871462 2643 if((i_is32>>i_regmap[hr])&1) {
2644 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2645 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2646 }
24385cae 2647#endif
57871462 2648 }
2649 }
2650 }
2651 }
2652}
2653
2654/* Stubs/epilogue */
2655
2656void literal_pool(int n)
2657{
2658 if(!literalcount) return;
2659 if(n) {
2660 if((int)out-literals[0][0]<4096-n) return;
2661 }
2662 u_int *ptr;
2663 int i;
2664 for(i=0;i<literalcount;i++)
2665 {
77750690 2666 u_int l_addr=(u_int)out;
2667 int j;
2668 for(j=0;j<i;j++) {
2669 if(literals[j][1]==literals[i][1]) {
2670 //printf("dup %08x\n",literals[i][1]);
2671 l_addr=literals[j][0];
2672 break;
2673 }
2674 }
57871462 2675 ptr=(u_int *)literals[i][0];
77750690 2676 u_int offset=l_addr-(u_int)ptr-8;
57871462 2677 assert(offset<4096);
2678 assert(!(offset&3));
2679 *ptr|=offset;
77750690 2680 if(l_addr==(u_int)out) {
2681 literals[i][0]=l_addr; // remember for dupes
2682 output_w32(literals[i][1]);
2683 }
57871462 2684 }
2685 literalcount=0;
2686}
2687
2688void literal_pool_jumpover(int n)
2689{
2690 if(!literalcount) return;
2691 if(n) {
2692 if((int)out-literals[0][0]<4096-n) return;
2693 }
2694 int jaddr=(int)out;
2695 emit_jmp(0);
2696 literal_pool(0);
2697 set_jump_target(jaddr,(int)out);
2698}
2699
2700emit_extjump2(int addr, int target, int linker)
2701{
2702 u_char *ptr=(u_char *)addr;
2703 assert((ptr[3]&0x0e)==0xa);
2704 emit_loadlp(target,0);
2705 emit_loadlp(addr,1);
24385cae 2706 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2707 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2708//DEBUG >
2709#ifdef DEBUG_CYCLE_COUNT
2710 emit_readword((int)&last_count,ECX);
2711 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2712 emit_readword((int)&next_interupt,ECX);
2713 emit_writeword(HOST_CCREG,(int)&Count);
2714 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2715 emit_writeword(ECX,(int)&last_count);
2716#endif
2717//DEBUG <
2718 emit_jmp(linker);
2719}
2720
2721emit_extjump(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker);
2724}
2725emit_extjump_ds(int addr, int target)
2726{
2727 emit_extjump2(addr, target, (int)dyna_linker_ds);
2728}
2729
13e35c04 2730// put rt_val into rt, potentially making use of rs with value rs_val
2731static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2732{
8575a877 2733 u_int armval;
2734 int diff;
2735 if(genimm(rt_val,&armval)) {
2736 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2737 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2738 return;
2739 }
2740 if(genimm(~rt_val,&armval)) {
2741 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2742 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2743 return;
2744 }
2745 diff=rt_val-rs_val;
2746 if(genimm(diff,&armval)) {
2747 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2748 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2749 return;
2750 }else if(genimm(-diff,&armval)) {
2751 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2752 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2753 return;
2754 }
2755 emit_movimm(rt_val,rt);
2756}
2757
2758// return 1 if above function can do it's job cheaply
2759static int is_similar_value(u_int v1,u_int v2)
2760{
13e35c04 2761 u_int xs;
8575a877 2762 int diff;
2763 if(v1==v2) return 1;
2764 diff=v2-v1;
2765 for(xs=diff;xs!=0&&(xs&3)==0;xs>>=2)
13e35c04 2766 ;
8575a877 2767 if(xs<0x100) return 1;
2768 for(xs=-diff;xs!=0&&(xs&3)==0;xs>>=2)
2769 ;
2770 if(xs<0x100) return 1;
2771 return 0;
13e35c04 2772}
cbbab9cd 2773
// Move the values held in host registers a0 and a1 into r0 and r1.
// A negative register number means "nothing to pass" for that slot, except
// that a1 is always copied when it is r0.  Trashes r2 when the two
// registers have to be swapped.
static void pass_args(int a0, int a1)
{
  if(a0==1&&a1==0) {
    // The arguments sit in each other's slot: swap through r2.
    emit_mov(a0,2);
    emit_mov(a1,1);
    emit_mov(2,0);
    return;
  }
  if(a0!=0&&a1==0) {
    // a1 lives in r0: move it out before r0 is overwritten.
    emit_mov(a1,1);
    if(a0>=0) emit_mov(a0,0);
    return;
  }
  if(a0>0) emit_mov(a0,0);
  if(a1>=0&&a1!=1) emit_mov(a1,1);
}
2790
b1be1eee 2791static void mov_loadtype_adj(int type,int rs,int rt)
2792{
2793 switch(type) {
2794 case LOADB_STUB: emit_signextend8(rs,rt); break;
2795 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2796 case LOADH_STUB: emit_signextend16(rs,rt); break;
2797 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2798 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2799 default: assert(0);
2800 }
2801}
2802
2803#ifdef PCSX
2804#include "pcsxmem.h"
2805#include "pcsxmem_inline.c"
2806#endif
2807
// Emit the out-of-line slow path for read stub n.
// Fields of stubs[n] used below: [0] stub type (LOAD*_STUB), [1] address of
// the jump that is patched to land here, [2] return address in the main
// code, [3] instruction index, [4] host register holding the guest address,
// [5] struct regstat pointer, [6] cycle adjustment, [7] live-register mask.
57871462 2808do_readstub(int n)
2809{
2810 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2811 literal_pool(256);
2812 set_jump_target(stubs[n][1],(int)out);
2813 int type=stubs[n][0];
2814 int i=stubs[n][3];
2815 int rs=stubs[n][4];
2816 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2817 u_int reglist=stubs[n][7];
2818 signed char *i_regmap=i_regs->regmap;
2819 int addr=get_reg(i_regmap,AGEN1+(i&1));
2820 int rth,rt;
2821 int ds;
// Destination register: FTEMP for coprocessor loads and LOADLR, otherwise
// the host register mapped to the instruction's rt1.
b9b61529 2822 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2823 rth=get_reg(i_regmap,FTEMP|64);
2824 rt=get_reg(i_regmap,FTEMP);
2825 }else{
2826 rth=get_reg(i_regmap,rt1[i]|64);
2827 rt=get_reg(i_regmap,rt1[i]);
2828 }
2829 assert(rs>=0);
c6c3b1b3 2830#ifdef PCSX
2831 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2832 reglist|=(1<<rs);
// Look for a scratch register among r0-r9 and r12 (mask 0x13ff) that is
// not in reglist.
2833 for(r=0;r<=12;r++) {
2834 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2835 temp=r; break;
2836 }
2837 }
// The destination is about to be overwritten, so it is dropped from the
// save/restore set.
2838 if(rt>=0)
2839 reglist&=~(1<<rt);
// No free scratch register: save the registers now and use r0 (or r2 when
// the address is in r0).
2840 if(temp==-1) {
2841 save_regs(reglist);
2842 regs_saved=1;
2843 temp=(rs==0)?2:0;
2844 }
// Use r1 as the second scratch when it is free (or already saved) and not
// otherwise in use; it stays HOST_TEMPREG if not.
2845 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2846 temp2=1;
// Look up the mem_rtab entry for page rs>>12 and shift it left by one with
// flag update; the cc-conditional loads and the emit_jcc below depend on
// the flags from that shift (presumably carry clear = directly readable
// memory - confirm against the table layout).
2847 emit_readword((int)&mem_rtab,temp);
2848 emit_shrimm(rs,12,temp2);
2849 emit_readword_dualindexedx4(temp,temp2,temp2);
2850 emit_lsls_imm(temp2,1,temp2);
2851 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2852 switch(type) {
2853 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2854 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2855 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2856 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2857 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2858 }
2859 }
2860 if(regs_saved) {
2861 restore_jump=(int)out;
2862 emit_jcc(0); // jump to reg restore
2863 }
2864 else
2865 emit_jcc(stubs[n][2]); // return address
2866
// Handler path: save registers (if not done yet) and call the generic read
// handler for this access width.
2867 if(!regs_saved)
2868 save_regs(reglist);
2869 int handler=0;
2870 if(type==LOADB_STUB||type==LOADBU_STUB)
2871 handler=(int)jump_handler_read8;
2872 if(type==LOADH_STUB||type==LOADHU_STUB)
2873 handler=(int)jump_handler_read16;
2874 if(type==LOADW_STUB)
2875 handler=(int)jump_handler_read32;
2876 assert(handler!=0);
// r0 = guest address, r1 = shifted table entry, r2 = adjusted cycle count.
b96d3df7 2877 pass_args(rs,temp2);
c6c3b1b3 2878 int cc=get_reg(i_regmap,CCREG);
2879 if(cc<0)
2880 emit_loadreg(CCREG,2);
2573466a 2881 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2882 emit_call(handler);
// The handler's result is taken from r0 and extended per load type into rt.
2883 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2884 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2885 }
2886 if(restore_jump)
2887 set_jump_target(restore_jump,(int)out);
2888 restore_regs(reglist);
2889 emit_jmp(stubs[n][2]); // return address
2890#else // !PCSX
// Non-PCSX path: dispatch through the readmem* function tables via
// indirect_jump_indexed and fetch the result from readmem_dword.
57871462 2891 if(addr<0) addr=rt;
535d208a 2892 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2893 assert(addr>=0);
2894 int ftable=0;
2895 if(type==LOADB_STUB||type==LOADBU_STUB)
2896 ftable=(int)readmemb;
2897 if(type==LOADH_STUB||type==LOADHU_STUB)
2898 ftable=(int)readmemh;
2899 if(type==LOADW_STUB)
2900 ftable=(int)readmem;
24385cae 2901#ifndef FORCE32
57871462 2902 if(type==LOADD_STUB)
2903 ftable=(int)readmemd;
24385cae 2904#endif
2905 assert(ftable!=0);
57871462 2906 emit_writeword(rs,(int)&address);
2907 //emit_pusha();
2908 save_regs(reglist);
97a238a6 2909#ifndef PCSX
57871462 2910 ds=i_regs!=&regs[i];
2911 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2912 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2913 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2914 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2915 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2916#endif
// r0 = function table, r1 = address>>16, r2 = adjusted cycle count.
57871462 2917 emit_shrimm(rs,16,1);
2918 int cc=get_reg(i_regmap,CCREG);
2919 if(cc<0) {
2920 emit_loadreg(CCREG,2);
2921 }
2922 emit_movimm(ftable,0);
2923 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2924#ifndef PCSX
57871462 2925 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2926#endif
57871462 2927 //emit_readword((int)&last_count,temp);
2928 //emit_add(cc,temp,cc);
2929 //emit_writeword(cc,(int)&Count);
2930 //emit_mov(15,14);
2931 emit_call((int)&indirect_jump_indexed);
2932 //emit_callreg(rs);
2933 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2934#ifndef PCSX
57871462 2935 // We really shouldn't need to update the count here,
2936 // but not doing so causes random crashes...
2937 emit_readword((int)&Count,HOST_TEMPREG);
2938 emit_readword((int)&next_interupt,2);
2939 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2940 emit_writeword(2,(int)&last_count);
2941 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2942 if(cc<0) {
2943 emit_storereg(CCREG,HOST_TEMPREG);
2944 }
f51dc36c 2945#endif
57871462 2946 //emit_popa();
2947 restore_regs(reglist);
2948 //if((cc=get_reg(regmap,CCREG))>=0) {
2949 // emit_loadreg(CCREG,cc);
2950 //}
// Fetch the result from readmem_dword with the width/extension of the load.
f18c0f46 2951 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2952 assert(rt>=0);
2953 if(type==LOADB_STUB)
2954 emit_movsbl((int)&readmem_dword,rt);
2955 if(type==LOADBU_STUB)
2956 emit_movzbl((int)&readmem_dword,rt);
2957 if(type==LOADH_STUB)
2958 emit_movswl((int)&readmem_dword,rt);
2959 if(type==LOADHU_STUB)
2960 emit_movzwl((int)&readmem_dword,rt);
2961 if(type==LOADW_STUB)
2962 emit_readword((int)&readmem_dword,rt);
2963 if(type==LOADD_STUB) {
2964 emit_readword((int)&readmem_dword,rt);
2965 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2966 }
57871462 2967 }
2968 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2969#endif // !PCSX
57871462 2970}
2971
#ifdef PCSX
// Resolve guest address addr through the two-level lookup table `table`.
// Returns 0 and stores a directly accessible host address in *addr_host
// when the memory can be accessed directly; otherwise returns the address
// of the memory handler for this access and leaves *addr_host untouched.
//
// Table entries hold their payload shifted right by one, with bit 31 as a
// flag.  First level (indexed by addr>>12): flag clear means the payload
// plus addr is the host address; flag set means the payload points to a
// second-level table.  The second level is indexed by access width (taken
// from the *_STUB type), and its entries use the same flag to tell a host
// page base from a handler address.
u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
{
  // Unsigned constant: (1<<31) on a 32-bit int shifts into the sign bit,
  // which is undefined behaviour.
  const u_int indirect_flag=1u<<31;
  const u_int page_off=addr&0xfff;
  u_int l1,l2=0;

  l1=((u_int *)table)[addr>>12];
  if((l1&indirect_flag)==0) {
    *addr_host=(l1<<1)+addr;
    return 0;
  }

  l1<<=1;
  if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
    l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + page_off];
  else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
    l2=((u_int *)l1)[0x1000/4 + page_off/2];
  else
    l2=((u_int *)l1)[page_off/4];

  if((l2&indirect_flag)==0) {
    *addr_host=(l2<<1)+page_off;
    return 0;
  }
  return l2<<1;
}
#endif
3000
// Emit inline code for a read of the given stub type from the constant
// guest address addr into the host register mapped to guest register
// `target`.  regmap is the current register mapping, adj the cycle
// adjustment passed on to the handler, reglist the live-register mask to
// preserve around calls.
57871462 3001inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3002{
3003 int rs=get_reg(regmap,target);
3004 int rth=get_reg(regmap,target|64);
3005 int rt=get_reg(regmap,target);
535d208a 3006 if(rs<0) rs=get_reg(regmap,-1);
57871462 3007 assert(rs>=0);
c6c3b1b3 3008#ifdef PCSX
b1be1eee 3009 u_int handler,host_addr=0,is_dynamic,far_call=0;
3010 int cc=get_reg(regmap,CCREG);
// A non-zero result from pcsx_direct_read() ends the function here;
// presumably it means the read was emitted in full - confirm.
3011 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3012 return;
// handler==0: the address maps to directly accessible host memory, so a
// plain load from host_addr is emitted.
c6c3b1b3 3013 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3014 if (handler==0) {
3015 if(rt<0)
3016 return;
13e35c04 3017 if(addr!=host_addr)
3018 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3019 switch(type) {
3020 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3021 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3022 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3023 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3024 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3025 default: assert(0);
3026 }
3027 return;
3028 }
// If the handler for this address is flagged dynamic, go through the
// generic jump_handler_read* routines instead of the handler found above.
b1be1eee 3029 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3030 if(is_dynamic) {
3031 if(type==LOADB_STUB||type==LOADBU_STUB)
3032 handler=(int)jump_handler_read8;
3033 if(type==LOADH_STUB||type==LOADHU_STUB)
3034 handler=(int)jump_handler_read16;
3035 if(type==LOADW_STUB)
3036 handler=(int)jump_handler_read32;
3037 }
c6c3b1b3 3038
3039 // call a memhandler
3040 if(rt>=0)
3041 reglist&=~(1<<rt);
3042 save_regs(reglist);
// r0 = guest address.
3043 if(target==0)
3044 emit_movimm(addr,0);
3045 else if(rs!=0)
3046 emit_mov(rs,0);
// Handlers outside the +/-32MB branch range are called through r12.
c6c3b1b3 3047 int offset=(int)handler-(int)out-8;
3048 if(offset<-33554432||offset>=33554432) {
3049 // unreachable memhandler, a plugin func perhaps
b1be1eee 3050 emit_movimm(handler,12);
3051 far_call=1;
3052 }
3053 if(cc<0)
3054 emit_loadreg(CCREG,2);
// Dynamic handlers receive the table entry in r1 and the adjusted cycle
// count in r2; for the others the adjusted count plus last_count is
// written to Count before the call.
3055 if(is_dynamic) {
3056 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3057 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3058 }
b1be1eee 3059 else {
3060 emit_readword((int)&last_count,3);
3061 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3062 emit_add(2,3,2);
3063 emit_writeword(2,(int)&Count);
3064 }
3065
3066 if(far_call)
3067 emit_callreg(12);
c6c3b1b3 3068 else
3069 emit_call(handler);
b1be1eee 3070
// The result is taken from r0 and extended per load type into rt.
c6c3b1b3 3071 if(rt>=0) {
3072 switch(type) {
3073 case LOADB_STUB: emit_signextend8(0,rt); break;
3074 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3075 case LOADH_STUB: emit_signextend16(0,rt); break;
3076 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3077 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3078 default: assert(0);
3079 }
3080 }
3081 restore_regs(reglist);
3082#else // if !PCSX
// Non-PCSX path: the handler is looked up in the readmem* tables at code
// generation time and called through indirect_jump; the result is fetched
// from readmem_dword.
57871462 3083 int ftable=0;
3084 if(type==LOADB_STUB||type==LOADBU_STUB)
3085 ftable=(int)readmemb;
3086 if(type==LOADH_STUB||type==LOADHU_STUB)
3087 ftable=(int)readmemh;
3088 if(type==LOADW_STUB)
3089 ftable=(int)readmem;
24385cae 3090#ifndef FORCE32
57871462 3091 if(type==LOADD_STUB)
3092 ftable=(int)readmemd;
24385cae 3093#endif
3094 assert(ftable!=0);
fd99c415 3095 if(target==0)
3096 emit_movimm(addr,rs);
57871462 3097 emit_writeword(rs,(int)&address);
3098 //emit_pusha();
3099 save_regs(reglist);
0c1fe38b 3100#ifndef PCSX
3101 if((signed int)addr>=(signed int)0xC0000000) {
3102 // Theoretically we can have a pagefault here, if the TLB has never
3103 // been enabled and the address is outside the range 80000000..BFFFFFFF
3104 // Write out the registers so the pagefault can be handled. This is
3105 // a very rare case and likely represents a bug.
3106 int ds=regmap!=regs[i].regmap;
3107 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3108 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3109 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3110 }
3111#endif
57871462 3112 //emit_shrimm(rs,16,1);
3113 int cc=get_reg(regmap,CCREG);
3114 if(cc<0) {
3115 emit_loadreg(CCREG,2);
3116 }
3117 //emit_movimm(ftable,0);
// r0 = handler taken from the table entry for addr>>16, r2 = adjusted
// cycle count.
3118 emit_movimm(((u_int *)ftable)[addr>>16],0);
3119 //emit_readword((int)&last_count,12);
2573466a 3120 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3121#ifndef PCSX
57871462 3122 if((signed int)addr>=(signed int)0xC0000000) {
3123 // Pagefault address
3124 int ds=regmap!=regs[i].regmap;
3125 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3126 }
f51dc36c 3127#endif
57871462 3128 //emit_add(12,2,2);
3129 //emit_writeword(2,(int)&Count);
3130 //emit_call(((u_int *)ftable)[addr>>16]);
3131 emit_call((int)&indirect_jump);
f51dc36c 3132#ifndef PCSX
57871462 3133 // We really shouldn't need to update the count here,
3134 // but not doing so causes random crashes...
3135 emit_readword((int)&Count,HOST_TEMPREG);
3136 emit_readword((int)&next_interupt,2);
2573466a 3137 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3138 emit_writeword(2,(int)&last_count);
3139 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3140 if(cc<0) {
3141 emit_storereg(CCREG,HOST_TEMPREG);
3142 }
f51dc36c 3143#endif
57871462 3144 //emit_popa();
3145 restore_regs(reglist);
// Fetch the result from readmem_dword with the width/extension of the load.
fd99c415 3146 if(rt>=0) {
3147 if(type==LOADB_STUB)
3148 emit_movsbl((int)&readmem_dword,rt);
3149 if(type==LOADBU_STUB)
3150 emit_movzbl((int)&readmem_dword,rt);
3151 if(type==LOADH_STUB)
3152 emit_movswl((int)&readmem_dword,rt);
3153 if(type==LOADHU_STUB)
3154 emit_movzwl((int)&readmem_dword,rt);
3155 if(type==LOADW_STUB)
3156 emit_readword((int)&readmem_dword,rt);
3157 if(type==LOADD_STUB) {
3158 emit_readword((int)&readmem_dword,rt);
3159 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3160 }
57871462 3161 }
c6c3b1b3 3162#endif // !PCSX
57871462 3163}
3164
/*
 * do_writestub - emit the out-of-line slow path for store stub number n.
 *
 * Everything is read from stubs[n]:
 *   [0] stub type (STOREB/STOREH/STOREW, plus STORED in the non-PCSX path)
 *   [1] passed to set_jump_target() together with the current output pointer
 *   [2] return address jumped to when the stub is done
 *   [3] instruction index i
 *   [4] host register holding the store address (rs)
 *   [5] struct regstat pointer for the instruction
 *   [6] value fed into the cycle-count adjustment
 *   [7] host register list given to save_regs()/restore_regs()
 *
 * PCSX build: emits a lookup through mem_wtab indexed by (address>>12),
 * a store selected by the stub type, and otherwise a call to
 * jump_handler_write8/16/32.
 * Other builds: writes address and data to globals and dispatches through
 * the writemem* table with indirect_jump_indexed.
 */
3165do_writestub(int n)
3166{
3167 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3168 literal_pool(256);
3169 set_jump_target(stubs[n][1],(int)out);
3170 int type=stubs[n][0];
3171 int i=stubs[n][3];
3172 int rs=stubs[n][4];
3173 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3174 u_int reglist=stubs[n][7];
3175 signed char *i_regmap=i_regs->regmap;
3176 int addr=get_reg(i_regmap,AGEN1+(i&1));
3177 int rth,rt,r;
3178 int ds;
// Coprocessor stores take their data from FTEMP, everything else from rs2[i].
b9b61529 3179 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3180 rth=get_reg(i_regmap,FTEMP|64);
3181 rt=get_reg(i_regmap,r=FTEMP);
3182 }else{
3183 rth=get_reg(i_regmap,rs2[i]|64);
3184 rt=get_reg(i_regmap,r=rs2[i]);
3185 }
3186 assert(rs>=0);
3187 assert(rt>=0);
b96d3df7 3188#ifdef PCSX
3189 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3190 int reglist2=reglist|(1<<rs)|(1<<rt);
// Look for a host register among r0-r9/r12 (mask 0x13ff) that is neither in
// reglist nor used as rs/rt.
3191 for(rtmp=0;rtmp<=12;rtmp++) {
3192 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3193 temp=rtmp; break;
3194 }
3195 }
// None found: save the registers now and take the first of r0-r3 that is not
// rs or rt.
3196 if(temp==-1) {
3197 save_regs(reglist);
3198 regs_saved=1;
3199 for(rtmp=0;rtmp<=3;rtmp++)
3200 if(rtmp!=rs&&rtmp!=rt)
3201 {temp=rtmp;break;}
3202 }
// Use r3 as the second temporary when it is either already saved or not in
// reglist2, and is not temp, rs or rt; otherwise temp2 stays HOST_TEMPREG.
3203 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3204 temp2=3;
3205 emit_readword((int)&mem_wtab,temp);
3206 emit_shrimm(rs,12,temp2);
3207 emit_readword_dualindexedx4(temp,temp2,temp2);
3208 emit_lsls_imm(temp2,1,temp2);
3209 switch(type) {
3210 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3211 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3212 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3213 default: assert(0);
3214 }
// The jump target is a placeholder when registers were saved; it is patched
// further down once the restore code's address is known.
3215 if(regs_saved) {
3216 restore_jump=(int)out;
3217 emit_jcc(0); // jump to reg restore
3218 }
3219 else
3220 emit_jcc(stubs[n][2]); // return address (invcode check)
3221
3222 if(!regs_saved)
3223 save_regs(reglist);
3224 int handler=0;
3225 switch(type) {
3226 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3227 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3228 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3229 }
3230 assert(handler!=0);
3231 pass_args(rs,rt);
// The value left in temp2 is handed to the handler in r3.
3232 if(temp2!=3)
3233 emit_mov(temp2,3);
3234 int cc=get_reg(i_regmap,CCREG);
3235 if(cc<0)
3236 emit_loadreg(CCREG,2);
2573466a 3237 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3238 // returns new cycle_count
3239 emit_call(handler);
// Undo the adjustment added before the call, taking the returned count from r0.
2573466a 3240 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3241 if(cc<0)
3242 emit_storereg(CCREG,2);
3243 if(restore_jump)
3244 set_jump_target(restore_jump,(int)out);
3245 restore_regs(reglist);
3246 ra=stubs[n][2];
b96d3df7 3247 emit_jmp(ra);
3248#else // if !PCSX
57871462 3249 if(addr<0) addr=get_reg(i_regmap,-1);
3250 assert(addr>=0);
// Pick the handler table that matches the access width.
3251 int ftable=0;
3252 if(type==STOREB_STUB)
3253 ftable=(int)writememb;
3254 if(type==STOREH_STUB)
3255 ftable=(int)writememh;
3256 if(type==STOREW_STUB)
3257 ftable=(int)writemem;
24385cae 3258#ifndef FORCE32
57871462 3259 if(type==STORED_STUB)
3260 ftable=(int)writememd;
24385cae 3261#endif
3262 assert(ftable!=0);
// Address and data are passed to the handler through globals.
57871462 3263 emit_writeword(rs,(int)&address);
3264 //emit_shrimm(rs,16,rs);
3265 //emit_movmem_indexedx4(ftable,rs,rs);
3266 if(type==STOREB_STUB)
3267 emit_writebyte(rt,(int)&byte);
3268 if(type==STOREH_STUB)
3269 emit_writehword(rt,(int)&hword);
3270 if(type==STOREW_STUB)
3271 emit_writeword(rt,(int)&word);
3272 if(type==STORED_STUB) {
3d624f89 3273#ifndef FORCE32
57871462 3274 emit_writeword(rt,(int)&dword);
3275 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3276#else
3277 printf("STORED_STUB\n");
3278#endif
57871462 3279 }
3280 //emit_pusha();
3281 save_regs(reglist);
97a238a6 3282#ifndef PCSX
57871462 3283 ds=i_regs!=&regs[i];
3284 int real_rs=get_reg(i_regmap,rs1[i]);
3285 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3286 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3287 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3288 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3289#endif
// r1 = address>>16 (table index), r0 = table base, r2 = adjusted cycle count.
57871462 3290 emit_shrimm(rs,16,1);
3291 int cc=get_reg(i_regmap,CCREG);
3292 if(cc<0) {
3293 emit_loadreg(CCREG,2);
3294 }
3295 emit_movimm(ftable,0);
3296 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3297#ifndef PCSX
57871462 3298 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3299#endif
57871462 3300 //emit_readword((int)&last_count,temp);
3301 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3302 //emit_add(cc,temp,cc);
3303 //emit_writeword(cc,(int)&Count);
3304 emit_call((int)&indirect_jump_indexed);
3305 //emit_callreg(rs);
// Recompute the cycle counter from Count and next_interupt after the call.
3306 emit_readword((int)&Count,HOST_TEMPREG);
3307 emit_readword((int)&next_interupt,2);
3308 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3309 emit_writeword(2,(int)&last_count);
3310 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3311 if(cc<0) {
3312 emit_storereg(CCREG,HOST_TEMPREG);
3313 }
3314 //emit_popa();
3315 restore_regs(reglist);
3316 //if((cc=get_reg(regmap,CCREG))>=0) {
3317 // emit_loadreg(CCREG,cc);
3318 //}
3319 emit_jmp(stubs[n][2]); // return address
b96d3df7 3320#endif // !PCSX
57871462 3321}
3322
/*
 * inline_writestub - emit, inline, a store of register `target` to the
 * compile-time-known address `addr`.
 *
 *   type    - STOREB_STUB/STOREH_STUB/STOREW_STUB (STORED_STUB only in the
 *             non-PCSX path)
 *   i       - instruction index (only used by the non-PCSX path)
 *   regmap  - host register map; the address register is the one mapped to -1
 *   adj     - value fed into the cycle-count adjustment
 *   reglist - host registers given to save_regs()/restore_regs()
 *
 * PCSX build: asks get_direct_memhandler() about mem_wtab. A zero result
 * leads to a plain indexed store through rs; otherwise the returned handler
 * is called through jump_handler_write_h.
 * Other builds: writes address and data to globals and calls the table entry
 * ((u_int *)ftable)[addr>>16] through indirect_jump.
 */
3323inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3324{
3325 int rs=get_reg(regmap,-1);
3326 int rth=get_reg(regmap,target|64);
3327 int rt=get_reg(regmap,target);
3328 assert(rs>=0);
3329 assert(rt>=0);
cbbab9cd 3330#ifdef PCSX
b96d3df7 3331 u_int handler,host_addr=0;
b96d3df7 3332 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
// handler==0: no handler call is needed, store straight through rs.
3333 if (handler==0) {
13e35c04 3334 if(addr!=host_addr)
3335 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3336 switch(type) {
3337 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3338 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3339 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3340 default: assert(0);
3341 }
3342 return;
3343 }
3344
3345 // call a memhandler
3346 save_regs(reglist);
13e35c04 3347 pass_args(rs,rt);
b96d3df7 3348 int cc=get_reg(regmap,CCREG);
3349 if(cc<0)
3350 emit_loadreg(CCREG,2);
2573466a 3351 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
// The handler address travels in r3.
b96d3df7 3352 emit_movimm(handler,3);
3353 // returns new cycle_count
3354 emit_call((int)jump_handler_write_h);
2573466a 3355 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3356 if(cc<0)
3357 emit_storereg(CCREG,2);
3358 restore_regs(reglist);
3359#else // if !pcsx
// Pick the handler table that matches the access width.
57871462 3360 int ftable=0;
3361 if(type==STOREB_STUB)
3362 ftable=(int)writememb;
3363 if(type==STOREH_STUB)
3364 ftable=(int)writememh;
3365 if(type==STOREW_STUB)
3366 ftable=(int)writemem;
24385cae 3367#ifndef FORCE32
57871462 3368 if(type==STORED_STUB)
3369 ftable=(int)writememd;
24385cae 3370#endif
3371 assert(ftable!=0);
// Address and data are passed to the handler through globals.
57871462 3372 emit_writeword(rs,(int)&address);
3373 //emit_shrimm(rs,16,rs);
3374 //emit_movmem_indexedx4(ftable,rs,rs);
3375 if(type==STOREB_STUB)
3376 emit_writebyte(rt,(int)&byte);
3377 if(type==STOREH_STUB)
3378 emit_writehword(rt,(int)&hword);
3379 if(type==STOREW_STUB)
3380 emit_writeword(rt,(int)&word);
3381 if(type==STORED_STUB) {
3d624f89 3382#ifndef FORCE32
57871462 3383 emit_writeword(rt,(int)&dword);
3384 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3385#else
3386 printf("STORED_STUB\n");
3387#endif
57871462 3388 }
3389 //emit_pusha();
3390 save_regs(reglist);
0c1fe38b 3391#ifndef PCSX
3392 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3393 if((signed int)addr>=(signed int)0xC0000000) {
3394 // Theoretically we can have a pagefault here, if the TLB has never
3395 // been enabled and the address is outside the range 80000000..BFFFFFFF
3396 // Write out the registers so the pagefault can be handled. This is
3397 // a very rare case and likely represents a bug.
3398 int ds=regmap!=regs[i].regmap;
3399 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3400 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3401 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3402 }
3403#endif
57871462 3404 //emit_shrimm(rs,16,1);
3405 int cc=get_reg(regmap,CCREG);
3406 if(cc<0) {
3407 emit_loadreg(CCREG,2);
3408 }
3409 //emit_movimm(ftable,0);
// The address is constant, so the table entry is read at compile time and
// emitted as an immediate.
3410 emit_movimm(((u_int *)ftable)[addr>>16],0);
3411 //emit_readword((int)&last_count,12);
2573466a 3412 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3413#ifndef PCSX
57871462 3414 if((signed int)addr>=(signed int)0xC0000000) {
3415 // Pagefault address
3416 int ds=regmap!=regs[i].regmap;
3417 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3418 }
f51dc36c 3419#endif
57871462 3420 //emit_add(12,2,2);
3421 //emit_writeword(2,(int)&Count);
3422 //emit_call(((u_int *)ftable)[addr>>16]);
3423 emit_call((int)&indirect_jump);
// Recompute the cycle counter from Count and next_interupt after the call.
3424 emit_readword((int)&Count,HOST_TEMPREG);
3425 emit_readword((int)&next_interupt,2);
2573466a 3426 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3427 emit_writeword(2,(int)&last_count);
3428 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3429 if(cc<0) {
3430 emit_storereg(CCREG,HOST_TEMPREG);
3431 }
3432 //emit_popa();
3433 restore_regs(reglist);
b96d3df7 3434#endif
57871462 3435}
3436
/*
 * do_unalignedwritestub - emit the out-of-line path for an SWL/SWR store
 * described by stub n (other opcodes trip the assert below).
 *
 * Reads from stubs[n]: [1] jump to patch, [2] return address, [3] instruction
 * index, [4] struct regstat pointer, [5] host register holding the address,
 * [6] value fed into the cycle-count adjustment, [7] host register list.
 *
 * The active "#if 1" branch simply calls jump_handle_swl/jump_handle_swr.
 * The "#else" branch is a compiled-out read-modify-write implementation.
 */
3437do_unalignedwritestub(int n)
3438{
b7918751 3439 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3440 literal_pool(256);
57871462 3441 set_jump_target(stubs[n][1],(int)out);
b7918751 3442
3443 int i=stubs[n][3];
3444 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3445 int addr=stubs[n][5];
3446 u_int reglist=stubs[n][7];
3447 signed char *i_regmap=i_regs->regmap;
3448 int temp2=get_reg(i_regmap,FTEMP);
3449 int rt;
3450 int ds, real_rs;
3451 rt=get_reg(i_regmap,rs2[i]);
3452 assert(rt>=0);
3453 assert(addr>=0);
3454 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
// The address register is added to the save list, the FTEMP register is
// removed from it.
// NOTE(review): temp2 comes from get_reg() and is not checked for <0 before
// being used as a shift count here - confirm FTEMP is always allocated.
3455 reglist|=(1<<addr);
3456 reglist&=~(1<<temp2);
3457
b96d3df7 3458#if 1
3459 // don't bother with it and call write handler
3460 save_regs(reglist);
3461 pass_args(addr,rt);
3462 int cc=get_reg(i_regmap,CCREG);
3463 if(cc<0)
3464 emit_loadreg(CCREG,2);
2573466a 3465 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
// opcode 0x2a is SWL, the only other opcode allowed above (0x2e) is SWR.
b96d3df7 3466 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3467 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3468 if(cc<0)
3469 emit_storereg(CCREG,2);
3470 restore_regs(reglist);
3471 emit_jmp(stubs[n][2]); // return address
3472#else
// Disabled implementation: read the aligned word, merge rt into it, write the
// word back through the writemem table.
b7918751 3473 emit_andimm(addr,0xfffffffc,temp2);
3474 emit_writeword(temp2,(int)&address);
3475
3476 save_regs(reglist);
97a238a6 3477#ifndef PCSX
b7918751 3478 ds=i_regs!=&regs[i];
3479 real_rs=get_reg(i_regmap,rs1[i]);
3480 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3481 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3482 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3483 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3484#endif
b7918751 3485 emit_shrimm(addr,16,1);
3486 int cc=get_reg(i_regmap,CCREG);
3487 if(cc<0) {
3488 emit_loadreg(CCREG,2);
3489 }
3490 emit_movimm((u_int)readmem,0);
3491 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3492#ifndef PCSX
3493 // pagefault address
3494 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3495#endif
b7918751 3496 emit_call((int)&indirect_jump_indexed);
3497 restore_regs(reglist);
3498
3499 emit_readword((int)&readmem_dword,temp2);
3500 int temp=addr; //hmh
// temp = (addr*8)&24: bit offset of the addressed byte inside the word.
3501 emit_shlimm(addr,3,temp);
3502 emit_andimm(temp,24,temp);
3503#ifdef BIG_ENDIAN_MIPS
3504 if (opcode[i]==0x2e) // SWR
3505#else
3506 if (opcode[i]==0x2a) // SWL
3507#endif
3508 emit_xorimm(temp,24,temp);
3509 emit_movimm(-1,HOST_TEMPREG);
55439448 3510 if (opcode[i]==0x2a) { // SWL
b7918751 3511 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3512 emit_orrshr(rt,temp,temp2);
3513 }else{
3514 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3515 emit_orrshl(rt,temp,temp2);
3516 }
3517 emit_readword((int)&address,addr);
3518 emit_writeword(temp2,(int)&word);
3519 //save_regs(reglist); // don't need to, no state changes
3520 emit_shrimm(addr,16,1);
3521 emit_movimm((u_int)writemem,0);
3522 //emit_call((int)&indirect_jump_indexed);
// Hand-rolled call: register 15 is copied into register 14, then register 15
// is loaded from the handler table.
3523 emit_mov(15,14);
3524 emit_readword_dualindexedx4(0,1,15);
3525 emit_readword((int)&Count,HOST_TEMPREG);
3526 emit_readword((int)&next_interupt,2);
3527 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3528 emit_writeword(2,(int)&last_count);
3529 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3530 if(cc<0) {
3531 emit_storereg(CCREG,HOST_TEMPREG);
3532 }
3533 restore_regs(reglist);
57871462 3534 emit_jmp(stubs[n][2]); // return address
b96d3df7 3535#endif
57871462 3536}
3537
/*
 * printregs - debug helper that prints its register-valued arguments in hex.
 *
 * Output order is a, b, c, d, ebp, esi, edi, followed in parentheses by the
 * word located just below the first parameter ((&edi)[-1]). esp is accepted
 * but never printed.
 *
 * NOTE(review): (&edi)[-1] indexes outside the parameter object, so what it
 * prints depends on the calling convention and stack layout - confirm this
 * helper is still wanted on this target.
 */
3538void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
3539{
3540 printf("regs: %x %x %x %x %x %x %x (%x)\n",a,b,c,d,ebp,esi,edi,(&edi)[-1]);
3541}
3542
3543do_invstub(int n)
3544{
3545 literal_pool(20);
3546 u_int reglist=stubs[n][3];
3547 set_jump_target(stubs[n][1],(int)out);
3548 save_regs(reglist);
3549 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3550 emit_call((int)&invalidate_addr);
3551 restore_regs(reglist);
3552 emit_jmp(stubs[n][2]); // return address
3553}
3554
/*
 * do_dirty_stub - emit the code-verification stub for instruction i.
 *
 * The emitted sequence loads r1 with the source address, r2 with `copy`,
 * r3 with slen*4 and r0 with start+i*4, then calls verify_code (or
 * verify_code_vm when start, compared as a signed value, is not below
 * 0xC0000000). After the call it emits load_regs_entry(i) and a jump to
 * instr_addr[i].
 *
 * Returns the address where the load_regs_entry() code begins, or
 * instr_addr[i] when load_regs_entry() emitted nothing.
 *
 * The exact instruction sequence matters: verify_dirty parses it.
 */
3555int do_dirty_stub(int i)
3556{
3557 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3558 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3559 #ifdef PCSX
3560 addr=(u_int)source;
3561 #endif
57871462 3562 // Careful about the code output here, verify_dirty needs to parse it.
3563 #ifdef ARMv5_ONLY
ac545b3a 3564 emit_loadlp(addr,1);
57871462 3565 emit_loadlp((int)copy,2);
3566 emit_loadlp(slen*4,3);
3567 #else
ac545b3a 3568 emit_movw(addr&0x0000FFFF,1);
57871462 3569 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3570 emit_movt(addr&0xFFFF0000,1);
57871462 3571 emit_movt(((u_int)copy)&0xFFFF0000,2);
3572 emit_movw(slen*4,3);
3573 #endif
3574 emit_movimm(start+i*4,0);
3575 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
// Remember where the register-loading code starts; if nothing gets emitted
// the entry point is simply the instruction itself.
3576 int entry=(int)out;
3577 load_regs_entry(i);
3578 if(entry==(int)out) entry=instr_addr[i];
3579 emit_jmp(instr_addr[i]);
3580 return entry;
3581}
3582
/*
 * do_dirty_stub_ds - emit the code-verification sequence that ends in a call
 * to verify_code_ds.
 *
 * Loads r1 with the source address (or start when start, compared as a signed
 * value, is not below 0xC0000000), r2 with `copy`, r3 with slen*4 and r0 with
 * start+1, then calls verify_code_ds. Unlike do_dirty_stub() nothing is
 * emitted after the call and nothing is returned.
 *
 * The exact instruction sequence matters: verify_dirty parses it.
 */
3583void do_dirty_stub_ds()
3584{
3585 // Careful about the code output here, verify_dirty needs to parse it.
3586 #ifdef ARMv5_ONLY
3587 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3588 emit_loadlp((int)copy,2);
3589 emit_loadlp(slen*4,3);
3590 #else
3591 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3592 emit_movw(((u_int)copy)&0x0000FFFF,2);
3593 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3594 emit_movt(((u_int)copy)&0xFFFF0000,2);
3595 emit_movw(slen*4,3);
3596 #endif
3597 emit_movimm(start+1,0);
3598 emit_call((int)&verify_code_ds);
3599}
3600
3601do_cop1stub(int n)
3602{
3603 literal_pool(256);
3604 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3605 set_jump_target(stubs[n][1],(int)out);
3606 int i=stubs[n][3];
3d624f89 3607// int rs=stubs[n][4];
57871462 3608 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3609 int ds=stubs[n][6];
3610 if(!ds) {
3611 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3612 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3613 }
3614 //else {printf("fp exception in delay slot\n");}
3615 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3616 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3617 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3618 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3619 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3620}
3621
63cb0298 3622#ifndef DISABLE_TLB
3623
57871462 3624/* TLB */
3625
3626int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3627{
3628 if(c) {
3629 if((signed int)addr>=(signed int)0xC0000000) {
3630 // address_generation already loaded the const
3631 emit_readword_dualindexedx4(FP,map,map);
3632 }
3633 else
3634 return -1; // No mapping
3635 }
3636 else {
3637 assert(s!=map);
3638 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3639 emit_addsr12(map,s,map);
3640 // Schedule this while we wait on the load
3641 //if(x) emit_xorimm(s,x,ar);
3642 if(shift>=0) emit_shlimm(s,3,shift);
3643 if(~a) emit_andimm(s,a,ar);
3644 emit_readword_dualindexedx4(FP,map,map);
3645 }
3646 return map;
3647}
3648int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3649{
3650 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3651 emit_test(map,map);
3652 *jaddr=(int)out;
3653 emit_js(0);
3654 }
3655 return map;
3656}
3657
3658int gen_tlb_addr_r(int ar, int map) {
3659 if(map>=0) {
3660 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3661 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3662 }
3663}
3664
3665int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3666{
3667 if(c) {
3668 if(addr<0x80800000||addr>=0xC0000000) {
3669 // address_generation already loaded the const
3670 emit_readword_dualindexedx4(FP,map,map);
3671 }
3672 else
3673 return -1; // No mapping
3674 }
3675 else {
3676 assert(s!=map);
3677 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3678 emit_addsr12(map,s,map);
3679 // Schedule this while we wait on the load
3680 //if(x) emit_xorimm(s,x,ar);
3681 emit_readword_dualindexedx4(FP,map,map);
3682 }
3683 return map;
3684}
3685int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3686{
3687 if(!c||addr<0x80800000||addr>=0xC0000000) {
3688 emit_testimm(map,0x40000000);
3689 *jaddr=(int)out;
3690 emit_jne(0);
3691 }
3692}
3693
3694int gen_tlb_addr_w(int ar, int map) {
3695 if(map>=0) {
3696 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3697 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3698 }
3699}
3700
3701// Generate the address of the memory_map entry, relative to dynarec_local
3702generate_map_const(u_int addr,int reg) {
3703 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3704 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3705}
3706
63cb0298 3707#else
3708
/*
 * DISABLE_TLB build: the TLB helpers are reduced to stubs that emit nothing
 * and return 0. The empty parameter lists are deliberate so that call sites
 * written for the full versions still compile.
 */
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3715
3716#endif // DISABLE_TLB
3717
57871462 3718/* Special assem */
3719
/*
 * shift_assemble_arm - emit ARM code for the variable-amount shift at
 * instruction index i, using the register allocation in i_regs.
 *
 * opcode2[i]<=0x07 selects the 32-bit forms (SLLV/SRLV/SRAV); anything else
 * is handled as a 64-bit form (DSLLV/DSRLV/DSRAV) working on high/low host
 * register pairs. Nothing is emitted when rt1[i] is 0 or when the target has
 * no host register.
 */
3720void shift_assemble_arm(int i,struct regstat *i_regs)
3721{
3722 if(rt1[i]) {
3723 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3724 {
3725 signed char s,t,shift;
3726 t=get_reg(i_regs->regmap,rt1[i]);
3727 s=get_reg(i_regs->regmap,rs1[i]);
3728 shift=get_reg(i_regs->regmap,rs2[i]);
3729 if(t>=0){
// Source register number 0: the result is zero.
3730 if(rs1[i]==0)
3731 {
3732 emit_zeroreg(t);
3733 }
// Shift-amount register number 0: the result is a plain copy.
3734 else if(rs2[i]==0)
3735 {
3736 assert(s>=0);
3737 if(s!=t) emit_mov(s,t);
3738 }
3739 else
3740 {
// Only the low five bits of the shift amount are used.
3741 emit_andimm(shift,31,HOST_TEMPREG);
3742 if(opcode2[i]==4) // SLLV
3743 {
3744 emit_shl(s,HOST_TEMPREG,t);
3745 }
3746 if(opcode2[i]==6) // SRLV
3747 {
3748 emit_shr(s,HOST_TEMPREG,t);
3749 }
3750 if(opcode2[i]==7) // SRAV
3751 {
3752 emit_sar(s,HOST_TEMPREG,t);
3753 }
3754 }
3755 }
3756 } else { // DSLLV/DSRLV/DSRAV
3757 signed char sh,sl,th,tl,shift;
3758 th=get_reg(i_regs->regmap,rt1[i]|64);
3759 tl=get_reg(i_regs->regmap,rt1[i]);
3760 sh=get_reg(i_regs->regmap,rs1[i]|64);
3761 sl=get_reg(i_regs->regmap,rs1[i]);
3762 shift=get_reg(i_regs->regmap,rs2[i]);
3763 if(tl>=0){
3764 if(rs1[i]==0)
3765 {
3766 emit_zeroreg(tl);
3767 if(th>=0) emit_zeroreg(th);
3768 }
3769 else if(rs2[i]==0)
3770 {
3771 assert(sl>=0);
3772 if(sl!=tl) emit_mov(sl,tl);
3773 if(th>=0&&sh!=th) emit_mov(sh,th);
3774 }
3775 else
3776 {
3777 // FIXME: What if shift==tl ?
3778 assert(shift!=tl);
3779 int temp=get_reg(i_regs->regmap,-1);
// real_th keeps the allocator's answer; th may be redirected to temp below.
3780 int real_th=th;
3781 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3782 assert(sl>=0);
3783 assert(sh>=0);
3784 emit_andimm(shift,31,HOST_TEMPREG);
3785 if(opcode2[i]==0x14) // DSLLV
3786 {
3787 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3788 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
// NOTE(review): th can still be negative here (the temp fallback above is
// skipped for DSLLV) yet this call is unconditional - confirm.
3789 emit_orrshr(sl,HOST_TEMPREG,th);
3790 emit_andimm(shift,31,HOST_TEMPREG);
// Bit 5 of the shift amount is tested; the cmovne calls below follow that test.
3791 emit_testimm(shift,32);
3792 emit_shl(sl,HOST_TEMPREG,tl);
3793 if(th>=0) emit_cmovne_reg(tl,th);
3794 emit_cmovne_imm(0,tl);
3795 }
3796 if(opcode2[i]==0x16) // DSRLV
3797 {
3798 assert(th>=0);
3799 emit_shr(sl,HOST_TEMPREG,tl);
3800 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3801 emit_orrshl(sh,HOST_TEMPREG,tl);
3802 emit_andimm(shift,31,HOST_TEMPREG);
3803 emit_testimm(shift,32);
3804 emit_shr(sh,HOST_TEMPREG,th);
3805 emit_cmovne_reg(th,tl);
3806 if(real_th>=0) emit_cmovne_imm(0,th);
3807 }
3808 if(opcode2[i]==0x17) // DSRAV
3809 {
3810 assert(th>=0);
3811 emit_shr(sl,HOST_TEMPREG,tl);
3812 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3813 if(real_th>=0) {
3814 assert(temp>=0);
// NOTE(review): this shifts th (the target high register) rather than sh
// before th has been written - confirm this is the intended source.
3815 emit_sarimm(th,31,temp);
3816 }
3817 emit_orrshl(sh,HOST_TEMPREG,tl);
3818 emit_andimm(shift,31,HOST_TEMPREG);
3819 emit_testimm(shift,32);
3820 emit_sar(sh,HOST_TEMPREG,th);
3821 emit_cmovne_reg(th,tl);
3822 if(real_th>=0) emit_cmovne_reg(temp,th);
3823 }
3824 }
3825 }
3826 }
3827 }
3828}
ffb0b9e0 3829
3830#ifdef PCSX
3831static void speculate_mov(int rs,int rt)
3832{
3833 if(rt!=0) {
3834 smrv_strong_next|=1<<rt;
3835 smrv[rt]=smrv[rs];
3836 }
3837}
3838
3839static void speculate_mov_weak(int rs,int rt)
3840{
3841 if(rt!=0) {
3842 smrv_weak_next|=1<<rt;
3843 smrv[rt]=smrv[rs];
3844 }
3845}
3846
/*
 * speculate_register_values - update the speculated guest register values
 * (smrv) and their strong/weak validity masks for instruction index i.
 *
 * At i==0 the table is seeded from psxRegs.GPR.r, registers 28-30 are marked
 * strong and every other register weak. For every instruction the "next"
 * masks computed so far become the current masks, then the instruction type
 * decides how the target register's speculation is propagated or cleared.
 */
3847static void speculate_register_values(int i)
3848{
3849 if(i==0) {
3850 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3851 // gp,sp are likely to stay the same throughout the block
3852 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3853 smrv_weak_next=~smrv_strong_next;
3854 //printf(" llr %08x\n", smrv[4]);
3855 }
3856 smrv_strong=smrv_strong_next;
3857 smrv_weak=smrv_weak_next;
3858 switch(itype[i]) {
3859 case ALU:
// Prefer a strongly known source over a weakly known one; with neither, the
// target is no longer speculated.
3860 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3861 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3862 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3863 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3864 else {
3865 smrv_strong_next&=~(1<<rt1[i]);
3866 smrv_weak_next&=~(1<<rt1[i]);
3867 }
3868 break;
3869 case SHIFTIMM:
3870 smrv_strong_next&=~(1<<rt1[i]);
3871 smrv_weak_next&=~(1<<rt1[i]);
3872 // fallthrough
3873 case IMM16:
// A constant target takes its value from get_final_value() when that
// succeeds, else from constmap, and is marked strong.
3874 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3875 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3876 if(hr>=0) {
3877 if(get_final_value(hr,i,&value))
3878 smrv[rt1[i]]=value;
3879 else smrv[rt1[i]]=constmap[i][hr];
3880 smrv_strong_next|=1<<rt1[i];
3881 }
3882 }
3883 else {
3884 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3885 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3886 }
3887 break;
3888 case LOAD:
3889 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3890 // special case for BIOS
3891 smrv[rt1[i]]=0xa0000000;
3892 smrv_strong_next|=1<<rt1[i];
3893 break;
3894 }
3895 // fallthrough
3896 case SHIFT:
3897 case LOADLR:
3898 case MOV:
// The result is not tracked: drop any speculation on the target.
3899 smrv_strong_next&=~(1<<rt1[i]);
3900 smrv_weak_next&=~(1<<rt1[i]);
3901 break;
3902 case COP0:
3903 case COP2:
3904 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3905 smrv_strong_next&=~(1<<rt1[i]);
3906 smrv_weak_next&=~(1<<rt1[i]);
3907 }
3908 break;
3909 case C2LS:
3910 if (opcode[i]==0x32) { // LWC2
3911 smrv_strong_next&=~(1<<rt1[i]);
3912 smrv_weak_next&=~(1<<rt1[i]);
3913 }
3914 break;
3915 }
3916#if 0
3917 int r=4;
3918 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3919 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3920#endif
3921}
3922
/*
 * Memory classes returned by get_ptr_mem_type(). MTYPE_8000 must stay 0:
 * emit_fastpath_cmp_jump() treats type 0 as the plain RAM-range check.
 */
3923enum {
3924 MTYPE_8000 = 0, // default class; also used for everything not listed below
3925 MTYPE_8020,     // 0x80200000-0x807fffff (RAM mirror)
3926 MTYPE_0000,     // below 0x00200000 (RAM mirror at 0)
3927 MTYPE_A000,     // 0xa0000000-0xa01fffff (RAM mirror at A)
3928 MTYPE_1F80,     // 0x1f800000-0x1f800fff (scratchpad)
3929};
3930
3931static int get_ptr_mem_type(u_int a)
3932{
3933 if(a < 0x00200000) {
3934 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3935 // return wrong, must use memhandler for BIOS self-test to pass
3936 // 007 does similar stuff from a00 mirror, weird stuff
3937 return MTYPE_8000;
3938 return MTYPE_0000;
3939 }
3940 if(0x1f800000 <= a && a < 0x1f801000)
3941 return MTYPE_1F80;
3942 if(0x80200000 <= a && a < 0x80800000)
3943 return MTYPE_8020;
3944 if(0xa0000000 <= a && a < 0xa0200000)
3945 return MTYPE_A000;
3946 return MTYPE_8000;
3947}
3948#endif
3949
3950static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3951{
3952 int jaddr,type=0;
3953
3954#ifdef PCSX
3955 int mr=rs1[i];
3956 if(((smrv_strong|smrv_weak)>>mr)&1) {
3957 type=get_ptr_mem_type(smrv[mr]);
3958 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3959 }
3960 else {
3961 // use the mirror we are running on
3962 type=get_ptr_mem_type(start);
3963 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3964 }
3965
3966 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3967 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3968 addr=*addr_reg_override=HOST_TEMPREG;
3969 type=0;
3970 }
3971 else if(type==MTYPE_0000) { // RAM 0 mirror
3972 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3973 addr=*addr_reg_override=HOST_TEMPREG;
3974 type=0;
3975 }
3976 else if(type==MTYPE_A000) { // RAM A mirror
3977 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3978 addr=*addr_reg_override=HOST_TEMPREG;
3979 type=0;
3980 }
3981 else if(type==MTYPE_1F80) { // scratchpad
3982 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3983 emit_cmpimm(HOST_TEMPREG,0x1000);
3984 jaddr=(int)out;
3985 emit_jc(0);
3986 }
3987#endif
3988
3989 if(type==0)
3990 {
3991 emit_cmpimm(addr,RAM_SIZE);
3992 jaddr=(int)out;
3993 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3994 // Hint to branch predictor that the branch is unlikely to be taken
3995 if(rs1[i]>=28)
3996 emit_jno_unlikely(0);
3997 else
3998 #endif
3999 emit_jno(0);
4000 }
4001
4002 return jaddr;
4003}
4004
57871462 4005#define shift_assemble shift_assemble_arm
4006
/*
 * loadlr_assemble_arm - emit ARM code for the unaligned-load instruction at
 * index i (LWL/LWR, opcodes 0x22/0x26; LDL/LDR, opcodes 0x1A/0x1B).
 *
 * The aligned word (or doubleword) containing the address is loaded into the
 * FTEMP register(s), either directly, through a stub added with add_stub(),
 * or through inline_readstub() for constant addresses outside the fast path.
 * It is then shifted and merged into the target register(s) using a mask
 * built from -1 in HOST_TEMPREG.
 */
4007void loadlr_assemble_arm(int i,struct regstat *i_regs)
4008{
4009 int s,th,tl,temp,temp2,addr,map=-1;
4010 int offset;
4011 int jaddr=0;
af4ee1fe 4012 int memtarget=0,c=0;
ffb0b9e0 4013 int fastload_reg_override=0;
57871462 4014 u_int hr,reglist=0;
4015 th=get_reg(i_regs->regmap,rt1[i]|64);
4016 tl=get_reg(i_regs->regmap,rt1[i]);
4017 s=get_reg(i_regs->regmap,rs1[i]);
4018 temp=get_reg(i_regs->regmap,-1);
4019 temp2=get_reg(i_regs->regmap,FTEMP);
4020 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4021 assert(addr<0);
4022 offset=imm[i];
// reglist collects every host register that currently holds something.
4023 for(hr=0;hr<HOST_REGS;hr++) {
4024 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4025 }
4026 reglist|=1<<temp;
// NOTE(review): c is still 0 at this point (it is only assigned a few lines
// below), so the "||c" term never fires - confirm whether that is intended.
4027 if(offset||s<0||c) addr=temp2;
4028 else addr=s;
4029 if(s>=0) {
4030 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4031 if(c) {
4032 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4033 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4034 }
57871462 4035 }
535d208a 4036 if(!using_tlb) {
4037 if(!c) {
4038 #ifdef RAM_OFFSET
4039 map=get_reg(i_regs->regmap,ROREG);
4040 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4041 #endif
// temp = addr*8 (bit offset, masked later); temp2 = address rounded down to
// the containing word or doubleword.
4042 emit_shlimm(addr,3,temp);
4043 if (opcode[i]==0x22||opcode[i]==0x26) {
4044 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4045 }else{
535d208a 4046 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4047 }
ffb0b9e0 4048 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4049 }
4050 else {
// Constant address: the bit offset is known at compile time.
4051 if (opcode[i]==0x22||opcode[i]==0x26) {
4052 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4053 }else{
4054 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4055 }
57871462 4056 }
535d208a 4057 }else{ // using tlb
4058 int a;
4059 if(c) {
4060 a=-1;
4061 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4062 a=0xFFFFFFFC; // LWL/LWR
4063 }else{
4064 a=0xFFFFFFF8; // LDL/LDR
4065 }
4066 map=get_reg(i_regs->regmap,TLREG);
4067 assert(map>=0);
ea3d2e6e 4068 reglist&=~(1<<map);
535d208a 4069 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4070 if(c) {
4071 if (opcode[i]==0x22||opcode[i]==0x26) {
4072 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4073 }else{
4074 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4075 }
535d208a 4076 }
4077 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4078 }
4079 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4080 if(!c||memtarget) {
// Read through the override register when emit_fastpath_cmp_jump() set one.
ffb0b9e0 4081 int a=temp2;
4082 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4083 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4084 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4085 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4086 }
4087 else
4088 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4089 if(rt1[i]) {
4090 assert(tl>=0);
57871462 4091 emit_andimm(temp,24,temp);
2002a1db 4092#ifdef BIG_ENDIAN_MIPS
4093 if (opcode[i]==0x26) // LWR
4094#else
4095 if (opcode[i]==0x22) // LWL
4096#endif
4097 emit_xorimm(temp,24,temp);
// HOST_TEMPREG = all ones; shifted by temp it becomes the mask of target bits
// that are cleared before the loaded bits are ORed in.
57871462 4098 emit_movimm(-1,HOST_TEMPREG);
4099 if (opcode[i]==0x26) {
4100 emit_shr(temp2,temp,temp2);
4101 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4102 }else{
4103 emit_shl(temp2,temp,temp2);
4104 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4105 }
4106 emit_or(temp2,tl,tl);
57871462 4107 }
535d208a 4108 //emit_storereg(rt1[i],tl); // DEBUG
4109 }
4110 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4111 // FIXME: little endian, fastload_reg_override
535d208a 4112 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4113 if(!c||memtarget) {
4114 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4115 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4116 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4117 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4118 }
4119 else
4120 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4121 if(rt1[i]) {
4122 assert(th>=0);
4123 assert(tl>=0);
// Bit 5 of the bit offset is tested here; the eq/ne-suffixed operations below
// follow that test.
57871462 4124 emit_testimm(temp,32);
4125 emit_andimm(temp,24,temp);
4126 if (opcode[i]==0x1A) { // LDL
4127 emit_rsbimm(temp,32,HOST_TEMPREG);
4128 emit_shl(temp2h,temp,temp2h);
4129 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4130 emit_movimm(-1,HOST_TEMPREG);
4131 emit_shl(temp2,temp,temp2);
4132 emit_cmove_reg(temp2h,th);
4133 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4134 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4135 emit_orreq(temp2,tl,tl);
4136 emit_orrne(temp2,th,th);
4137 }
4138 if (opcode[i]==0x1B) { // LDR
4139 emit_xorimm(temp,24,temp);
4140 emit_rsbimm(temp,32,HOST_TEMPREG);
4141 emit_shr(temp2,temp,temp2);
4142 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4143 emit_movimm(-1,HOST_TEMPREG);
4144 emit_shr(temp2h,temp,temp2h);
4145 emit_cmovne_reg(temp2,tl);
4146 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4147 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4148 emit_orrne(temp2h,th,th);
4149 emit_orreq(temp2h,tl,tl);
4150 }
4151 }
4152 }
4153}
4154#define loadlr_assemble loadlr_assemble_arm
4155
4156void cop0_assemble(int i,struct regstat *i_regs)
4157{
4158 if(opcode2[i]==0) // MFC0
4159 {
4160 signed char t=get_reg(i_regs->regmap,rt1[i]);
4161 char copr=(source[i]>>11)&0x1f;
4162 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4163 if(t>=0&&rt1[i]!=0) {
7139f3c8 4164#ifdef MUPEN64
57871462 4165 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4166 emit_movimm((source[i]>>11)&0x1f,1);
4167 emit_writeword(0,(int)&PC);
4168 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4169 if(copr==9) {
4170 emit_readword((int)&last_count,ECX);
4171 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4172 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4173 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4174 emit_writeword(HOST_CCREG,(int)&Count);
4175 }
4176 emit_call((int)MFC0);
4177 emit_readword((int)&readmem_dword,t);
7139f3c8 4178#else
4179 emit_readword((int)&reg_cop0+copr*4,t);
4180#endif
57871462 4181 }
4182 }
4183 else if(opcode2[i]==4) // MTC0
4184 {
4185 signed char s=get_reg(i_regs->regmap,rs1[i]);
4186 char copr=(source[i]>>11)&0x1f;
4187 assert(s>=0);
63cb0298 4188#ifdef MUPEN64
57871462 4189 emit_writeword(s,(int)&readmem_dword);
4190 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4191 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4192 emit_movimm((source[i]>>11)&0x1f,1);
4193 emit_writeword(0,(int)&PC);
4194 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4195#else
4196 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4197#endif
4198 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4199 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4200 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4201 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4202 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4203 emit_writeword(HOST_CCREG,(int)&Count);
4204 }
4205 // What a mess. The status register (12) can enable interrupts,
4206 // so needs a special case to handle a pending interrupt.
4207 // The interrupt must be taken immediately, because a subsequent
4208 // instruction might disable interrupts again.
7139f3c8 4209 if(copr==12||copr==13) {
fca1aef2 4210#ifdef PCSX
4211 if (is_delayslot) {
4212 // burn cycles to cause cc_interrupt, which will
4213 // reschedule next_interupt. Relies on CCREG from above.
4214 assem_debug("MTC0 DS %d\n", copr);
4215 emit_writeword(HOST_CCREG,(int)&last_count);
4216 emit_movimm(0,HOST_CCREG);
4217 emit_storereg(CCREG,HOST_CCREG);
63cb0298 4218 if(s!=1)
4219 emit_mov(s,1);
fca1aef2 4220 emit_movimm(copr,0);
4221 emit_call((int)pcsx_mtc0_ds);
4222 return;
4223 }
4224#endif
63cb0298 4225 emit_movimm(start+i*4+4,HOST_TEMPREG);
4226 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4227 emit_movimm(0,HOST_TEMPREG);
4228 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4229 }
4230 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4231 //else
fca1aef2 4232#ifdef PCSX
63cb0298 4233 if(s!=1)
4234 emit_mov(s,1);
fca1aef2 4235 emit_movimm(copr,0);
4236 emit_call((int)pcsx_mtc0);
4237#else
57871462 4238 emit_call((int)MTC0);
fca1aef2 4239#endif
7139f3c8 4240 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4241 emit_readword((int)&Count,HOST_CCREG);
4242 emit_readword((int)&next_interupt,ECX);
2573466a 4243 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4244 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4245 emit_writeword(ECX,(int)&last_count);
4246 emit_storereg(CCREG,HOST_CCREG);
4247 }
7139f3c8 4248 if(copr==12||copr==13) {
57871462 4249 assert(!is_delayslot);
4250 emit_readword((int)&pending_exception,14);
4251 }
4252 emit_loadreg(rs1[i],s);
4253 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4254 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4255 if(copr==12||copr==13) {
57871462 4256 emit_test(14,14);
4257 emit_jne((int)&do_interrupt);
4258 }
4259 cop1_usable=0;
4260 }
4261 else
4262 {
4263 assert(opcode2[i]==0x10);
3d624f89 4264#ifndef DISABLE_TLB
57871462 4265 if((source[i]&0x3f)==0x01) // TLBR
4266 emit_call((int)TLBR);
4267 if((source[i]&0x3f)==0x02) // TLBWI
4268 emit_call((int)TLBWI_new);
4269 if((source[i]&0x3f)==0x06) { // TLBWR
4270 // The TLB entry written by TLBWR is dependent on the count,
4271 // so update the cycle count
4272 emit_readword((int)&last_count,ECX);
4273 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4274 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4275 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4276 emit_writeword(HOST_CCREG,(int)&Count);
4277 emit_call((int)TLBWR_new);
4278 }
4279 if((source[i]&0x3f)==0x08) // TLBP
4280 emit_call((int)TLBP);
3d624f89 4281#endif
576bbd8f 4282#ifdef PCSX
4283 if((source[i]&0x3f)==0x10) // RFE
4284 {
4285 emit_readword((int)&Status,0);
4286 emit_andimm(0,0x3c,1);
4287 emit_andimm(0,~0xf,0);
4288 emit_orrshr_imm(1,2,0);
4289 emit_writeword(0,(int)&Status);
4290 }
4291#else
57871462 4292 if((source[i]&0x3f)==0x18) // ERET
4293 {
4294 int count=ccadj[i];
4295 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4296 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4297 emit_jmp((int)jump_eret);
4298 }
576bbd8f 4299#endif
57871462 4300 }
4301}
4302
b9b61529 4303static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4304{
4305 switch (copr) {
4306 case 1:
4307 case 3:
4308 case 5:
4309 case 8:
4310 case 9:
4311 case 10:
4312 case 11:
4313 emit_readword((int)&reg_cop2d[copr],tl);
4314 emit_signextend16(tl,tl);
4315 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4316 break;
4317 case 7:
4318 case 16:
4319 case 17:
4320 case 18:
4321 case 19:
4322 emit_readword((int)&reg_cop2d[copr],tl);
4323 emit_andimm(tl,0xffff,tl);
4324 emit_writeword(tl,(int)&reg_cop2d[copr]);
4325 break;
4326 case 15:
4327 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4328 emit_writeword(tl,(int)&reg_cop2d[copr]);
4329 break;
4330 case 28:
b9b61529 4331 case 29:
4332 emit_readword((int)&reg_cop2d[9],temp);
4333 emit_testimm(temp,0x8000); // do we need this?
4334 emit_andimm(temp,0xf80,temp);
4335 emit_andne_imm(temp,0,temp);
f70d384d 4336 emit_shrimm(temp,7,tl);
b9b61529 4337 emit_readword((int)&reg_cop2d[10],temp);
4338 emit_testimm(temp,0x8000);
4339 emit_andimm(temp,0xf80,temp);
4340 emit_andne_imm(temp,0,temp);
f70d384d 4341 emit_orrshr_imm(temp,2,tl);
b9b61529 4342 emit_readword((int)&reg_cop2d[11],temp);
4343 emit_testimm(temp,0x8000);
4344 emit_andimm(temp,0xf80,temp);
4345 emit_andne_imm(temp,0,temp);
f70d384d 4346 emit_orrshl_imm(temp,3,tl);
b9b61529 4347 emit_writeword(tl,(int)&reg_cop2d[copr]);
4348 break;
4349 default:
4350 emit_readword((int)&reg_cop2d[copr],tl);
4351 break;
4352 }
4353}
4354
4355static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4356{
4357 switch (copr) {
4358 case 15:
4359 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4360 emit_writeword(sl,(int)&reg_cop2d[copr]);
4361 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4362 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4363 emit_writeword(sl,(int)&reg_cop2d[14]);
4364 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4365 break;
4366 case 28:
4367 emit_andimm(sl,0x001f,temp);
f70d384d 4368 emit_shlimm(temp,7,temp);
b9b61529 4369 emit_writeword(temp,(int)&reg_cop2d[9]);
4370 emit_andimm(sl,0x03e0,temp);
f70d384d 4371 emit_shlimm(temp,2,temp);
b9b61529 4372 emit_writeword(temp,(int)&reg_cop2d[10]);
4373 emit_andimm(sl,0x7c00,temp);
f70d384d 4374 emit_shrimm(temp,3,temp);
b9b61529 4375 emit_writeword(temp,(int)&reg_cop2d[11]);
4376 emit_writeword(sl,(int)&reg_cop2d[28]);
4377 break;
4378 case 30:
4379 emit_movs(sl,temp);
4380 emit_mvnmi(temp,temp);
4381 emit_clz(temp,temp);
4382 emit_writeword(sl,(int)&reg_cop2d[30]);
4383 emit_writeword(temp,(int)&reg_cop2d[31]);
4384 break;
b9b61529 4385 case 31:
4386 break;
4387 default:
4388 emit_writeword(sl,(int)&reg_cop2d[copr]);
4389 break;
4390 }
4391}
4392
4393void cop2_assemble(int i,struct regstat *i_regs)
4394{
4395 u_int copr=(source[i]>>11)&0x1f;
4396 signed char temp=get_reg(i_regs->regmap,-1);
4397 if (opcode2[i]==0) { // MFC2
4398 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4399 if(tl>=0&&rt1[i]!=0)
b9b61529 4400 cop2_get_dreg(copr,tl,temp);
4401 }
4402 else if (opcode2[i]==4) { // MTC2
4403 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4404 cop2_put_dreg(copr,sl,temp);
4405 }
4406 else if (opcode2[i]==2) // CFC2
4407 {
4408 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4409 if(tl>=0&&rt1[i]!=0)
b9b61529 4410 emit_readword((int)&reg_cop2c[copr],tl);
4411 }
4412 else if (opcode2[i]==6) // CTC2
4413 {
4414 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4415 switch(copr) {
4416 case 4:
4417 case 12:
4418 case 20:
4419 case 26:
4420 case 27:
4421 case 29:
4422 case 30:
4423 emit_signextend16(sl,temp);
4424 break;
4425 case 31:
4426 //value = value & 0x7ffff000;
4427 //if (value & 0x7f87e000) value |= 0x80000000;
4428 emit_shrimm(sl,12,temp);
4429 emit_shlimm(temp,12,temp);
4430 emit_testimm(temp,0x7f000000);
4431 emit_testeqimm(temp,0x00870000);
4432 emit_testeqimm(temp,0x0000e000);
4433 emit_orrne_imm(temp,0x80000000,temp);
4434 break;
4435 default:
4436 temp=sl;
4437 break;
4438 }
4439 emit_writeword(temp,(int)&reg_cop2c[copr]);
4440 assert(sl>=0);
4441 }
4442}
4443
054175e9 4444static void c2op_prologue(u_int op,u_int reglist)
4445{
4446 save_regs_all(reglist);
82ed88eb 4447#ifdef PCNT
4448 emit_movimm(op,0);
4449 emit_call((int)pcnt_gte_start);
4450#endif
054175e9 4451 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4452}
4453
4454static void c2op_epilogue(u_int op,u_int reglist)
4455{
82ed88eb 4456#ifdef PCNT
4457 emit_movimm(op,0);
4458 emit_call((int)pcnt_gte_end);
4459#endif
054175e9 4460 restore_regs_all(reglist);
4461}
4462
6c0eefaf 4463static void c2op_call_MACtoIR(int lm,int need_flags)
4464{
4465 if(need_flags)
4466 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4467 else
4468 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4469}
4470
4471static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4472{
4473 emit_call((int)func);
4474 // func is C code and trashes r0
4475 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4476 if(need_flags||need_ir)
4477 c2op_call_MACtoIR(lm,need_flags);
4478 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4479}
4480
054175e9 4481static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4482{
4483 signed char temp=get_reg(i_regs->regmap,-1);
4484 u_int c2op=source[i]&0x3f;
6c0eefaf 4485 u_int hr,reglist_full=0,reglist;
054175e9 4486 int need_flags,need_ir;
b9b61529 4487 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4488 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4489 }
6c0eefaf 4490 reglist=reglist_full&0x100f;
b9b61529 4491
4492 if (gte_handlers[c2op]!=NULL) {
bedfea38 4493 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4494 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
4495 assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n",
4496 gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4497 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4498 need_flags=0;
6c0eefaf 4499 int shift = (source[i] >> 19) & 1;
4500 int lm = (source[i] >> 10) & 1;
054175e9 4501 switch(c2op) {
4502 case GTE_MVMVA: {
054175e9 4503 int v = (source[i] >> 15) & 3;
4504 int cv = (source[i] >> 13) & 3;
4505 int mx = (source[i] >> 17) & 3;
6c0eefaf 4506 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4507 c2op_prologue(c2op,reglist);
4508 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4509 if(v<3)
4510 emit_ldrd(v*8,0,4);
4511 else {
4512 emit_movzwl_indexed(9*4,0,4); // gteIR
4513 emit_movzwl_indexed(10*4,0,6);
4514 emit_movzwl_indexed(11*4,0,5);
4515 emit_orrshl_imm(6,16,4);
4516 }
4517 if(mx<3)
4518 emit_addimm(0,32*4+mx*8*4,6);
4519 else
4520 emit_readword((int)&zeromem_ptr,6);
4521 if(cv<3)
4522 emit_addimm(0,32*4+(cv*8+5)*4,7);
4523 else
4524 emit_readword((int)&zeromem_ptr,7);
4525#ifdef __ARM_NEON__
4526 emit_movimm(source[i],1); // opcode
4527 emit_call((int)gteMVMVA_part_neon);
4528 if(need_flags) {
4529 emit_movimm(lm,1);
4530 emit_call((int)gteMACtoIR_flags_neon);
4531 }
4532#else
4533 if(cv==3&&shift)
4534 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4535 else {
4536 emit_movimm(shift,1);
4537 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4538 }
6c0eefaf 4539 if(need_flags||need_ir)
4540 c2op_call_MACtoIR(lm,need_flags);
054175e9 4541#endif
4542 break;
4543 }
6c0eefaf 4544 case GTE_OP:
4545 c2op_prologue(c2op,reglist);
4546 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4547 if(need_flags||need_ir) {
4548 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4549 c2op_call_MACtoIR(lm,need_flags);
4550 }
4551 break;
4552 case GTE_DPCS:
4553 c2op_prologue(c2op,reglist);
4554 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4555 break;
4556 case GTE_INTPL:
4557 c2op_prologue(c2op,reglist);
4558 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4559 break;
4560 case GTE_SQR:
4561 c2op_prologue(c2op,reglist);
4562 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4563 if(need_flags||need_ir) {
4564 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4565 c2op_call_MACtoIR(lm,need_flags);
4566 }
4567 break;
4568 case GTE_DCPL:
4569 c2op_prologue(c2op,reglist);
4570 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4571 break;
4572 case GTE_GPF:
4573 c2op_prologue(c2op,reglist);
4574 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4575 break;
4576 case GTE_GPL:
4577 c2op_prologue(c2op,reglist);
4578 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4579 break;
b9b61529 4580
054175e9 4581 default:
054175e9 4582 c2op_prologue(c2op,reglist);
6c0eefaf 4583 //emit_movimm(source[i],1); // opcode
4584 //emit_writeword(1,(int)&psxRegs.code);
054175e9 4585 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4586 break;
4587 }
4588 c2op_epilogue(c2op,reglist);
4589 }
b9b61529 4590}
4591
4592void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4593{
4594 // XXX: should just just do the exception instead
4595 if(!cop1_usable) {
4596 int jaddr=(int)out;
4597 emit_jmp(0);
4598 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4599 cop1_usable=1;
4600 }
4601}
4602
57871462 4603void cop1_assemble(int i,struct regstat *i_regs)
4604{
3d624f89 4605#ifndef DISABLE_COP1
57871462 4606 // Check cop1 unusable
4607 if(!cop1_usable) {
4608 signed char rs=get_reg(i_regs->regmap,CSREG);
4609 assert(rs>=0);
4610 emit_testimm(rs,0x20000000);
4611 int jaddr=(int)out;
4612 emit_jeq(0);
4613 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4614 cop1_usable=1;
4615 }
4616 if (opcode2[i]==0) { // MFC1
4617 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4618 if(tl>=0) {
4619 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4620 emit_readword_indexed(0,tl,tl);
4621 }
4622 }
4623 else if (opcode2[i]==1) { // DMFC1
4624 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4625 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4626 if(tl>=0) {
4627 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4628 if(th>=0) emit_readword_indexed(4,tl,th);
4629 emit_readword_indexed(0,tl,tl);
4630 }
4631 }
4632 else if (opcode2[i]==4) { // MTC1
4633 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4634 signed char temp=get_reg(i_regs->regmap,-1);
4635 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4636 emit_writeword_indexed(sl,0,temp);
4637 }
4638 else if (opcode2[i]==5) { // DMTC1
4639 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4640 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4641 signed char temp=get_reg(i_regs->regmap,-1);
4642 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4643 emit_writeword_indexed(sh,4,temp);
4644 emit_writeword_indexed(sl,0,temp);
4645 }
4646 else if (opcode2[i]==2) // CFC1
4647 {
4648 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4649 if(tl>=0) {
4650 u_int copr=(source[i]>>11)&0x1f;
4651 if(copr==0) emit_readword((int)&FCR0,tl);
4652 if(copr==31) emit_readword((int)&FCR31,tl);
4653 }
4654 }
4655 else if (opcode2[i]==6) // CTC1
4656 {
4657 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4658 u_int copr=(source[i]>>11)&0x1f;
4659 assert(sl>=0);
4660 if(copr==31)
4661 {
4662 emit_writeword(sl,(int)&FCR31);
4663 // Set the rounding mode
4664 //FIXME
4665 //char temp=get_reg(i_regs->regmap,-1);
4666 //emit_andimm(sl,3,temp);
4667 //emit_fldcw_indexed((int)&rounding_modes,temp);
4668 }
4669 }
3d624f89 4670#else
4671 cop1_unusable(i, i_regs);
4672#endif
57871462 4673}
4674
4675void fconv_assemble_arm(int i,struct regstat *i_regs)
4676{
3d624f89 4677#ifndef DISABLE_COP1
57871462 4678 signed char temp=get_reg(i_regs->regmap,-1);
4679 assert(temp>=0);
4680 // Check cop1 unusable
4681 if(!cop1_usable) {
4682 signed char rs=get_reg(i_regs->regmap,CSREG);
4683 assert(rs>=0);
4684 emit_testimm(rs,0x20000000);
4685 int jaddr=(int)out;
4686 emit_jeq(0);
4687 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4688 cop1_usable=1;
4689 }
4690
4691 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4692 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4693 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4694 emit_flds(temp,15);
4695 emit_ftosizs(15,15); // float->int, truncate
4696 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4697 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4698 emit_fsts(15,temp);
4699 return;
4700 }
4701 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4702 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4703 emit_vldr(temp,7);
4704 emit_ftosizd(7,13); // double->int, truncate
4705 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4706 emit_fsts(13,temp);
4707 return;
4708 }
4709
4710 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4711 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4712 emit_flds(temp,13);
4713 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4714 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4715 emit_fsitos(13,15);
4716 emit_fsts(15,temp);
4717 return;
4718 }
4719 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4720 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4721 emit_flds(temp,13);
4722 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4723 emit_fsitod(13,7);
4724 emit_vstr(7,temp);
4725 return;
4726 }
4727
4728 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4729 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4730 emit_flds(temp,13);
4731 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4732 emit_fcvtds(13,7);
4733 emit_vstr(7,temp);
4734 return;
4735 }
4736 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4737 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4738 emit_vldr(temp,7);
4739 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4740 emit_fcvtsd(7,13);
4741 emit_fsts(13,temp);
4742 return;
4743 }
4744 #endif
4745
4746 // C emulation code
4747
4748 u_int hr,reglist=0;
4749 for(hr=0;hr<HOST_REGS;hr++) {
4750 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4751 }
4752 save_regs(reglist);
4753
4754 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4755 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4756 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4757 emit_call((int)cvt_s_w);
4758 }
4759 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4760 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4761 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4762 emit_call((int)cvt_d_w);
4763 }
4764 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4765 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4766 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4767 emit_call((int)cvt_s_l);
4768 }
4769 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4770 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4771 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4772 emit_call((int)cvt_d_l);
4773 }
4774
4775 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4776 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4777 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4778 emit_call((int)cvt_d_s);
4779 }
4780 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4781 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4782 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4783 emit_call((int)cvt_w_s);
4784 }
4785 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4786 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4787 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4788 emit_call((int)cvt_l_s);
4789 }
4790
4791 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4792 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4793 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4794 emit_call((int)cvt_s_d);
4795 }
4796 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4797 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4798 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4799 emit_call((int)cvt_w_d);
4800 }
4801 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4802 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4803 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4804 emit_call((int)cvt_l_d);
4805 }
4806
4807 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4808 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4809 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4810 emit_call((int)round_l_s);
4811 }
4812 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4813 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4814 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4815 emit_call((int)trunc_l_s);
4816 }
4817 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4818 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4819 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4820 emit_call((int)ceil_l_s);
4821 }
4822 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4823 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4824 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4825 emit_call((int)floor_l_s);
4826 }
4827 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4828 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4829 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4830 emit_call((int)round_w_s);
4831 }
4832 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4833 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4834 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4835 emit_call((int)trunc_w_s);
4836 }
4837 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4838 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4839 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4840 emit_call((int)ceil_w_s);
4841 }
4842 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4843 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4844 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4845 emit_call((int)floor_w_s);
4846 }
4847
4848 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4849 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4850 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4851 emit_call((int)round_l_d);
4852 }
4853 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4854 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4855 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4856 emit_call((int)trunc_l_d);
4857 }
4858 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4859 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4860 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4861 emit_call((int)ceil_l_d);
4862 }
4863 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4864 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4865 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4866 emit_call((int)floor_l_d);
4867 }
4868 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4869 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4870 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4871 emit_call((int)round_w_d);
4872 }
4873 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4874 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4875 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4876 emit_call((int)trunc_w_d);
4877 }
4878 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4879 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4880 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4881 emit_call((int)ceil_w_d);
4882 }
4883 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4884 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4885 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4886 emit_call((int)floor_w_d);
4887 }
4888
4889 restore_regs(reglist);
3d624f89 4890#else
4891 cop1_unusable(i, i_regs);
4892#endif
57871462 4893}
// Map the generic name fconv_assemble onto the ARM-specific fconv_assemble_arm.
4894#define fconv_assemble fconv_assemble_arm
4895
4896void fcomp_assemble(int i,struct regstat *i_regs)
4897{
3d624f89 4898#ifndef DISABLE_COP1
57871462 4899 signed char fs=get_reg(i_regs->regmap,FSREG);
4900 signed char temp=get_reg(i_regs->regmap,-1);
4901 assert(temp>=0);
4902 // Check cop1 unusable
4903 if(!cop1_usable) {
4904 signed char cs=get_reg(i_regs->regmap,CSREG);
4905 assert(cs>=0);
4906 emit_testimm(cs,0x20000000);
4907 int jaddr=(int)out;
4908 emit_jeq(0);
4909 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4910 cop1_usable=1;
4911 }
4912
4913 if((source[i]&0x3f)==0x30) {
4914 emit_andimm(fs,~0x800000,fs);
4915 return;
4916 }
4917
4918 if((source[i]&0x3e)==0x38) {
4919 // sf/ngle - these should throw exceptions for NaNs
4920 emit_andimm(fs,~0x800000,fs);
4921 return;
4922 }
4923
4924 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4925 if(opcode2[i]==0x10) {
4926 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4927 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4928 emit_orimm(fs,0x800000,fs);
4929 emit_flds(temp,14);
4930 emit_flds(HOST_TEMPREG,15);
4931 emit_fcmps(14,15);
4932 emit_fmstat();
4933 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4934 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4935 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4936 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4937 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4938 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4939 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4940 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4941 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4942 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4943 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4944 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4945 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4946 return;
4947 }
4948 if(opcode2[i]==0x11) {
4949 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4950 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4951 emit_orimm(fs,0x800000,fs);
4952 emit_vldr(temp,6);
4953 emit_vldr(HOST_TEMPREG,7);
4954 emit_fcmpd(6,7);
4955 emit_fmstat();
4956 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4957 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4958 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4959 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4960 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4961 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4962 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4963 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4964 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4965 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4966 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4967 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4968 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4969 return;
4970 }
4971 #endif
4972
4973 // C only
4974
4975 u_int hr,reglist=0;
4976 for(hr=0;hr<HOST_REGS;hr++) {
4977 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4978 }
4979 reglist&=~(1<<fs);
4980 save_regs(reglist);
4981 if(opcode2[i]==0x10) {
4982 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4983 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4984 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4985 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4986 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4987 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4988 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4989 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4990 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4991 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4992 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4993 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4994 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4995 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
4996 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
4997 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
4998 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
4999 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5000 }
5001 if(opcode2[i]==0x11) {
5002 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5003 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5004 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5005 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5006 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5007 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5008 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5009 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5010 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5011 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5012 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5013 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5014 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5015 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5016 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5017 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5018 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5019 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5020 }
5021 restore_regs(reglist);
5022 emit_loadreg(FSREG,fs);
3d624f89 5023#else
5024 cop1_unusable(i, i_regs);
5025#endif
57871462 5026}
5027
5028void float_assemble(int i,struct regstat *i_regs)
5029{
3d624f89 5030#ifndef DISABLE_COP1
57871462 5031 signed char temp=get_reg(i_regs->regmap,-1);
5032 assert(temp>=0);
5033 // Check cop1 unusable
5034 if(!cop1_usable) {
5035 signed char cs=get_reg(i_regs->regmap,CSREG);
5036 assert(cs>=0);
5037 emit_testimm(cs,0x20000000);
5038 int jaddr=(int)out;
5039 emit_jeq(0);
5040 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5041 cop1_usable=1;
5042 }
5043
5044 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5045 if((source[i]&0x3f)==6) // mov
5046 {
5047 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5048 if(opcode2[i]==0x10) {
5049 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5050 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5051 emit_readword_indexed(0,temp,temp);
5052 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5053 }
5054 if(opcode2[i]==0x11) {
5055 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5056 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5057 emit_vldr(temp,7);
5058 emit_vstr(7,HOST_TEMPREG);
5059 }
5060 }
5061 return;
5062 }
5063
5064 if((source[i]&0x3f)>3)
5065 {
5066 if(opcode2[i]==0x10) {
5067 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5068 emit_flds(temp,15);
5069 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5070 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5071 }
5072 if((source[i]&0x3f)==4) // sqrt
5073 emit_fsqrts(15,15);
5074 if((source[i]&0x3f)==5) // abs
5075 emit_fabss(15,15);
5076 if((source[i]&0x3f)==7) // neg
5077 emit_fnegs(15,15);
5078 emit_fsts(15,temp);
5079 }
5080 if(opcode2[i]==0x11) {
5081 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5082 emit_vldr(temp,7);
5083 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5084 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5085 }
5086 if((source[i]&0x3f)==4) // sqrt
5087 emit_fsqrtd(7,7);
5088 if((source[i]&0x3f)==5) // abs
5089 emit_fabsd(7,7);
5090 if((source[i]&0x3f)==7) // neg
5091 emit_fnegd(7,7);
5092 emit_vstr(7,temp);
5093 }
5094 return;
5095 }
5096 if((source[i]&0x3f)<4)
5097 {
5098 if(opcode2[i]==0x10) {
5099 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5100 }
5101 if(opcode2[i]==0x11) {
5102 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5103 }
5104 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5105 if(opcode2[i]==0x10) {
5106 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5107 emit_flds(temp,15);
5108 emit_flds(HOST_TEMPREG,13);
5109 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5110 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5111 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5112 }
5113 }
5114 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5115 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5116 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5117 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5118 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5119 emit_fsts(15,HOST_TEMPREG);
5120 }else{
5121 emit_fsts(15,temp);
5122 }
5123 }
5124 else if(opcode2[i]==0x11) {
5125 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5126 emit_vldr(temp,7);
5127 emit_vldr(HOST_TEMPREG,6);
5128 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5129 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5130 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5131 }
5132 }
5133 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5134 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5135 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5136 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5137 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5138 emit_vstr(7,HOST_TEMPREG);
5139 }else{
5140 emit_vstr(7,temp);
5141 }
5142 }
5143 }
5144 else {
5145 if(opcode2[i]==0x10) {
5146 emit_flds(temp,15);
5147 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5148 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5149 }
5150 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5151 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5152 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5153 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5154 emit_fsts(15,temp);
5155 }
5156 else if(opcode2[i]==0x11) {
5157 emit_vldr(temp,7);
5158 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5159 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5160 }
5161 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5162 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5163 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5164 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5165 emit_vstr(7,temp);
5166 }
5167 }
5168 return;
5169 }
5170 #endif
5171
5172 u_int hr,reglist=0;
5173 for(hr=0;hr<HOST_REGS;hr++) {
5174 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5175 }
5176 if(opcode2[i]==0x10) { // Single precision
5177 save_regs(reglist);
5178 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5179 if((source[i]&0x3f)<4) {
5180 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5181 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5182 }else{
5183 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5184 }
5185 switch(source[i]&0x3f)
5186 {
5187 case 0x00: emit_call((int)add_s);break;
5188 case 0x01: emit_call((int)sub_s);break;
5189 case 0x02: emit_call((int)mul_s);break;
5190 case 0x03: emit_call((int)div_s);break;
5191 case 0x04: emit_call((int)sqrt_s);break;
5192 case 0x05: emit_call((int)abs_s);break;
5193 case 0x06: emit_call((int)mov_s);break;
5194 case 0x07: emit_call((int)neg_s);break;
5195 }
5196 restore_regs(reglist);
5197 }
5198 if(opcode2[i]==0x11) { // Double precision
5199 save_regs(reglist);
5200 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5201 if((source[i]&0x3f)<4) {
5202 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5203 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5204 }else{
5205 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5206 }
5207 switch(source[i]&0x3f)
5208 {
5209 case 0x00: emit_call((int)add_d);break;
5210 case 0x01: emit_call((int)sub_d);break;
5211 case 0x02: emit_call((int)mul_d);break;
5212 case 0x03: emit_call((int)div_d);break;
5213 case 0x04: emit_call((int)sqrt_d);break;
5214 case 0x05: emit_call((int)abs_d);break;
5215 case 0x06: emit_call((int)mov_d);break;
5216 case 0x07: emit_call((int)neg_d);break;
5217 }
5218 restore_regs(reglist);
5219 }
3d624f89 5220#else
5221 cop1_unusable(i, i_regs);
5222#endif
57871462 5223}
5224
5225void multdiv_assemble_arm(int i,struct regstat *i_regs)
5226{
5227 // case 0x18: MULT
5228 // case 0x19: MULTU
5229 // case 0x1A: DIV
5230 // case 0x1B: DIVU
5231 // case 0x1C: DMULT
5232 // case 0x1D: DMULTU
5233 // case 0x1E: DDIV
5234 // case 0x1F: DDIVU
5235 if(rs1[i]&&rs2[i])
5236 {
5237 if((opcode2[i]&4)==0) // 32-bit
5238 {
5239 if(opcode2[i]==0x18) // MULT
5240 {
5241 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5242 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5243 signed char hi=get_reg(i_regs->regmap,HIREG);
5244 signed char lo=get_reg(i_regs->regmap,LOREG);
5245 assert(m1>=0);
5246 assert(m2>=0);
5247 assert(hi>=0);
5248 assert(lo>=0);
5249 emit_smull(m1,m2,hi,lo);
5250 }
5251 if(opcode2[i]==0x19) // MULTU
5252 {
5253 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5254 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5255 signed char hi=get_reg(i_regs->regmap,HIREG);
5256 signed char lo=get_reg(i_regs->regmap,LOREG);
5257 assert(m1>=0);
5258 assert(m2>=0);
5259 assert(hi>=0);
5260 assert(lo>=0);
5261 emit_umull(m1,m2,hi,lo);
5262 }
5263 if(opcode2[i]==0x1A) // DIV
5264 {
5265 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5266 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5267 assert(d1>=0);
5268 assert(d2>=0);
5269 signed char quotient=get_reg(i_regs->regmap,LOREG);
5270 signed char remainder=get_reg(i_regs->regmap,HIREG);
5271 assert(quotient>=0);
5272 assert(remainder>=0);
5273 emit_movs(d1,remainder);
44a80f6a 5274 emit_movimm(0xffffffff,quotient);
5275 emit_negmi(quotient,quotient); // .. quotient and ..
5276 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5277 emit_movs(d2,HOST_TEMPREG);
5278 emit_jeq((int)out+52); // Division by zero
5279 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5280 emit_clz(HOST_TEMPREG,quotient);
5281 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5282 emit_orimm(quotient,1<<31,quotient);
5283 emit_shr(quotient,quotient,quotient);
5284 emit_cmp(remainder,HOST_TEMPREG);
5285 emit_subcs(remainder,HOST_TEMPREG,remainder);
5286 emit_adcs(quotient,quotient,quotient);
5287 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5288 emit_jcc((int)out-16); // -4
5289 emit_teq(d1,d2);
5290 emit_negmi(quotient,quotient);
5291 emit_test(d1,d1);
5292 emit_negmi(remainder,remainder);
5293 }
5294 if(opcode2[i]==0x1B) // DIVU
5295 {
5296 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5297 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5298 assert(d1>=0);
5299 assert(d2>=0);
5300 signed char quotient=get_reg(i_regs->regmap,LOREG);
5301 signed char remainder=get_reg(i_regs->regmap,HIREG);
5302 assert(quotient>=0);
5303 assert(remainder>=0);
44a80f6a 5304 emit_mov(d1,remainder);
5305 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5306 emit_test(d2,d2);
44a80f6a 5307 emit_jeq((int)out+40); // Division by zero
57871462 5308 emit_clz(d2,HOST_TEMPREG);
5309 emit_movimm(1<<31,quotient);
5310 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5311 emit_shr(quotient,HOST_TEMPREG,quotient);
5312 emit_cmp(remainder,d2);
5313 emit_subcs(remainder,d2,remainder);
5314 emit_adcs(quotient,quotient,quotient);
5315 emit_shrcc_imm(d2,1,d2);
5316 emit_jcc((int)out-16); // -4
5317 }
5318 }
5319 else // 64-bit
4600ba03 5320#ifndef FORCE32
57871462 5321 {
5322 if(opcode2[i]==0x1C) // DMULT
5323 {
5324 assert(opcode2[i]!=0x1C);
5325 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5326 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5327 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5328 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5329 assert(m1h>=0);
5330 assert(m2h>=0);
5331 assert(m1l>=0);
5332 assert(m2l>=0);
5333 emit_pushreg(m2h);
5334 emit_pushreg(m2l);
5335 emit_pushreg(m1h);
5336 emit_pushreg(m1l);
5337 emit_call((int)&mult64);
5338 emit_popreg(m1l);
5339 emit_popreg(m1h);
5340 emit_popreg(m2l);
5341 emit_popreg(m2h);
5342 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5343 signed char hil=get_reg(i_regs->regmap,HIREG);
5344 if(hih>=0) emit_loadreg(HIREG|64,hih);
5345 if(hil>=0) emit_loadreg(HIREG,hil);
5346 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5347 signed char lol=get_reg(i_regs->regmap,LOREG);
5348 if(loh>=0) emit_loadreg(LOREG|64,loh);
5349 if(lol>=0) emit_loadreg(LOREG,lol);
5350 }
5351 if(opcode2[i]==0x1D) // DMULTU
5352 {
5353 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5354 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5355 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5356 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5357 assert(m1h>=0);
5358 assert(m2h>=0);
5359 assert(m1l>=0);
5360 assert(m2l>=0);
5361 save_regs(0x100f);
5362 if(m1l!=0) emit_mov(m1l,0);
5363 if(m1h==0) emit_readword((int)&dynarec_local,1);
5364 else if(m1h>1) emit_mov(m1h,1);
5365 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5366 else if(m2l>2) emit_mov(m2l,2);
5367 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5368 else if(m2h>3) emit_mov(m2h,3);
5369 emit_call((int)&multu64);
5370 restore_regs(0x100f);
5371 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5372 signed char hil=get_reg(i_regs->regmap,HIREG);
5373 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5374 signed char lol=get_reg(i_regs->regmap,LOREG);
5375 /*signed char temp=get_reg(i_regs->regmap,-1);
5376 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5377 signed char rl=get_reg(i_regs->regmap,HIREG);
5378 assert(m1h>=0);
5379 assert(m2h>=0);
5380 assert(m1l>=0);
5381 assert(m2l>=0);
5382 assert(temp>=0);
5383 //emit_mov(m1l,EAX);
5384 //emit_mul(m2l);
5385 emit_umull(rl,rh,m1l,m2l);
5386 emit_storereg(LOREG,rl);
5387 emit_mov(rh,temp);
5388 //emit_mov(m1h,EAX);
5389 //emit_mul(m2l);
5390 emit_umull(rl,rh,m1h,m2l);
5391 emit_adds(rl,temp,temp);
5392 emit_adcimm(rh,0,rh);
5393 emit_storereg(HIREG,rh);
5394 //emit_mov(m2h,EAX);
5395 //emit_mul(m1l);
5396 emit_umull(rl,rh,m1l,m2h);
5397 emit_adds(rl,temp,temp);
5398 emit_adcimm(rh,0,rh);
5399 emit_storereg(LOREG|64,temp);
5400 emit_mov(rh,temp);
5401 //emit_mov(m2h,EAX);
5402 //emit_mul(m1h);
5403 emit_umull(rl,rh,m1h,m2h);
5404 emit_adds(rl,temp,rl);
5405 emit_loadreg(HIREG,temp);
5406 emit_adcimm(rh,0,rh);
5407 emit_adds(rl,temp,rl);
5408 emit_adcimm(rh,0,rh);
5409 // DEBUG
5410 /*
5411 emit_pushreg(m2h);
5412 emit_pushreg(m2l);
5413 emit_pushreg(m1h);
5414 emit_pushreg(m1l);
5415 emit_call((int)&multu64);
5416 emit_popreg(m1l);
5417 emit_popreg(m1h);
5418 emit_popreg(m2l);
5419 emit_popreg(m2h);
5420 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5421 signed char hil=get_reg(i_regs->regmap,HIREG);
5422 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5423 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5424 */
5425 // Shouldn't be necessary
5426 //char loh=get_reg(i_regs->regmap,LOREG|64);
5427 //char lol=get_reg(i_regs->regmap,LOREG);
5428 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5429 //if(lol>=0) emit_loadreg(LOREG,lol);
5430 }
5431 if(opcode2[i]==0x1E) // DDIV
5432 {
5433 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5434 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5435 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5436 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5437 assert(d1h>=0);
5438 assert(d2h>=0);
5439 assert(d1l>=0);
5440 assert(d2l>=0);
5441 save_regs(0x100f);
5442 if(d1l!=0) emit_mov(d1l,0);
5443 if(d1h==0) emit_readword((int)&dynarec_local,1);
5444 else if(d1h>1) emit_mov(d1h,1);
5445 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5446 else if(d2l>2) emit_mov(d2l,2);
5447 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5448 else if(d2h>3) emit_mov(d2h,3);
5449 emit_call((int)&div64);
5450 restore_regs(0x100f);
5451 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5452 signed char hil=get_reg(i_regs->regmap,HIREG);
5453 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5454 signed char lol=get_reg(i_regs->regmap,LOREG);
5455 if(hih>=0) emit_loadreg(HIREG|64,hih);
5456 if(hil>=0) emit_loadreg(HIREG,hil);
5457 if(loh>=0) emit_loadreg(LOREG|64,loh);
5458 if(lol>=0) emit_loadreg(LOREG,lol);
5459 }
5460 if(opcode2[i]==0x1F) // DDIVU
5461 {
5462 //u_int hr,reglist=0;
5463 //for(hr=0;hr<HOST_REGS;hr++) {
5464 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5465 //}
5466 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5467 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5468 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5469 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5470 assert(d1h>=0);
5471 assert(d2h>=0);
5472 assert(d1l>=0);
5473 assert(d2l>=0);
5474 save_regs(0x100f);
5475 if(d1l!=0) emit_mov(d1l,0);
5476 if(d1h==0) emit_readword((int)&dynarec_local,1);
5477 else if(d1h>1) emit_mov(d1h,1);
5478 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5479 else if(d2l>2) emit_mov(d2l,2);
5480 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5481 else if(d2h>3) emit_mov(d2h,3);
5482 emit_call((int)&divu64);
5483 restore_regs(0x100f);
5484 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5485 signed char hil=get_reg(i_regs->regmap,HIREG);
5486 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5487 signed char lol=get_reg(i_regs->regmap,LOREG);
5488 if(hih>=0) emit_loadreg(HIREG|64,hih);
5489 if(hil>=0) emit_loadreg(HIREG,hil);
5490 if(loh>=0) emit_loadreg(LOREG|64,loh);
5491 if(lol>=0) emit_loadreg(LOREG,lol);
5492 }
5493 }
4600ba03 5494#else
5495 assert(0);
5496#endif
57871462 5497 }
5498 else
5499 {
5500 // Multiply by zero is zero.
5501 // MIPS does not have a divide by zero exception.
5502 // The result is undefined, we return zero.
5503 signed char hr=get_reg(i_regs->regmap,HIREG);
5504 signed char lr=get_reg(i_regs->regmap,LOREG);
5505 if(hr>=0) emit_zeroreg(hr);
5506 if(lr>=0) emit_zeroreg(lr);
5507 }
5508}
5509#define multdiv_assemble multdiv_assemble_arm
5510
/*
 * Intentionally emits nothing on ARM.  The x86 backend uses this hook to
 * load the hash mask 0xf8 into register r; here the hash is computed by a
 * single instruction in do_rhash(), so no preload is required.
 */
void do_preload_rhash(int r) {
  (void)r; // unused on this architecture
}
5515
5516void do_preload_rhtbl(int ht) {
5517 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5518}
5519
/*
 * Emit the hash computation for a mini hash table lookup: rh = rs & 0xf8.
 * The result is used by do_miniht_load() as an offset into the table.
 */
void do_rhash(int rs,int rh) {
  enum { RHASH_MASK=0xf8 };
  emit_andimm(rs,RHASH_MASK,rh);
}
5523
5524void do_miniht_load(int ht,int rh) {
5525 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5526 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5527}
5528
5529void do_miniht_jump(int rs,int rh,int ht) {
5530 emit_cmp(rh,rs);
5531 emit_ldreq_indexed(ht,4,15);
5532 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5533 emit_mov(rs,7);
5534 emit_jmp(jump_vaddr_reg[7]);
5535 #else
5536 emit_jmp(jump_vaddr_reg[rs]);
5537 #endif
5538}
5539
5540void do_miniht_insert(u_int return_address,int rt,int temp) {
5541 #ifdef ARMv5_ONLY
5542 emit_movimm(return_address,rt); // PC into link register
5543 add_to_linker((int)out,return_address,1);
5544 emit_pcreladdr(temp);
5545 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5546 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5547 #else
5548 emit_movw(return_address&0x0000FFFF,rt);
5549 add_to_linker((int)out,return_address,1);
5550 emit_pcreladdr(temp);
5551 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5552 emit_movt(return_address&0xFFFF0000,rt);
5553 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5554 #endif
5555}
5556
5557// Sign-extend to 64 bits and write out upper half of a register
5558// This is useful where we have a 32-bit value in a register, and want to
5559// keep it in a 32-bit register, but can't guarantee that it won't be read
5560// as a 64-bit value later.
5561void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5562{
24385cae 5563#ifndef FORCE32
57871462 5564 if(is32_pre==is32) return;
5565 int hr,reg;
5566 for(hr=0;hr<HOST_REGS;hr++) {
5567 if(hr!=EXCLUDE_REG) {
5568 //if(pre[hr]==entry[hr]) {
5569 if((reg=pre[hr])>=0) {
5570 if((dirty>>hr)&1) {
5571 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5572 emit_sarimm(hr,31,HOST_TEMPREG);
5573 emit_storereg(reg|64,HOST_TEMPREG);
5574 }
5575 }
5576 }
5577 //}
5578 }
5579 }
24385cae 5580#endif
57871462 5581}
5582
5583void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5584{
5585 //if(dirty_pre==dirty) return;
5586 int hr,reg,new_hr;
5587 for(hr=0;hr<HOST_REGS;hr++) {
5588 if(hr!=EXCLUDE_REG) {
5589 reg=pre[hr];
5590 if(((~u)>>(reg&63))&1) {
f776eb14 5591 if(reg>0) {
57871462 5592 if(((dirty_pre&~dirty)>>hr)&1) {
5593 if(reg>0&&reg<34) {
5594 emit_storereg(reg,hr);
5595 if( ((is32_pre&~uu)>>reg)&1 ) {
5596 emit_sarimm(hr,31,HOST_TEMPREG);
5597 emit_storereg(reg|64,HOST_TEMPREG);
5598 }
5599 }
5600 else if(reg>=64) {
5601 emit_storereg(reg,hr);
5602 }
5603 }
5604 }
57871462 5605 }
5606 }
5607 }
5608}
5609
5610
5611/* using strd could possibly help but you'd have to allocate registers in pairs
5612void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5613{
5614 int hr;
5615 int wrote=-1;
5616 for(hr=HOST_REGS-1;hr>=0;hr--) {
5617 if(hr!=EXCLUDE_REG) {
5618 if(pre[hr]!=entry[hr]) {
5619 if(pre[hr]>=0) {
5620 if((dirty>>hr)&1) {
5621 if(get_reg(entry,pre[hr])<0) {
5622 if(pre[hr]<64) {
5623 if(!((u>>pre[hr])&1)) {
5624 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5625 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5626 emit_sarimm(hr,31,hr+1);
5627 emit_strdreg(pre[hr],hr);
5628 }
5629 else
5630 emit_storereg(pre[hr],hr);
5631 }else{
5632 emit_storereg(pre[hr],hr);
5633 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5634 emit_sarimm(hr,31,hr);
5635 emit_storereg(pre[hr]|64,hr);
5636 }
5637 }
5638 }
5639 }else{
5640 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5641 emit_storereg(pre[hr],hr);
5642 }
5643 }
5644 wrote=hr;
5645 }
5646 }
5647 }
5648 }
5649 }
5650 }
5651 for(hr=0;hr<HOST_REGS;hr++) {
5652 if(hr!=EXCLUDE_REG) {
5653 if(pre[hr]!=entry[hr]) {
5654 if(pre[hr]>=0) {
5655 int nr;
5656 if((nr=get_reg(entry,pre[hr]))>=0) {
5657 emit_mov(hr,nr);
5658 }
5659 }
5660 }
5661 }
5662 }
5663}
5664#define wb_invalidate wb_invalidate_arm
5665*/
5666
dd3a91a1 5667// Clearing the cache is rather slow on ARM Linux, so mark the areas
5668// that need to be cleared, and then only clear these areas once.
5669void do_clear_cache()
5670{
5671 int i,j;
5672 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5673 {
5674 u_int bitmap=needs_clear_cache[i];
5675 if(bitmap) {
5676 u_int start,end;
5677 for(j=0;j<32;j++)
5678 {
5679 if(bitmap&(1<<j)) {
5680 start=BASE_ADDR+i*131072+j*4096;
5681 end=start+4095;
5682 j++;
5683 while(j<32) {
5684 if(bitmap&(1<<j)) {
5685 end+=4096;
5686 j++;
5687 }else{
5688 __clear_cache((void *)start,(void *)end);
5689 break;
5690 }
5691 }
5692 }
5693 }
5694 needs_clear_cache[i]=0;
5695 }
5696 }
5697}
5698
57871462 5699// CPU-architecture-specific initialization
5700void arch_init() {
3d624f89 5701#ifndef DISABLE_COP1
57871462 5702 rounding_modes[0]=0x0<<22; // round
5703 rounding_modes[1]=0x3<<22; // trunc
5704 rounding_modes[2]=0x1<<22; // ceil
5705 rounding_modes[3]=0x2<<22; // floor
3d624f89 5706#endif
57871462 5707}
b9b61529 5708
5709// vim:shiftwidth=2:expandtab