drc: fix mtc0 argument
[pcsx_rearmed.git] / libpcsxcore / new_dynarec / assem_arm.c
CommitLineData
57871462 1/* * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * *
c6c3b1b3 2 * Mupen64plus/PCSX - assem_arm.c *
20d507ba 3 * Copyright (C) 2009-2011 Ari64 *
c6c3b1b3 4 * Copyright (C) 2010-2011 Gražvydas "notaz" Ignotas *
57871462 5 * *
6 * This program is free software; you can redistribute it and/or modify *
7 * it under the terms of the GNU General Public License as published by *
8 * the Free Software Foundation; either version 2 of the License, or *
9 * (at your option) any later version. *
10 * *
11 * This program is distributed in the hope that it will be useful, *
12 * but WITHOUT ANY WARRANTY; without even the implied warranty of *
13 * MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the *
14 * GNU General Public License for more details. *
15 * *
16 * You should have received a copy of the GNU General Public License *
17 * along with this program; if not, write to the *
18 * Free Software Foundation, Inc., *
19 * 51 Franklin Street, Fifth Floor, Boston, MA 02110-1301, USA. *
20 * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * * */
21
054175e9 22#ifdef PCSX
6c0eefaf 23#include "../gte.h"
24#define FLAGLESS
25#include "../gte.h"
26#undef FLAGLESS
054175e9 27#include "../gte_arm.h"
28#include "../gte_neon.h"
29#include "pcnt.h"
30#endif
31
57871462 32extern int cycle_count;
33extern int last_count;
34extern int pcaddr;
35extern int pending_exception;
36extern int branch_target;
37extern uint64_t readmem_dword;
3d624f89 38#ifdef MUPEN64
57871462 39extern precomp_instr fake_pc;
3d624f89 40#endif
57871462 41extern void *dynarec_local;
42extern u_int memory_map[1048576];
43extern u_int mini_ht[32][2];
44extern u_int rounding_modes[4];
45
46void indirect_jump_indexed();
47void indirect_jump();
48void do_interrupt();
49void jump_vaddr_r0();
50void jump_vaddr_r1();
51void jump_vaddr_r2();
52void jump_vaddr_r3();
53void jump_vaddr_r4();
54void jump_vaddr_r5();
55void jump_vaddr_r6();
56void jump_vaddr_r7();
57void jump_vaddr_r8();
58void jump_vaddr_r9();
59void jump_vaddr_r10();
60void jump_vaddr_r12();
61
62const u_int jump_vaddr_reg[16] = {
63 (int)jump_vaddr_r0,
64 (int)jump_vaddr_r1,
65 (int)jump_vaddr_r2,
66 (int)jump_vaddr_r3,
67 (int)jump_vaddr_r4,
68 (int)jump_vaddr_r5,
69 (int)jump_vaddr_r6,
70 (int)jump_vaddr_r7,
71 (int)jump_vaddr_r8,
72 (int)jump_vaddr_r9,
73 (int)jump_vaddr_r10,
74 0,
75 (int)jump_vaddr_r12,
76 0,
77 0,
78 0};
79
0bbd1454 80void invalidate_addr_r0();
81void invalidate_addr_r1();
82void invalidate_addr_r2();
83void invalidate_addr_r3();
84void invalidate_addr_r4();
85void invalidate_addr_r5();
86void invalidate_addr_r6();
87void invalidate_addr_r7();
88void invalidate_addr_r8();
89void invalidate_addr_r9();
90void invalidate_addr_r10();
91void invalidate_addr_r12();
92
93const u_int invalidate_addr_reg[16] = {
94 (int)invalidate_addr_r0,
95 (int)invalidate_addr_r1,
96 (int)invalidate_addr_r2,
97 (int)invalidate_addr_r3,
98 (int)invalidate_addr_r4,
99 (int)invalidate_addr_r5,
100 (int)invalidate_addr_r6,
101 (int)invalidate_addr_r7,
102 (int)invalidate_addr_r8,
103 (int)invalidate_addr_r9,
104 (int)invalidate_addr_r10,
105 0,
106 (int)invalidate_addr_r12,
107 0,
108 0,
109 0};
110
57871462 111#include "fpu.h"
112
dd3a91a1 113unsigned int needs_clear_cache[1<<(TARGET_SIZE_2-17)];
114
57871462 115/* Linker */
116
117void set_jump_target(int addr,u_int target)
118{
119 u_char *ptr=(u_char *)addr;
120 u_int *ptr2=(u_int *)ptr;
121 if(ptr[3]==0xe2) {
122 assert((target-(u_int)ptr2-8)<1024);
123 assert((addr&3)==0);
124 assert((target&3)==0);
125 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
126 //printf("target=%x addr=%x insn=%x\n",target,addr,*ptr2);
127 }
128 else if(ptr[3]==0x72) {
129 // generated by emit_jno_unlikely
130 if((target-(u_int)ptr2-8)<1024) {
131 assert((addr&3)==0);
132 assert((target&3)==0);
133 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>2)|0xF00;
134 }
135 else if((target-(u_int)ptr2-8)<4096&&!((target-(u_int)ptr2-8)&15)) {
136 assert((addr&3)==0);
137 assert((target&3)==0);
138 *ptr2=(*ptr2&0xFFFFF000)|((target-(u_int)ptr2-8)>>4)|0xE00;
139 }
140 else *ptr2=(0x7A000000)|(((target-(u_int)ptr2-8)<<6)>>8);
141 }
142 else {
143 assert((ptr[3]&0x0e)==0xa);
144 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
145 }
146}
147
148// This optionally copies the instruction from the target of the branch into
149// the space before the branch. Works, but the difference in speed is
150// usually insignificant.
151void set_jump_target_fillslot(int addr,u_int target,int copy)
152{
153 u_char *ptr=(u_char *)addr;
154 u_int *ptr2=(u_int *)ptr;
155 assert(!copy||ptr2[-1]==0xe28dd000);
156 if(ptr[3]==0xe2) {
157 assert(!copy);
158 assert((target-(u_int)ptr2-8)<4096);
159 *ptr2=(*ptr2&0xFFFFF000)|(target-(u_int)ptr2-8);
160 }
161 else {
162 assert((ptr[3]&0x0e)==0xa);
163 u_int target_insn=*(u_int *)target;
164 if((target_insn&0x0e100000)==0) { // ALU, no immediate, no flags
165 copy=0;
166 }
167 if((target_insn&0x0c100000)==0x04100000) { // Load
168 copy=0;
169 }
170 if(target_insn&0x08000000) {
171 copy=0;
172 }
173 if(copy) {
174 ptr2[-1]=target_insn;
175 target+=4;
176 }
177 *ptr2=(*ptr2&0xFF000000)|(((target-(u_int)ptr2-8)<<6)>>8);
178 }
179}
180
181/* Literal pool */
182add_literal(int addr,int val)
183{
15776b68 184 assert(literalcount<sizeof(literals)/sizeof(literals[0]));
57871462 185 literals[literalcount][0]=addr;
186 literals[literalcount][1]=val;
187 literalcount++;
188}
189
f76eeef9 190void *kill_pointer(void *stub)
57871462 191{
192 int *ptr=(int *)(stub+4);
193 assert((*ptr&0x0ff00000)==0x05900000);
194 u_int offset=*ptr&0xfff;
195 int **l_ptr=(void *)ptr+offset+8;
196 int *i_ptr=*l_ptr;
197 set_jump_target((int)i_ptr,(int)stub);
f76eeef9 198 return i_ptr;
57871462 199}
200
f968d35d 201// find where external branch is liked to using addr of it's stub:
202// get address that insn one after stub loads (dyna_linker arg1),
203// treat it as a pointer to branch insn,
204// return addr where that branch jumps to
57871462 205int get_pointer(void *stub)
206{
207 //printf("get_pointer(%x)\n",(int)stub);
208 int *ptr=(int *)(stub+4);
f968d35d 209 assert((*ptr&0x0fff0000)==0x059f0000);
57871462 210 u_int offset=*ptr&0xfff;
211 int **l_ptr=(void *)ptr+offset+8;
212 int *i_ptr=*l_ptr;
213 assert((*i_ptr&0x0f000000)==0x0a000000);
214 return (int)i_ptr+((*i_ptr<<8)>>6)+8;
215}
216
217// Find the "clean" entry point from a "dirty" entry point
218// by skipping past the call to verify_code
219u_int get_clean_addr(int addr)
220{
221 int *ptr=(int *)addr;
222 #ifdef ARMv5_ONLY
223 ptr+=4;
224 #else
225 ptr+=6;
226 #endif
227 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
228 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
229 ptr++;
230 if((*ptr&0xFF000000)==0xea000000) {
231 return (int)ptr+((*ptr<<8)>>6)+8; // follow jump
232 }
233 return (u_int)ptr;
234}
235
236int verify_dirty(int addr)
237{
238 u_int *ptr=(u_int *)addr;
239 #ifdef ARMv5_ONLY
240 // get from literal pool
15776b68 241 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 242 u_int offset=*ptr&0xfff;
243 u_int *l_ptr=(void *)ptr+offset+8;
244 u_int source=l_ptr[0];
245 u_int copy=l_ptr[1];
246 u_int len=l_ptr[2];
247 ptr+=4;
248 #else
249 // ARMv7 movw/movt
250 assert((*ptr&0xFFF00000)==0xe3000000);
251 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
252 u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
253 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
254 ptr+=6;
255 #endif
256 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
257 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 258#ifndef DISABLE_TLB
cfcba99a 259 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 260 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
261 unsigned int page=source>>12;
262 unsigned int map_value=memory_map[page];
263 if(map_value>=0x80000000) return 0;
264 while(page<((source+len-1)>>12)) {
265 if((memory_map[++page]<<2)!=(map_value<<2)) return 0;
266 }
267 source = source+(map_value<<2);
268 }
63cb0298 269#endif
57871462 270 //printf("verify_dirty: %x %x %x\n",source,copy,len);
271 return !memcmp((void *)source,(void *)copy,len);
272}
273
274// This doesn't necessarily find all clean entry points, just
275// guarantees that it's not dirty
276int isclean(int addr)
277{
278 #ifdef ARMv5_ONLY
279 int *ptr=((u_int *)addr)+4;
280 #else
281 int *ptr=((u_int *)addr)+6;
282 #endif
283 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
284 if((*ptr&0xFF000000)!=0xeb000000) return 1; // bl instruction
285 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code) return 0;
286 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_vm) return 0;
287 if((int)ptr+((*ptr<<8)>>6)+8==(int)verify_code_ds) return 0;
288 return 1;
289}
290
291void get_bounds(int addr,u_int *start,u_int *end)
292{
293 u_int *ptr=(u_int *)addr;
294 #ifdef ARMv5_ONLY
295 // get from literal pool
15776b68 296 assert((*ptr&0xFFFF0000)==0xe59f0000);
57871462 297 u_int offset=*ptr&0xfff;
298 u_int *l_ptr=(void *)ptr+offset+8;
299 u_int source=l_ptr[0];
300 //u_int copy=l_ptr[1];
301 u_int len=l_ptr[2];
302 ptr+=4;
303 #else
304 // ARMv7 movw/movt
305 assert((*ptr&0xFFF00000)==0xe3000000);
306 u_int source=(ptr[0]&0xFFF)+((ptr[0]>>4)&0xF000)+((ptr[2]<<16)&0xFFF0000)+((ptr[2]<<12)&0xF0000000);
307 //u_int copy=(ptr[1]&0xFFF)+((ptr[1]>>4)&0xF000)+((ptr[3]<<16)&0xFFF0000)+((ptr[3]<<12)&0xF0000000);
308 u_int len=(ptr[4]&0xFFF)+((ptr[4]>>4)&0xF000);
309 ptr+=6;
310 #endif
311 if((*ptr&0xFF000000)!=0xeb000000) ptr++;
312 assert((*ptr&0xFF000000)==0xeb000000); // bl instruction
63cb0298 313#ifndef DISABLE_TLB
cfcba99a 314 u_int verifier=(int)ptr+((signed int)(*ptr<<8)>>6)+8; // get target of bl
57871462 315 if(verifier==(u_int)verify_code_vm||verifier==(u_int)verify_code_ds) {
316 if(memory_map[source>>12]>=0x80000000) source = 0;
317 else source = source+(memory_map[source>>12]<<2);
318 }
63cb0298 319#endif
57871462 320 *start=source;
321 *end=source+len;
322}
323
324/* Register allocation */
325
326// Note: registers are allocated clean (unmodified state)
327// if you intend to modify the register, you must call dirty_reg().
// Map guest register `reg` to a host register in cur->regmap for
// instruction index i.  Whenever a slot is (re)assigned, its dirty and
// isconst bits are cleared.  Does nothing if `reg` is flagged in cur->u
// or is already present in the map.
//
// Search order: preferred host register (if free or holding an unneeded
// value) -> any free register not touched by the previous instruction ->
// any free register -> eviction guided by the hsn[] scores.
// Terminates the process if no register can be found.
328void alloc_reg(struct regstat *cur,int i,signed char reg)
329{
330 int r,hr;
// Default preference is derived from the low three bits of the guest
// register number; the cycle counter and PTEMP/FTEMP have fixed homes.
331 int preferred_reg = (reg&7);
332 if(reg==CCREG) preferred_reg=HOST_CCREG;
333 if(reg==PTEMP||reg==FTEMP) preferred_reg=12;
334
335 // Don't allocate unused registers
336 if((cur->u>>reg)&1) return;
337
338 // see if it's already allocated
339 for(hr=0;hr<HOST_REGS;hr++)
340 {
341 if(cur->regmap[hr]==reg) return;
342 }
343
344 // Keep the same mapping if the register was already allocated in a loop
345 preferred_reg = loop_reg(i,reg,preferred_reg);
346
347 // Try to allocate the preferred register
348 if(cur->regmap[preferred_reg]==-1) {
349 cur->regmap[preferred_reg]=reg;
350 cur->dirty&=~(1<<preferred_reg);
351 cur->isconst&=~(1<<preferred_reg);
352 return;
353 }
// The preferred slot is occupied: take it over if its current occupant is
// flagged in cur->u (map values < 64) or cur->uu (map values >= 64).
354 r=cur->regmap[preferred_reg];
355 if(r<64&&((cur->u>>r)&1)) {
356 cur->regmap[preferred_reg]=reg;
357 cur->dirty&=~(1<<preferred_reg);
358 cur->isconst&=~(1<<preferred_reg);
359 return;
360 }
361 if(r>=64&&((cur->uu>>(r&63))&1)) {
362 cur->regmap[preferred_reg]=reg;
363 cur->dirty&=~(1<<preferred_reg);
364 cur->isconst&=~(1<<preferred_reg);
365 return;
366 }
367
368 // Clear any unneeded registers
369 // We try to keep the mapping consistent, if possible, because it
370 // makes branches easier (especially loops). So we try to allocate
371 // first (see above) before removing old mappings. If this is not
372 // possible then go ahead and clear out the registers that are no
373 // longer needed.
// Only the first unneeded mapping found (lowest host register) is cleared.
374 for(hr=0;hr<HOST_REGS;hr++)
375 {
376 r=cur->regmap[hr];
377 if(r>=0) {
378 if(r<64) {
379 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
380 }
381 else
382 {
383 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
384 }
385 }
386 }
387 // Try to allocate any available register, but prefer
388 // registers that have not been used recently.
// "Recently" here means: mapped in regs[i-1] to one of the previous
// instruction's rs1/rs2/rt1/rt2.
389 if(i>0) {
390 for(hr=0;hr<HOST_REGS;hr++) {
391 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
392 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
393 cur->regmap[hr]=reg;
394 cur->dirty&=~(1<<hr);
395 cur->isconst&=~(1<<hr);
396 return;
397 }
398 }
399 }
400 }
401 // Try to allocate any available register
402 for(hr=0;hr<HOST_REGS;hr++) {
403 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
404 cur->regmap[hr]=reg;
405 cur->dirty&=~(1<<hr);
406 cur->isconst&=~(1<<hr);
407 return;
408 }
409 }
410
411 // Ok, now we have to evict someone
412 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): lsn() is not visible here; presumably a smaller score means
// "needed sooner" - confirm.  Candidates are tried from score 10 downwards.
413 u_char hsn[MAXREG+1];
414 memset(hsn,10,sizeof(hsn));
415 int j;
416 lsn(hsn,i,&preferred_reg);
417 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
418 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
419 if(i>0) {
420 // Don't evict the cycle count at entry points, otherwise the entry
421 // stub will have to write it.
422 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
423 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10..3 only, skipping guest registers referenced by the
// previous instruction.
424 for(j=10;j>=3;j--)
425 {
426 // Alloc preferred register if available
427 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
428 for(hr=0;hr<HOST_REGS;hr++) {
429 // Evict both parts of a 64-bit register
430 if((cur->regmap[hr]&63)==r) {
431 cur->regmap[hr]=-1;
432 cur->dirty&=~(1<<hr);
433 cur->isconst&=~(1<<hr);
434 }
435 }
436 cur->regmap[preferred_reg]=reg;
437 return;
438 }
439 for(r=1;r<=MAXREG;r++)
440 {
441 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are evicted in preference to lower halves.  The host
// cycle-count register is only eligible when j is below hsn[CCREG].
442 for(hr=0;hr<HOST_REGS;hr++) {
443 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
444 if(cur->regmap[hr]==r+64) {
445 cur->regmap[hr]=reg;
446 cur->dirty&=~(1<<hr);
447 cur->isconst&=~(1<<hr);
448 return;
449 }
450 }
451 }
452 for(hr=0;hr<HOST_REGS;hr++) {
453 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
454 if(cur->regmap[hr]==r) {
455 cur->regmap[hr]=reg;
456 cur->dirty&=~(1<<hr);
457 cur->isconst&=~(1<<hr);
458 return;
459 }
460 }
461 }
462 }
463 }
464 }
465 }
// Last resort: all scores 10..0, with no exclusions.
466 for(j=10;j>=0;j--)
467 {
468 for(r=1;r<=MAXREG;r++)
469 {
470 if(hsn[r]==j) {
471 for(hr=0;hr<HOST_REGS;hr++) {
472 if(cur->regmap[hr]==r+64) {
473 cur->regmap[hr]=reg;
474 cur->dirty&=~(1<<hr);
475 cur->isconst&=~(1<<hr);
476 return;
477 }
478 }
479 for(hr=0;hr<HOST_REGS;hr++) {
480 if(cur->regmap[hr]==r) {
481 cur->regmap[hr]=reg;
482 cur->dirty&=~(1<<hr);
483 cur->isconst&=~(1<<hr);
484 return;
485 }
486 }
487 }
488 }
489 }
490 printf("This shouldn't happen (alloc_reg)");exit(1);
491}
492
// Map both halves of 64-bit guest register `reg`: the lower half through
// alloc_reg(), then the upper half, which is recorded in cur->regmap as
// reg|64.  The upper half is skipped when `reg` is flagged in cur->uu or
// is already mapped.  The search order mirrors alloc_reg(), with a default
// preference of host register 8 or 9 (8 + low bit of `reg`).
// Terminates the process if no register can be found.
493void alloc_reg64(struct regstat *cur,int i,signed char reg)
494{
495 int preferred_reg = 8+(reg&1);
496 int r,hr;
497
498 // allocate the lower 32 bits
499 alloc_reg(cur,i,reg);
500
501 // Don't allocate unused registers
502 if((cur->uu>>reg)&1) return;
503
504 // see if the upper half is already allocated
505 for(hr=0;hr<HOST_REGS;hr++)
506 {
507 if(cur->regmap[hr]==reg+64) return;
508 }
509
510 // Keep the same mapping if the register was already allocated in a loop
511 preferred_reg = loop_reg(i,reg,preferred_reg);
512
513 // Try to allocate the preferred register
514 if(cur->regmap[preferred_reg]==-1) {
515 cur->regmap[preferred_reg]=reg|64;
516 cur->dirty&=~(1<<preferred_reg);
517 cur->isconst&=~(1<<preferred_reg);
518 return;
519 }
// The preferred slot is occupied: take it over if its current occupant is
// flagged in cur->u (map values < 64) or cur->uu (map values >= 64).
520 r=cur->regmap[preferred_reg];
521 if(r<64&&((cur->u>>r)&1)) {
522 cur->regmap[preferred_reg]=reg|64;
523 cur->dirty&=~(1<<preferred_reg);
524 cur->isconst&=~(1<<preferred_reg);
525 return;
526 }
527 if(r>=64&&((cur->uu>>(r&63))&1)) {
528 cur->regmap[preferred_reg]=reg|64;
529 cur->dirty&=~(1<<preferred_reg);
530 cur->isconst&=~(1<<preferred_reg);
531 return;
532 }
533
534 // Clear any unneeded registers
535 // We try to keep the mapping consistent, if possible, because it
536 // makes branches easier (especially loops). So we try to allocate
537 // first (see above) before removing old mappings. If this is not
538 // possible then go ahead and clear out the registers that are no
539 // longer needed.
// Unlike alloc_reg(), this scan runs from the highest host register down;
// only the first unneeded mapping found is cleared.
540 for(hr=HOST_REGS-1;hr>=0;hr--)
541 {
542 r=cur->regmap[hr];
543 if(r>=0) {
544 if(r<64) {
545 if((cur->u>>r)&1) {cur->regmap[hr]=-1;break;}
546 }
547 else
548 {
549 if((cur->uu>>(r&63))&1) {cur->regmap[hr]=-1;break;}
550 }
551 }
552 }
553 // Try to allocate any available register, but prefer
554 // registers that have not been used recently.
555 if(i>0) {
556 for(hr=0;hr<HOST_REGS;hr++) {
557 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
558 if(regs[i-1].regmap[hr]!=rs1[i-1]&&regs[i-1].regmap[hr]!=rs2[i-1]&&regs[i-1].regmap[hr]!=rt1[i-1]&&regs[i-1].regmap[hr]!=rt2[i-1]) {
559 cur->regmap[hr]=reg|64;
560 cur->dirty&=~(1<<hr);
561 cur->isconst&=~(1<<hr);
562 return;
563 }
564 }
565 }
566 }
567 // Try to allocate any available register
568 for(hr=0;hr<HOST_REGS;hr++) {
569 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
570 cur->regmap[hr]=reg|64;
571 cur->dirty&=~(1<<hr);
572 cur->isconst&=~(1<<hr);
573 return;
574 }
575 }
576
577 // Ok, now we have to evict someone
578 // Pick a register we hopefully won't need soon
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): lsn() is not visible here; presumably a smaller score means
// "needed sooner" - confirm.  Candidates are tried from score 10 downwards.
579 u_char hsn[MAXREG+1];
580 memset(hsn,10,sizeof(hsn));
581 int j;
582 lsn(hsn,i,&preferred_reg);
583 //printf("eax=%d ecx=%d edx=%d ebx=%d ebp=%d esi=%d edi=%d\n",cur->regmap[0],cur->regmap[1],cur->regmap[2],cur->regmap[3],cur->regmap[5],cur->regmap[6],cur->regmap[7]);
584 //printf("hsn(%x): %d %d %d %d %d %d %d\n",start+i*4,hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
585 if(i>0) {
586 // Don't evict the cycle count at entry points, otherwise the entry
587 // stub will have to write it.
588 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
589 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10..3 only, skipping guest registers referenced by the
// previous instruction.
590 for(j=10;j>=3;j--)
591 {
592 // Alloc preferred register if available
593 if(hsn[r=cur->regmap[preferred_reg]&63]==j) {
594 for(hr=0;hr<HOST_REGS;hr++) {
595 // Evict both parts of a 64-bit register
596 if((cur->regmap[hr]&63)==r) {
597 cur->regmap[hr]=-1;
598 cur->dirty&=~(1<<hr);
599 cur->isconst&=~(1<<hr);
600 }
601 }
602 cur->regmap[preferred_reg]=reg|64;
603 return;
604 }
605 for(r=1;r<=MAXREG;r++)
606 {
607 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Upper halves (r+64) are evicted in preference to lower halves.  The host
// cycle-count register is only eligible when j is below hsn[CCREG].
608 for(hr=0;hr<HOST_REGS;hr++) {
609 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
610 if(cur->regmap[hr]==r+64) {
611 cur->regmap[hr]=reg|64;
612 cur->dirty&=~(1<<hr);
613 cur->isconst&=~(1<<hr);
614 return;
615 }
616 }
617 }
618 for(hr=0;hr<HOST_REGS;hr++) {
619 if(hr!=HOST_CCREG||j<hsn[CCREG]) {
620 if(cur->regmap[hr]==r) {
621 cur->regmap[hr]=reg|64;
622 cur->dirty&=~(1<<hr);
623 cur->isconst&=~(1<<hr);
624 return;
625 }
626 }
627 }
628 }
629 }
630 }
631 }
// Last resort: all scores 10..0, with no exclusions.
632 for(j=10;j>=0;j--)
633 {
634 for(r=1;r<=MAXREG;r++)
635 {
636 if(hsn[r]==j) {
637 for(hr=0;hr<HOST_REGS;hr++) {
638 if(cur->regmap[hr]==r+64) {
639 cur->regmap[hr]=reg|64;
640 cur->dirty&=~(1<<hr);
641 cur->isconst&=~(1<<hr);
642 return;
643 }
644 }
645 for(hr=0;hr<HOST_REGS;hr++) {
646 if(cur->regmap[hr]==r) {
647 cur->regmap[hr]=reg|64;
648 cur->dirty&=~(1<<hr);
649 cur->isconst&=~(1<<hr);
650 return;
651 }
652 }
653 }
654 }
655 }
656 printf("This shouldn't happen");exit(1);
657}
658
659// Allocate a temporary register. This is done without regard to
660// dirty status or whether the register we request is on the unneeded list
661// Note: This will only allocate one register, even if called multiple times
// Map temporary `reg` to some host register in cur->regmap for instruction
// index i.  Unlike alloc_reg(), the cur->u flag of `reg` itself is not
// consulted and there is no preferred host register (preferred_reg stays -1
// unless lsn() changes it).  The chosen slot has its dirty and isconst bits
// cleared.  Terminates the process if no register can be found.
662void alloc_reg_temp(struct regstat *cur,int i,signed char reg)
663{
664 int r,hr;
665 int preferred_reg = -1;
666
667 // see if it's already allocated
668 for(hr=0;hr<HOST_REGS;hr++)
669 {
670 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==reg) return;
671 }
672
673 // Try to allocate any available register
// Scans from the highest host register downwards.
674 for(hr=HOST_REGS-1;hr>=0;hr--) {
675 if(hr!=EXCLUDE_REG&&cur->regmap[hr]==-1) {
676 cur->regmap[hr]=reg;
677 cur->dirty&=~(1<<hr);
678 cur->isconst&=~(1<<hr);
679 return;
680 }
681 }
682
683 // Find an unneeded register
// A slot qualifies only if its occupant is flagged unneeded both in
// cur->u/cur->uu and, when i>0, in unneeded_reg[i-1]/unneeded_reg_upper[i-1].
684 for(hr=HOST_REGS-1;hr>=0;hr--)
685 {
686 r=cur->regmap[hr];
687 if(r>=0) {
688 if(r<64) {
689 if((cur->u>>r)&1) {
690 if(i==0||((unneeded_reg[i-1]>>r)&1)) {
691 cur->regmap[hr]=reg;
692 cur->dirty&=~(1<<hr);
693 cur->isconst&=~(1<<hr);
694 return;
695 }
696 }
697 }
698 else
699 {
700 if((cur->uu>>(r&63))&1) {
701 if(i==0||((unneeded_reg_upper[i-1]>>(r&63))&1)) {
702 cur->regmap[hr]=reg;
703 cur->dirty&=~(1<<hr);
704 cur->isconst&=~(1<<hr);
705 return;
706 }
707 }
708 }
709 }
710 }
711
712 // Ok, now we have to evict someone
713 // Pick a register we hopefully won't need soon
714 // TODO: we might want to follow unconditional jumps here
715 // TODO: get rid of dupe code and make this into a function
// hsn[] starts at 10 for every guest register and is then updated by lsn().
// NOTE(review): lsn() is not visible here; presumably a smaller score means
// "needed sooner" - confirm.  Candidates are tried from score 10 downwards.
716 u_char hsn[MAXREG+1];
717 memset(hsn,10,sizeof(hsn));
718 int j;
719 lsn(hsn,i,&preferred_reg);
720 //printf("hsn: %d %d %d %d %d %d %d\n",hsn[cur->regmap[0]&63],hsn[cur->regmap[1]&63],hsn[cur->regmap[2]&63],hsn[cur->regmap[3]&63],hsn[cur->regmap[5]&63],hsn[cur->regmap[6]&63],hsn[cur->regmap[7]&63]);
721 if(i>0) {
722 // Don't evict the cycle count at entry points, otherwise the entry
723 // stub will have to write it.
724 if(bt[i]&&hsn[CCREG]>2) hsn[CCREG]=2;
725 if(i>1&&hsn[CCREG]>2&&(itype[i-2]==RJUMP||itype[i-2]==UJUMP||itype[i-2]==CJUMP||itype[i-2]==SJUMP||itype[i-2]==FJUMP)) hsn[CCREG]=2;
// First pass: scores 10..3 only, skipping guest registers referenced by the
// previous instruction.
726 for(j=10;j>=3;j--)
727 {
728 for(r=1;r<=MAXREG;r++)
729 {
730 if(hsn[r]==j&&r!=rs1[i-1]&&r!=rs2[i-1]&&r!=rt1[i-1]&&r!=rt2[i-1]) {
// Here the host cycle-count register is eligible only while hsn[CCREG]>2
// (alloc_reg/alloc_reg64 compare against j instead).
731 for(hr=0;hr<HOST_REGS;hr++) {
732 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
733 if(cur->regmap[hr]==r+64) {
734 cur->regmap[hr]=reg;
735 cur->dirty&=~(1<<hr);
736 cur->isconst&=~(1<<hr);
737 return;
738 }
739 }
740 }
741 for(hr=0;hr<HOST_REGS;hr++) {
742 if(hr!=HOST_CCREG||hsn[CCREG]>2) {
743 if(cur->regmap[hr]==r) {
744 cur->regmap[hr]=reg;
745 cur->dirty&=~(1<<hr);
746 cur->isconst&=~(1<<hr);
747 return;
748 }
749 }
750 }
751 }
752 }
753 }
754 }
// Last resort: all scores 10..0, with no exclusions.
755 for(j=10;j>=0;j--)
756 {
757 for(r=1;r<=MAXREG;r++)
758 {
759 if(hsn[r]==j) {
760 for(hr=0;hr<HOST_REGS;hr++) {
761 if(cur->regmap[hr]==r+64) {
762 cur->regmap[hr]=reg;
763 cur->dirty&=~(1<<hr);
764 cur->isconst&=~(1<<hr);
765 return;
766 }
767 }
768 for(hr=0;hr<HOST_REGS;hr++) {
769 if(cur->regmap[hr]==r) {
770 cur->regmap[hr]=reg;
771 cur->dirty&=~(1<<hr);
772 cur->isconst&=~(1<<hr);
773 return;
774 }
775 }
776 }
777 }
778 }
779 printf("This shouldn't happen");exit(1);
780}
781// Allocate a specific ARM register.
782void alloc_arm_reg(struct regstat *cur,int i,signed char reg,char hr)
783{
784 int n;
f776eb14 785 int dirty=0;
57871462 786
787 // see if it's already allocated (and dealloc it)
788 for(n=0;n<HOST_REGS;n++)
789 {
f776eb14 790 if(n!=EXCLUDE_REG&&cur->regmap[n]==reg) {
791 dirty=(cur->dirty>>n)&1;
792 cur->regmap[n]=-1;
793 }
57871462 794 }
795
796 cur->regmap[hr]=reg;
797 cur->dirty&=~(1<<hr);
f776eb14 798 cur->dirty|=dirty<<hr;
57871462 799 cur->isconst&=~(1<<hr);
800}
801
802// Alloc cycle count into dedicated register
803alloc_cc(struct regstat *cur,int i)
804{
805 alloc_arm_reg(cur,i,CCREG,HOST_CCREG);
806}
807
808/* Special alloc */
809
810
811/* Assembler */
812
/* Printable names of the 16 ARM core registers, indexed by register number
 * (used by the assem_debug traces). */
char regname[16][4] = {
  "r0", "r1", "r2",  "r3",
  "r4", "r5", "r6",  "r7",
  "r8", "r9", "r10", "fp",
  "r12","sp", "lr",  "pc"
};
830
831void output_byte(u_char byte)
832{
833 *(out++)=byte;
834}
835void output_modrm(u_char mod,u_char rm,u_char ext)
836{
837 assert(mod<4);
838 assert(rm<8);
839 assert(ext<8);
840 u_char byte=(mod<<6)|(ext<<3)|rm;
841 *(out++)=byte;
842}
843void output_sib(u_char scale,u_char index,u_char base)
844{
845 assert(scale<4);
846 assert(index<8);
847 assert(base<8);
848 u_char byte=(scale<<6)|(index<<3)|base;
849 *(out++)=byte;
850}
851void output_w32(u_int word)
852{
853 *((u_int *)out)=word;
854 out+=4;
855}
856u_int rd_rn_rm(u_int rd, u_int rn, u_int rm)
857{
858 assert(rd<16);
859 assert(rn<16);
860 assert(rm<16);
861 return((rn<<16)|(rd<<12)|rm);
862}
863u_int rd_rn_imm_shift(u_int rd, u_int rn, u_int imm, u_int shift)
864{
865 assert(rd<16);
866 assert(rn<16);
867 assert(imm<256);
868 assert((shift&1)==0);
869 return((rn<<16)|(rd<<12)|(((32-shift)&30)<<7)|imm);
870}
871u_int genimm(u_int imm,u_int *encoded)
872{
c2e3bd42 873 *encoded=0;
874 if(imm==0) return 1;
57871462 875 int i=32;
876 while(i>0)
877 {
878 if(imm<256) {
879 *encoded=((i&30)<<7)|imm;
880 return 1;
881 }
882 imm=(imm>>2)|(imm<<30);i-=2;
883 }
884 return 0;
885}
cfbd3c6e 886void genimm_checked(u_int imm,u_int *encoded)
887{
888 u_int ret=genimm(imm,encoded);
889 assert(ret);
890}
57871462 891u_int genjmp(u_int addr)
892{
893 int offset=addr-(int)out-8;
e80343e2 894 if(offset<-33554432||offset>=33554432) {
895 if (addr>2) {
896 printf("genjmp: out of range: %08x\n", offset);
897 exit(1);
898 }
899 return 0;
900 }
57871462 901 return ((u_int)offset>>2)&0xffffff;
902}
903
904void emit_mov(int rs,int rt)
905{
906 assem_debug("mov %s,%s\n",regname[rt],regname[rs]);
907 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs));
908}
909
910void emit_movs(int rs,int rt)
911{
912 assem_debug("movs %s,%s\n",regname[rt],regname[rs]);
913 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs));
914}
915
916void emit_add(int rs1,int rs2,int rt)
917{
918 assem_debug("add %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
919 output_w32(0xe0800000|rd_rn_rm(rt,rs1,rs2));
920}
921
922void emit_adds(int rs1,int rs2,int rt)
923{
924 assem_debug("adds %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
925 output_w32(0xe0900000|rd_rn_rm(rt,rs1,rs2));
926}
927
928void emit_adcs(int rs1,int rs2,int rt)
929{
930 assem_debug("adcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
931 output_w32(0xe0b00000|rd_rn_rm(rt,rs1,rs2));
932}
933
934void emit_sbc(int rs1,int rs2,int rt)
935{
936 assem_debug("sbc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
937 output_w32(0xe0c00000|rd_rn_rm(rt,rs1,rs2));
938}
939
940void emit_sbcs(int rs1,int rs2,int rt)
941{
942 assem_debug("sbcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
943 output_w32(0xe0d00000|rd_rn_rm(rt,rs1,rs2));
944}
945
946void emit_neg(int rs, int rt)
947{
948 assem_debug("rsb %s,%s,#0\n",regname[rt],regname[rs]);
949 output_w32(0xe2600000|rd_rn_rm(rt,rs,0));
950}
951
952void emit_negs(int rs, int rt)
953{
954 assem_debug("rsbs %s,%s,#0\n",regname[rt],regname[rs]);
955 output_w32(0xe2700000|rd_rn_rm(rt,rs,0));
956}
957
958void emit_sub(int rs1,int rs2,int rt)
959{
960 assem_debug("sub %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
961 output_w32(0xe0400000|rd_rn_rm(rt,rs1,rs2));
962}
963
964void emit_subs(int rs1,int rs2,int rt)
965{
966 assem_debug("subs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
967 output_w32(0xe0500000|rd_rn_rm(rt,rs1,rs2));
968}
969
970void emit_zeroreg(int rt)
971{
972 assem_debug("mov %s,#0\n",regname[rt]);
973 output_w32(0xe3a00000|rd_rn_rm(rt,0,0));
974}
975
790ee18e 976void emit_loadlp(u_int imm,u_int rt)
977{
978 add_literal((int)out,imm);
979 assem_debug("ldr %s,pc+? [=%x]\n",regname[rt],imm);
980 output_w32(0xe5900000|rd_rn_rm(rt,15,0));
981}
982void emit_movw(u_int imm,u_int rt)
983{
984 assert(imm<65536);
985 assem_debug("movw %s,#%d (0x%x)\n",regname[rt],imm,imm);
986 output_w32(0xe3000000|rd_rn_rm(rt,0,0)|(imm&0xfff)|((imm<<4)&0xf0000));
987}
988void emit_movt(u_int imm,u_int rt)
989{
990 assem_debug("movt %s,#%d (0x%x)\n",regname[rt],imm&0xffff0000,imm&0xffff0000);
991 output_w32(0xe3400000|rd_rn_rm(rt,0,0)|((imm>>16)&0xfff)|((imm>>12)&0xf0000));
992}
993void emit_movimm(u_int imm,u_int rt)
994{
995 u_int armval;
996 if(genimm(imm,&armval)) {
997 assem_debug("mov %s,#%d\n",regname[rt],imm);
998 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
999 }else if(genimm(~imm,&armval)) {
1000 assem_debug("mvn %s,#%d\n",regname[rt],imm);
1001 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
1002 }else if(imm<65536) {
1003 #ifdef ARMv5_ONLY
1004 assem_debug("mov %s,#%d\n",regname[rt],imm&0xFF00);
1005 output_w32(0xe3a00000|rd_rn_imm_shift(rt,0,imm>>8,8));
1006 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1007 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1008 #else
1009 emit_movw(imm,rt);
1010 #endif
1011 }else{
1012 #ifdef ARMv5_ONLY
1013 emit_loadlp(imm,rt);
1014 #else
1015 emit_movw(imm&0x0000FFFF,rt);
1016 emit_movt(imm&0xFFFF0000,rt);
1017 #endif
1018 }
1019}
1020void emit_pcreladdr(u_int rt)
1021{
1022 assem_debug("add %s,pc,#?\n",regname[rt]);
1023 output_w32(0xe2800000|rd_rn_rm(rt,15,0));
1024}
1025
57871462 1026void emit_loadreg(int r, int hr)
1027{
3d624f89 1028#ifdef FORCE32
1029 if(r&64) {
1030 printf("64bit load in 32bit mode!\n");
7f2607ea 1031 assert(0);
1032 return;
3d624f89 1033 }
1034#endif
57871462 1035 if((r&63)==0)
1036 emit_zeroreg(hr);
1037 else {
3d624f89 1038 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1039 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1040 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1041 if(r==CCREG) addr=(int)&cycle_count;
1042 if(r==CSREG) addr=(int)&Status;
1043 if(r==FSREG) addr=(int)&FCR31;
1044 if(r==INVCP) addr=(int)&invc_ptr;
1045 u_int offset = addr-(u_int)&dynarec_local;
1046 assert(offset<4096);
1047 assem_debug("ldr %s,fp+%d\n",regname[hr],offset);
1048 output_w32(0xe5900000|rd_rn_rm(hr,FP,0)|offset);
1049 }
1050}
1051void emit_storereg(int r, int hr)
1052{
3d624f89 1053#ifdef FORCE32
1054 if(r&64) {
1055 printf("64bit store in 32bit mode!\n");
7f2607ea 1056 assert(0);
1057 return;
3d624f89 1058 }
1059#endif
1060 int addr=((int)reg)+((r&63)<<REG_SHIFT)+((r&64)>>4);
57871462 1061 if((r&63)==HIREG) addr=(int)&hi+((r&64)>>4);
1062 if((r&63)==LOREG) addr=(int)&lo+((r&64)>>4);
1063 if(r==CCREG) addr=(int)&cycle_count;
1064 if(r==FSREG) addr=(int)&FCR31;
1065 u_int offset = addr-(u_int)&dynarec_local;
1066 assert(offset<4096);
1067 assem_debug("str %s,fp+%d\n",regname[hr],offset);
1068 output_w32(0xe5800000|rd_rn_rm(hr,FP,0)|offset);
1069}
1070
1071void emit_test(int rs, int rt)
1072{
1073 assem_debug("tst %s,%s\n",regname[rs],regname[rt]);
1074 output_w32(0xe1100000|rd_rn_rm(0,rs,rt));
1075}
1076
1077void emit_testimm(int rs,int imm)
1078{
1079 u_int armval;
5a05d80c 1080 assem_debug("tst %s,#%d\n",regname[rs],imm);
cfbd3c6e 1081 genimm_checked(imm,&armval);
57871462 1082 output_w32(0xe3100000|rd_rn_rm(0,rs,0)|armval);
1083}
1084
b9b61529 1085void emit_testeqimm(int rs,int imm)
1086{
1087 u_int armval;
1088 assem_debug("tsteq %s,$%d\n",regname[rs],imm);
cfbd3c6e 1089 genimm_checked(imm,&armval);
b9b61529 1090 output_w32(0x03100000|rd_rn_rm(0,rs,0)|armval);
1091}
1092
57871462 1093void emit_not(int rs,int rt)
1094{
1095 assem_debug("mvn %s,%s\n",regname[rt],regname[rs]);
1096 output_w32(0xe1e00000|rd_rn_rm(rt,0,rs));
1097}
1098
b9b61529 1099void emit_mvnmi(int rs,int rt)
1100{
1101 assem_debug("mvnmi %s,%s\n",regname[rt],regname[rs]);
1102 output_w32(0x41e00000|rd_rn_rm(rt,0,rs));
1103}
1104
57871462 1105void emit_and(u_int rs1,u_int rs2,u_int rt)
1106{
1107 assem_debug("and %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1108 output_w32(0xe0000000|rd_rn_rm(rt,rs1,rs2));
1109}
1110
1111void emit_or(u_int rs1,u_int rs2,u_int rt)
1112{
1113 assem_debug("orr %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1114 output_w32(0xe1800000|rd_rn_rm(rt,rs1,rs2));
1115}
1116void emit_or_and_set_flags(int rs1,int rs2,int rt)
1117{
1118 assem_debug("orrs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1119 output_w32(0xe1900000|rd_rn_rm(rt,rs1,rs2));
1120}
1121
f70d384d 1122void emit_orrshl_imm(u_int rs,u_int imm,u_int rt)
1123{
1124 assert(rs<16);
1125 assert(rt<16);
1126 assert(imm<32);
1127 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs],imm);
1128 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|(imm<<7));
1129}
1130
576bbd8f 1131void emit_orrshr_imm(u_int rs,u_int imm,u_int rt)
1132{
1133 assert(rs<16);
1134 assert(rt<16);
1135 assert(imm<32);
1136 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs],imm);
1137 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs)|(imm<<7));
1138}
1139
57871462 1140void emit_xor(u_int rs1,u_int rs2,u_int rt)
1141{
1142 assem_debug("eor %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1143 output_w32(0xe0200000|rd_rn_rm(rt,rs1,rs2));
1144}
1145
57871462 1146void emit_addimm(u_int rs,int imm,u_int rt)
1147{
1148 assert(rs<16);
1149 assert(rt<16);
1150 if(imm!=0) {
57871462 1151 u_int armval;
1152 if(genimm(imm,&armval)) {
1153 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm);
1154 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
1155 }else if(genimm(-imm,&armval)) {
8a0a8423 1156 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-imm);
57871462 1157 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
1158 }else if(imm<0) {
ffb0b9e0 1159 assert(imm>-65536);
57871462 1160 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],(-imm)&0xFF00);
1161 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1162 output_w32(0xe2400000|rd_rn_imm_shift(rt,rs,(-imm)>>8,8));
1163 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1164 }else{
ffb0b9e0 1165 assert(imm<65536);
57871462 1166 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1167 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1168 output_w32(0xe2800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1169 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1170 }
1171 }
1172 else if(rs!=rt) emit_mov(rs,rt);
1173}
1174
1175void emit_addimm_and_set_flags(int imm,int rt)
1176{
1177 assert(imm>-65536&&imm<65536);
1178 u_int armval;
1179 if(genimm(imm,&armval)) {
1180 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm);
1181 output_w32(0xe2900000|rd_rn_rm(rt,rt,0)|armval);
1182 }else if(genimm(-imm,&armval)) {
1183 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],imm);
1184 output_w32(0xe2500000|rd_rn_rm(rt,rt,0)|armval);
1185 }else if(imm<0) {
1186 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF00);
1187 assem_debug("subs %s,%s,#%d\n",regname[rt],regname[rt],(-imm)&0xFF);
1188 output_w32(0xe2400000|rd_rn_imm_shift(rt,rt,(-imm)>>8,8));
1189 output_w32(0xe2500000|rd_rn_imm_shift(rt,rt,(-imm)&0xff,0));
1190 }else{
1191 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF00);
1192 assem_debug("adds %s,%s,#%d\n",regname[rt],regname[rt],imm&0xFF);
1193 output_w32(0xe2800000|rd_rn_imm_shift(rt,rt,imm>>8,8));
1194 output_w32(0xe2900000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1195 }
1196}
1197void emit_addimm_no_flags(u_int imm,u_int rt)
1198{
1199 emit_addimm(rt,imm,rt);
1200}
1201
1202void emit_addnop(u_int r)
1203{
1204 assert(r<16);
1205 assem_debug("add %s,%s,#0 (nop)\n",regname[r],regname[r]);
1206 output_w32(0xe2800000|rd_rn_rm(r,r,0));
1207}
1208
1209void emit_adcimm(u_int rs,int imm,u_int rt)
1210{
1211 u_int armval;
cfbd3c6e 1212 genimm_checked(imm,&armval);
57871462 1213 assem_debug("adc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1214 output_w32(0xe2a00000|rd_rn_rm(rt,rs,0)|armval);
1215}
1216/*void emit_sbcimm(int imm,u_int rt)
1217{
1218 u_int armval;
cfbd3c6e 1219 genimm_checked(imm,&armval);
57871462 1220 assem_debug("sbc %s,%s,#%d\n",regname[rt],regname[rt],imm);
1221 output_w32(0xe2c00000|rd_rn_rm(rt,rt,0)|armval);
1222}*/
1223void emit_sbbimm(int imm,u_int rt)
1224{
1225 assem_debug("sbb $%d,%%%s\n",imm,regname[rt]);
1226 assert(rt<8);
1227 if(imm<128&&imm>=-128) {
1228 output_byte(0x83);
1229 output_modrm(3,rt,3);
1230 output_byte(imm);
1231 }
1232 else
1233 {
1234 output_byte(0x81);
1235 output_modrm(3,rt,3);
1236 output_w32(imm);
1237 }
1238}
1239void emit_rscimm(int rs,int imm,u_int rt)
1240{
1241 assert(0);
1242 u_int armval;
cfbd3c6e 1243 genimm_checked(imm,&armval);
57871462 1244 assem_debug("rsc %s,%s,#%d\n",regname[rt],regname[rs],imm);
1245 output_w32(0xe2e00000|rd_rn_rm(rt,rs,0)|armval);
1246}
1247
1248void emit_addimm64_32(int rsh,int rsl,int imm,int rth,int rtl)
1249{
1250 // TODO: if(genimm(imm,&armval)) ...
1251 // else
1252 emit_movimm(imm,HOST_TEMPREG);
1253 emit_adds(HOST_TEMPREG,rsl,rtl);
1254 emit_adcimm(rsh,0,rth);
1255}
1256
1257void emit_sbb(int rs1,int rs2)
1258{
1259 assem_debug("sbb %%%s,%%%s\n",regname[rs2],regname[rs1]);
1260 output_byte(0x19);
1261 output_modrm(3,rs1,rs2);
1262}
1263
1264void emit_andimm(int rs,int imm,int rt)
1265{
1266 u_int armval;
790ee18e 1267 if(imm==0) {
1268 emit_zeroreg(rt);
1269 }else if(genimm(imm,&armval)) {
57871462 1270 assem_debug("and %s,%s,#%d\n",regname[rt],regname[rs],imm);
1271 output_w32(0xe2000000|rd_rn_rm(rt,rs,0)|armval);
1272 }else if(genimm(~imm,&armval)) {
1273 assem_debug("bic %s,%s,#%d\n",regname[rt],regname[rs],imm);
1274 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|armval);
1275 }else if(imm==65535) {
1276 #ifdef ARMv5_ONLY
1277 assem_debug("bic %s,%s,#FF000000\n",regname[rt],regname[rs]);
1278 output_w32(0xe3c00000|rd_rn_rm(rt,rs,0)|0x4FF);
1279 assem_debug("bic %s,%s,#00FF0000\n",regname[rt],regname[rt]);
1280 output_w32(0xe3c00000|rd_rn_rm(rt,rt,0)|0x8FF);
1281 #else
1282 assem_debug("uxth %s,%s\n",regname[rt],regname[rs]);
1283 output_w32(0xe6ff0070|rd_rn_rm(rt,0,rs));
1284 #endif
1285 }else{
1286 assert(imm>0&&imm<65535);
1287 #ifdef ARMv5_ONLY
1288 assem_debug("mov r14,#%d\n",imm&0xFF00);
1289 output_w32(0xe3a00000|rd_rn_imm_shift(HOST_TEMPREG,0,imm>>8,8));
1290 assem_debug("add r14,r14,#%d\n",imm&0xFF);
1291 output_w32(0xe2800000|rd_rn_imm_shift(HOST_TEMPREG,HOST_TEMPREG,imm&0xff,0));
1292 #else
1293 emit_movw(imm,HOST_TEMPREG);
1294 #endif
1295 assem_debug("and %s,%s,r14\n",regname[rt],regname[rs]);
1296 output_w32(0xe0000000|rd_rn_rm(rt,rs,HOST_TEMPREG));
1297 }
1298}
1299
1300void emit_orimm(int rs,int imm,int rt)
1301{
1302 u_int armval;
790ee18e 1303 if(imm==0) {
1304 if(rs!=rt) emit_mov(rs,rt);
1305 }else if(genimm(imm,&armval)) {
57871462 1306 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1307 output_w32(0xe3800000|rd_rn_rm(rt,rs,0)|armval);
1308 }else{
1309 assert(imm>0&&imm<65536);
1310 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1311 assem_debug("orr %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1312 output_w32(0xe3800000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1313 output_w32(0xe3800000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1314 }
1315}
1316
1317void emit_xorimm(int rs,int imm,int rt)
1318{
57871462 1319 u_int armval;
790ee18e 1320 if(imm==0) {
1321 if(rs!=rt) emit_mov(rs,rt);
1322 }else if(genimm(imm,&armval)) {
57871462 1323 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm);
1324 output_w32(0xe2200000|rd_rn_rm(rt,rs,0)|armval);
1325 }else{
514ed0d9 1326 assert(imm>0&&imm<65536);
57871462 1327 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF00);
1328 assem_debug("eor %s,%s,#%d\n",regname[rt],regname[rs],imm&0xFF);
1329 output_w32(0xe2200000|rd_rn_imm_shift(rt,rs,imm>>8,8));
1330 output_w32(0xe2200000|rd_rn_imm_shift(rt,rt,imm&0xff,0));
1331 }
1332}
1333
1334void emit_shlimm(int rs,u_int imm,int rt)
1335{
1336 assert(imm>0);
1337 assert(imm<32);
1338 //if(imm==1) ...
1339 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1340 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1341}
1342
c6c3b1b3 1343void emit_lsls_imm(int rs,int imm,int rt)
1344{
1345 assert(imm>0);
1346 assert(imm<32);
1347 assem_debug("lsls %s,%s,#%d\n",regname[rt],regname[rs],imm);
1348 output_w32(0xe1b00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1349}
1350
57871462 1351void emit_shrimm(int rs,u_int imm,int rt)
1352{
1353 assert(imm>0);
1354 assert(imm<32);
1355 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1356 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
1357}
1358
1359void emit_sarimm(int rs,u_int imm,int rt)
1360{
1361 assert(imm>0);
1362 assert(imm<32);
1363 assem_debug("asr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1364 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x40|(imm<<7));
1365}
1366
1367void emit_rorimm(int rs,u_int imm,int rt)
1368{
1369 assert(imm>0);
1370 assert(imm<32);
1371 assem_debug("ror %s,%s,#%d\n",regname[rt],regname[rs],imm);
1372 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x60|(imm<<7));
1373}
1374
1375void emit_shldimm(int rs,int rs2,u_int imm,int rt)
1376{
1377 assem_debug("shld %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1378 assert(imm>0);
1379 assert(imm<32);
1380 //if(imm==1) ...
1381 assem_debug("lsl %s,%s,#%d\n",regname[rt],regname[rs],imm);
1382 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|(imm<<7));
1383 assem_debug("orr %s,%s,%s,lsr #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1384 output_w32(0xe1800020|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1385}
1386
1387void emit_shrdimm(int rs,int rs2,u_int imm,int rt)
1388{
1389 assem_debug("shrd %%%s,%%%s,%d\n",regname[rt],regname[rs2],imm);
1390 assert(imm>0);
1391 assert(imm<32);
1392 //if(imm==1) ...
1393 assem_debug("lsr %s,%s,#%d\n",regname[rt],regname[rs],imm);
1394 output_w32(0xe1a00020|rd_rn_rm(rt,0,rs)|(imm<<7));
1395 assem_debug("orr %s,%s,%s,lsl #%d\n",regname[rt],regname[rt],regname[rs2],32-imm);
1396 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs2)|((32-imm)<<7));
1397}
1398
b9b61529 1399void emit_signextend16(int rs,int rt)
1400{
1401 #ifdef ARMv5_ONLY
1402 emit_shlimm(rs,16,rt);
1403 emit_sarimm(rt,16,rt);
1404 #else
1405 assem_debug("sxth %s,%s\n",regname[rt],regname[rs]);
1406 output_w32(0xe6bf0070|rd_rn_rm(rt,0,rs));
1407 #endif
1408}
1409
c6c3b1b3 1410void emit_signextend8(int rs,int rt)
1411{
1412 #ifdef ARMv5_ONLY
1413 emit_shlimm(rs,24,rt);
1414 emit_sarimm(rt,24,rt);
1415 #else
1416 assem_debug("sxtb %s,%s\n",regname[rt],regname[rs]);
1417 output_w32(0xe6af0070|rd_rn_rm(rt,0,rs));
1418 #endif
1419}
1420
57871462 1421void emit_shl(u_int rs,u_int shift,u_int rt)
1422{
1423 assert(rs<16);
1424 assert(rt<16);
1425 assert(shift<16);
1426 //if(imm==1) ...
1427 assem_debug("lsl %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1428 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x10|(shift<<8));
1429}
1430void emit_shr(u_int rs,u_int shift,u_int rt)
1431{
1432 assert(rs<16);
1433 assert(rt<16);
1434 assert(shift<16);
1435 assem_debug("lsr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1436 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x30|(shift<<8));
1437}
1438void emit_sar(u_int rs,u_int shift,u_int rt)
1439{
1440 assert(rs<16);
1441 assert(rt<16);
1442 assert(shift<16);
1443 assem_debug("asr %s,%s,%s\n",regname[rt],regname[rs],regname[shift]);
1444 output_w32(0xe1a00000|rd_rn_rm(rt,0,rs)|0x50|(shift<<8));
1445}
1446void emit_shlcl(int r)
1447{
1448 assem_debug("shl %%%s,%%cl\n",regname[r]);
1449 assert(0);
1450}
1451void emit_shrcl(int r)
1452{
1453 assem_debug("shr %%%s,%%cl\n",regname[r]);
1454 assert(0);
1455}
1456void emit_sarcl(int r)
1457{
1458 assem_debug("sar %%%s,%%cl\n",regname[r]);
1459 assert(0);
1460}
1461
1462void emit_shldcl(int r1,int r2)
1463{
1464 assem_debug("shld %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1465 assert(0);
1466}
1467void emit_shrdcl(int r1,int r2)
1468{
1469 assem_debug("shrd %%%s,%%%s,%%cl\n",regname[r1],regname[r2]);
1470 assert(0);
1471}
1472void emit_orrshl(u_int rs,u_int shift,u_int rt)
1473{
1474 assert(rs<16);
1475 assert(rt<16);
1476 assert(shift<16);
1477 assem_debug("orr %s,%s,%s,lsl %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1478 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x10|(shift<<8));
1479}
1480void emit_orrshr(u_int rs,u_int shift,u_int rt)
1481{
1482 assert(rs<16);
1483 assert(rt<16);
1484 assert(shift<16);
1485 assem_debug("orr %s,%s,%s,lsr %s\n",regname[rt],regname[rt],regname[rs],regname[shift]);
1486 output_w32(0xe1800000|rd_rn_rm(rt,rt,rs)|0x30|(shift<<8));
1487}
1488
1489void emit_cmpimm(int rs,int imm)
1490{
1491 u_int armval;
1492 if(genimm(imm,&armval)) {
5a05d80c 1493 assem_debug("cmp %s,#%d\n",regname[rs],imm);
57871462 1494 output_w32(0xe3500000|rd_rn_rm(0,rs,0)|armval);
1495 }else if(genimm(-imm,&armval)) {
5a05d80c 1496 assem_debug("cmn %s,#%d\n",regname[rs],imm);
57871462 1497 output_w32(0xe3700000|rd_rn_rm(0,rs,0)|armval);
1498 }else if(imm>0) {
1499 assert(imm<65536);
1500 #ifdef ARMv5_ONLY
1501 emit_movimm(imm,HOST_TEMPREG);
1502 #else
1503 emit_movw(imm,HOST_TEMPREG);
1504 #endif
1505 assem_debug("cmp %s,r14\n",regname[rs]);
1506 output_w32(0xe1500000|rd_rn_rm(0,rs,HOST_TEMPREG));
1507 }else{
1508 assert(imm>-65536);
1509 #ifdef ARMv5_ONLY
1510 emit_movimm(-imm,HOST_TEMPREG);
1511 #else
1512 emit_movw(-imm,HOST_TEMPREG);
1513 #endif
1514 assem_debug("cmn %s,r14\n",regname[rs]);
1515 output_w32(0xe1700000|rd_rn_rm(0,rs,HOST_TEMPREG));
1516 }
1517}
1518
1519void emit_cmovne(u_int *addr,int rt)
1520{
1521 assem_debug("cmovne %x,%%%s",(int)addr,regname[rt]);
1522 assert(0);
1523}
1524void emit_cmovl(u_int *addr,int rt)
1525{
1526 assem_debug("cmovl %x,%%%s",(int)addr,regname[rt]);
1527 assert(0);
1528}
1529void emit_cmovs(u_int *addr,int rt)
1530{
1531 assem_debug("cmovs %x,%%%s",(int)addr,regname[rt]);
1532 assert(0);
1533}
1534void emit_cmovne_imm(int imm,int rt)
1535{
1536 assem_debug("movne %s,#%d\n",regname[rt],imm);
1537 u_int armval;
cfbd3c6e 1538 genimm_checked(imm,&armval);
57871462 1539 output_w32(0x13a00000|rd_rn_rm(rt,0,0)|armval);
1540}
1541void emit_cmovl_imm(int imm,int rt)
1542{
1543 assem_debug("movlt %s,#%d\n",regname[rt],imm);
1544 u_int armval;
cfbd3c6e 1545 genimm_checked(imm,&armval);
57871462 1546 output_w32(0xb3a00000|rd_rn_rm(rt,0,0)|armval);
1547}
1548void emit_cmovb_imm(int imm,int rt)
1549{
1550 assem_debug("movcc %s,#%d\n",regname[rt],imm);
1551 u_int armval;
cfbd3c6e 1552 genimm_checked(imm,&armval);
57871462 1553 output_w32(0x33a00000|rd_rn_rm(rt,0,0)|armval);
1554}
1555void emit_cmovs_imm(int imm,int rt)
1556{
1557 assem_debug("movmi %s,#%d\n",regname[rt],imm);
1558 u_int armval;
cfbd3c6e 1559 genimm_checked(imm,&armval);
57871462 1560 output_w32(0x43a00000|rd_rn_rm(rt,0,0)|armval);
1561}
1562void emit_cmove_reg(int rs,int rt)
1563{
1564 assem_debug("moveq %s,%s\n",regname[rt],regname[rs]);
1565 output_w32(0x01a00000|rd_rn_rm(rt,0,rs));
1566}
1567void emit_cmovne_reg(int rs,int rt)
1568{
1569 assem_debug("movne %s,%s\n",regname[rt],regname[rs]);
1570 output_w32(0x11a00000|rd_rn_rm(rt,0,rs));
1571}
1572void emit_cmovl_reg(int rs,int rt)
1573{
1574 assem_debug("movlt %s,%s\n",regname[rt],regname[rs]);
1575 output_w32(0xb1a00000|rd_rn_rm(rt,0,rs));
1576}
1577void emit_cmovs_reg(int rs,int rt)
1578{
1579 assem_debug("movmi %s,%s\n",regname[rt],regname[rs]);
1580 output_w32(0x41a00000|rd_rn_rm(rt,0,rs));
1581}
1582
/*
 * Set rt to 1 when the compare of rs against imm is "lt", else 0.
 * rt is zeroed before the compare when it is a different register from
 * rs; when rs == rt it is zeroed with emit_movimm() after the compare.
 */
void emit_slti32(int rs,int imm,int rt)
{
  const int in_place = (rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * Set rt to 1 when the compare of rs against imm is "cc", else 0.
 * rt is zeroed before the compare when it is a different register from
 * rs; when rs == rt it is zeroed with emit_movimm() after the compare.
 */
void emit_sltiu32(int rs,int imm,int rt)
{
  const int in_place = (rs==rt);
  if(!in_place) emit_zeroreg(rt);
  emit_cmpimm(rs,imm);
  if(in_place) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
/*
 * Set-less-than-immediate for a value held in the pair rsh:rsl, result
 * in rt (which must not be rsh).  The low word is compared first via
 * emit_slti32(); the high word then overrides that result, using a test
 * against itself for imm >= 0 and a compare against -1 for imm < 0.
 */
void emit_slti64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_slti32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(0,rt);
    emit_cmovl_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
  emit_cmovs_imm(1,rt);
}
/*
 * Unsigned set-less-than-immediate for a value held in the pair rsh:rsl,
 * result in rt (which must not be rsh).  The low word is compared first
 * via emit_sltiu32(); the high word then overrides that result with a
 * movne of 0 (imm >= 0) or of 1 (imm < 0).
 */
void emit_sltiu64_32(int rsh,int rsl,int imm,int rt)
{
  assert(rsh!=rt);
  emit_sltiu32(rsl,imm,rt);
  if(imm<0) {
    emit_cmpimm(rsh,-1);
    emit_cmovne_imm(1,rt);
    return;
  }
  emit_test(rsh,rsh);
  emit_cmovne_imm(0,rt);
}
1629
1630void emit_cmp(int rs,int rt)
1631{
1632 assem_debug("cmp %s,%s\n",regname[rs],regname[rt]);
1633 output_w32(0xe1500000|rd_rn_rm(0,rs,rt));
1634}
/*
 * rt = 1, replaced by 0 when the compare of rs against 1 is "lt".
 * Sequence: cmp rs,#1 / mov rt,#1 / movlt rt,#0.
 */
void emit_set_gz32(int rs, int rt)
{
  //assem_debug("set_gz32\n");
  emit_cmpimm(rs,1);     // compare first, rt may alias rs
  emit_movimm(1,rt);     // default result
  emit_cmovl_imm(0,rt);  // override on "lt"
}
/*
 * Turn rs into a 0/1 value in rt: flags come from "tst rs,rs" when the
 * registers coincide, otherwise from emit_movs(rs,rt); a conditional
 * "movne rt,#1" then follows.
 */
void emit_set_nz32(int rs, int rt)
{
  //assem_debug("set_nz32\n");
  if(rs==rt) {
    emit_test(rs,rs);
  }else{
    emit_movs(rs,rt);
  }
  emit_cmovne_imm(1,rt);
}
/*
 * 64-bit counterpart of emit_set_gz32 for the pair rsh:rsl.  The low
 * word is evaluated with emit_set_gz32(); the high word is then tested
 * and overrides the result: movne rt,#1 followed by movmi rt,#0.
 */
void emit_set_gz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_gz64\n");
  emit_set_gz32(rsl,rt);   // low word
  emit_test(rsh,rsh);      // flags from the high word
  emit_cmovne_imm(1,rt);
  emit_cmovs_imm(0,rt);
}
/* ORs both halves into rt with "orrs", then applies "movne rt,#1". */
void emit_set_nz64_32(int rsh, int rsl, int rt)
{
  //assem_debug("set_nz64\n");
  emit_or_and_set_flags(rsh,rsl,rt);  // rt = rsh|rsl, flags updated
  emit_cmovne_imm(1,rt);
}
/*
 * rt = 1 when the compare of rs1 against rs2 is "lt", else 0.  rt is
 * zeroed before the compare unless it aliases an operand, in which case
 * it is zeroed with emit_movimm() after the compare.
 */
void emit_set_if_less32(int rs1, int rs2, int rt)
{
  //assem_debug("set if less (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovl_imm(1,rt);
}
/*
 * rt = 1 when the compare of rs1 against rs2 is "cc", else 0.  rt is
 * zeroed before the compare unless it aliases an operand, in which case
 * it is zeroed with emit_movimm() after the compare.
 */
void emit_set_if_carry32(int rs1, int rs2, int rt)
{
  //assem_debug("set if carry (%%%s,%%%s),%%%s\n",regname[rs1],regname[rs2],regname[rt]);
  const int aliased = (rs1==rt||rs2==rt);
  if(!aliased) emit_zeroreg(rt);
  emit_cmp(rs1,rs2);
  if(aliased) emit_movimm(0,rt);
  emit_cmovb_imm(1,rt);
}
1679void emit_set_if_less64_32(int u1, int l1, int u2, int l2, int rt)
1680{
1681 //assem_debug("set if less64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1682 assert(u1!=rt);
1683 assert(u2!=rt);
1684 emit_cmp(l1,l2);
1685 emit_movimm(0,rt);
1686 emit_sbcs(u1,u2,HOST_TEMPREG);
1687 emit_cmovl_imm(1,rt);
1688}
1689void emit_set_if_carry64_32(int u1, int l1, int u2, int l2, int rt)
1690{
1691 //assem_debug("set if carry64 (%%%s,%%%s,%%%s,%%%s),%%%s\n",regname[u1],regname[l1],regname[u2],regname[l2],regname[rt]);
1692 assert(u1!=rt);
1693 assert(u2!=rt);
1694 emit_cmp(l1,l2);
1695 emit_movimm(0,rt);
1696 emit_sbcs(u1,u2,HOST_TEMPREG);
1697 emit_cmovb_imm(1,rt);
1698}
1699
1700void emit_call(int a)
1701{
1702 assem_debug("bl %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1703 u_int offset=genjmp(a);
1704 output_w32(0xeb000000|offset);
1705}
1706void emit_jmp(int a)
1707{
1708 assem_debug("b %x (%x+%x)\n",a,(int)out,a-(int)out-8);
1709 u_int offset=genjmp(a);
1710 output_w32(0xea000000|offset);
1711}
1712void emit_jne(int a)
1713{
1714 assem_debug("bne %x\n",a);
1715 u_int offset=genjmp(a);
1716 output_w32(0x1a000000|offset);
1717}
1718void emit_jeq(int a)
1719{
1720 assem_debug("beq %x\n",a);
1721 u_int offset=genjmp(a);
1722 output_w32(0x0a000000|offset);
1723}
1724void emit_js(int a)
1725{
1726 assem_debug("bmi %x\n",a);
1727 u_int offset=genjmp(a);
1728 output_w32(0x4a000000|offset);
1729}
1730void emit_jns(int a)
1731{
1732 assem_debug("bpl %x\n",a);
1733 u_int offset=genjmp(a);
1734 output_w32(0x5a000000|offset);
1735}
1736void emit_jl(int a)
1737{
1738 assem_debug("blt %x\n",a);
1739 u_int offset=genjmp(a);
1740 output_w32(0xba000000|offset);
1741}
1742void emit_jge(int a)
1743{
1744 assem_debug("bge %x\n",a);
1745 u_int offset=genjmp(a);
1746 output_w32(0xaa000000|offset);
1747}
1748void emit_jno(int a)
1749{
1750 assem_debug("bvc %x\n",a);
1751 u_int offset=genjmp(a);
1752 output_w32(0x7a000000|offset);
1753}
1754void emit_jc(int a)
1755{
1756 assem_debug("bcs %x\n",a);
1757 u_int offset=genjmp(a);
1758 output_w32(0x2a000000|offset);
1759}
1760void emit_jcc(int a)
1761{
1762 assem_debug("bcc %x\n",a);
1763 u_int offset=genjmp(a);
1764 output_w32(0x3a000000|offset);
1765}
1766
/* Stub: logs the mnemonic, then fails assert(0); no code is emitted. */
void emit_pushimm(int imm)
{
  assem_debug("push $%x\n",imm);
  assert(0); // not implemented here
}
/* Stub: logs the mnemonic, then fails assert(0); no code is emitted. */
void emit_pusha()
{
  assem_debug("pusha\n");
  assert(0); // not implemented here
}
/* Stub: logs the mnemonic, then fails assert(0); no code is emitted. */
void emit_popa()
{
  assem_debug("popa\n");
  assert(0); // not implemented here
}
1782void emit_pushreg(u_int r)
1783{
1784 assem_debug("push %%%s\n",regname[r]);
1785 assert(0);
1786}
1787void emit_popreg(u_int r)
1788{
1789 assem_debug("pop %%%s\n",regname[r]);
1790 assert(0);
1791}
1792void emit_callreg(u_int r)
1793{
c6c3b1b3 1794 assert(r<15);
1795 assem_debug("blx %s\n",regname[r]);
1796 output_w32(0xe12fff30|r);
57871462 1797}
1798void emit_jmpreg(u_int r)
1799{
1800 assem_debug("mov pc,%s\n",regname[r]);
1801 output_w32(0xe1a00000|rd_rn_rm(15,0,r));
1802}
1803
1804void emit_readword_indexed(int offset, int rs, int rt)
1805{
1806 assert(offset>-4096&&offset<4096);
1807 assem_debug("ldr %s,%s+%d\n",regname[rt],regname[rs],offset);
1808 if(offset>=0) {
1809 output_w32(0xe5900000|rd_rn_rm(rt,rs,0)|offset);
1810 }else{
1811 output_w32(0xe5100000|rd_rn_rm(rt,rs,0)|(-offset));
1812 }
1813}
1814void emit_readword_dualindexedx4(int rs1, int rs2, int rt)
1815{
1816 assem_debug("ldr %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1817 output_w32(0xe7900000|rd_rn_rm(rt,rs1,rs2)|0x100);
1818}
c6c3b1b3 1819void emit_ldrcc_dualindexed(int rs1, int rs2, int rt)
1820{
1821 assem_debug("ldrcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1822 output_w32(0x37900000|rd_rn_rm(rt,rs1,rs2));
1823}
1824void emit_ldrccb_dualindexed(int rs1, int rs2, int rt)
1825{
1826 assem_debug("ldrccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1827 output_w32(0x37d00000|rd_rn_rm(rt,rs1,rs2));
1828}
1829void emit_ldrccsb_dualindexed(int rs1, int rs2, int rt)
1830{
1831 assem_debug("ldrccsb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1832 output_w32(0x319000d0|rd_rn_rm(rt,rs1,rs2));
1833}
1834void emit_ldrcch_dualindexed(int rs1, int rs2, int rt)
1835{
1836 assem_debug("ldrcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1837 output_w32(0x319000b0|rd_rn_rm(rt,rs1,rs2));
1838}
1839void emit_ldrccsh_dualindexed(int rs1, int rs2, int rt)
1840{
1841 assem_debug("ldrccsh %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
1842 output_w32(0x319000f0|rd_rn_rm(rt,rs1,rs2));
1843}
/*
 * Word load with an optional map register: a negative map selects the
 * plain rs+addr form; otherwise addr must be 0 and the load is
 * rs + (map << 2).
 */
void emit_readword_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_readword_indexed(addr, rs, rt);
    return;
  }
  assert(addr==0);
  emit_readword_dualindexedx4(rs, map, rt);
}
/*
 * Double-word load into rh (skipped when rh < 0) and rl.
 * Without a map register the two words come from addr and addr+4.  With
 * one, rh must not be rs; map is incremented by 1 between the two loads.
 */
void emit_readdword_indexed_tlb(int addr, int rs, int map, int rh, int rl)
{
  if(map>=0) {
    assert(rh!=rs);
    if(rh>=0) emit_readword_indexed_tlb(addr, rs, map, rh);
    emit_addimm(map,1,map);
    emit_readword_indexed_tlb(addr, rs, map, rl);
    return;
  }
  if(rh>=0) emit_readword_indexed(addr, rs, rh);
  emit_readword_indexed(addr+4, rs, rl);
}
1864void emit_movsbl_indexed(int offset, int rs, int rt)
1865{
1866 assert(offset>-256&&offset<256);
1867 assem_debug("ldrsb %s,%s+%d\n",regname[rt],regname[rs],offset);
1868 if(offset>=0) {
1869 output_w32(0xe1d000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1870 }else{
1871 output_w32(0xe15000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1872 }
1873}
1874void emit_movsbl_indexed_tlb(int addr, int rs, int map, int rt)
1875{
1876 if(map<0) emit_movsbl_indexed(addr, rs, rt);
1877 else {
1878 if(addr==0) {
1879 emit_shlimm(map,2,map);
1880 assem_debug("ldrsb %s,%s+%s\n",regname[rt],regname[rs],regname[map]);
1881 output_w32(0xe19000d0|rd_rn_rm(rt,rs,map));
1882 }else{
1883 assert(addr>-256&&addr<256);
1884 assem_debug("add %s,%s,%s,lsl #2\n",regname[rt],regname[rs],regname[map]);
1885 output_w32(0xe0800000|rd_rn_rm(rt,rs,map)|(2<<7));
1886 emit_movsbl_indexed(addr, rt, rt);
1887 }
1888 }
1889}
1890void emit_movswl_indexed(int offset, int rs, int rt)
1891{
1892 assert(offset>-256&&offset<256);
1893 assem_debug("ldrsh %s,%s+%d\n",regname[rt],regname[rs],offset);
1894 if(offset>=0) {
1895 output_w32(0xe1d000f0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1896 }else{
1897 output_w32(0xe15000f0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1898 }
1899}
1900void emit_movzbl_indexed(int offset, int rs, int rt)
1901{
1902 assert(offset>-4096&&offset<4096);
1903 assem_debug("ldrb %s,%s+%d\n",regname[rt],regname[rs],offset);
1904 if(offset>=0) {
1905 output_w32(0xe5d00000|rd_rn_rm(rt,rs,0)|offset);
1906 }else{
1907 output_w32(0xe5500000|rd_rn_rm(rt,rs,0)|(-offset));
1908 }
1909}
1910void emit_movzbl_dualindexedx4(int rs1, int rs2, int rt)
1911{
1912 assem_debug("ldrb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
1913 output_w32(0xe7d00000|rd_rn_rm(rt,rs1,rs2)|0x100);
1914}
/*
 * Unsigned byte load with an optional map register.  With a map and a
 * non-zero addr, rs+addr is first computed into rt, which then serves as
 * the base of the map-indexed load.
 */
void emit_movzbl_indexed_tlb(int addr, int rs, int map, int rt)
{
  if(map<0) {
    emit_movzbl_indexed(addr, rs, rt);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,rt);
    emit_movzbl_dualindexedx4(rt, map, rt);
  }else{
    emit_movzbl_dualindexedx4(rs, map, rt);
  }
}
1927void emit_movzwl_indexed(int offset, int rs, int rt)
1928{
1929 assert(offset>-256&&offset<256);
1930 assem_debug("ldrh %s,%s+%d\n",regname[rt],regname[rs],offset);
1931 if(offset>=0) {
1932 output_w32(0xe1d000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1933 }else{
1934 output_w32(0xe15000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1935 }
1936}
054175e9 1937static void emit_ldrd(int offset, int rs, int rt)
1938{
1939 assert(offset>-256&&offset<256);
1940 assem_debug("ldrd %s,%s+%d\n",regname[rt],regname[rs],offset);
1941 if(offset>=0) {
1942 output_w32(0xe1c000d0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
1943 }else{
1944 output_w32(0xe14000d0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
1945 }
1946}
57871462 1947void emit_readword(int addr, int rt)
1948{
1949 u_int offset = addr-(u_int)&dynarec_local;
1950 assert(offset<4096);
1951 assem_debug("ldr %s,fp+%d\n",regname[rt],offset);
1952 output_w32(0xe5900000|rd_rn_rm(rt,FP,0)|offset);
1953}
1954void emit_movsbl(int addr, int rt)
1955{
1956 u_int offset = addr-(u_int)&dynarec_local;
1957 assert(offset<256);
1958 assem_debug("ldrsb %s,fp+%d\n",regname[rt],offset);
1959 output_w32(0xe1d000d0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1960}
1961void emit_movswl(int addr, int rt)
1962{
1963 u_int offset = addr-(u_int)&dynarec_local;
1964 assert(offset<256);
1965 assem_debug("ldrsh %s,fp+%d\n",regname[rt],offset);
1966 output_w32(0xe1d000f0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1967}
1968void emit_movzbl(int addr, int rt)
1969{
1970 u_int offset = addr-(u_int)&dynarec_local;
1971 assert(offset<4096);
1972 assem_debug("ldrb %s,fp+%d\n",regname[rt],offset);
1973 output_w32(0xe5d00000|rd_rn_rm(rt,FP,0)|offset);
1974}
1975void emit_movzwl(int addr, int rt)
1976{
1977 u_int offset = addr-(u_int)&dynarec_local;
1978 assert(offset<256);
1979 assem_debug("ldrh %s,fp+%d\n",regname[rt],offset);
1980 output_w32(0xe1d000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
1981}
1982void emit_movzwl_reg(int rs, int rt)
1983{
1984 assem_debug("movzwl %%%s,%%%s\n",regname[rs]+1,regname[rt]);
1985 assert(0);
1986}
1987
1988void emit_xchg(int rs, int rt)
1989{
1990 assem_debug("xchg %%%s,%%%s\n",regname[rs],regname[rt]);
1991 assert(0);
1992}
1993void emit_writeword_indexed(int rt, int offset, int rs)
1994{
1995 assert(offset>-4096&&offset<4096);
1996 assem_debug("str %s,%s+%d\n",regname[rt],regname[rs],offset);
1997 if(offset>=0) {
1998 output_w32(0xe5800000|rd_rn_rm(rt,rs,0)|offset);
1999 }else{
2000 output_w32(0xe5000000|rd_rn_rm(rt,rs,0)|(-offset));
2001 }
2002}
2003void emit_writeword_dualindexedx4(int rt, int rs1, int rs2)
2004{
2005 assem_debug("str %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2006 output_w32(0xe7800000|rd_rn_rm(rt,rs1,rs2)|0x100);
2007}
/*
 * Word store with an optional map register: a negative map selects the
 * plain rs+addr form; otherwise addr must be 0 and the store goes to
 * rs + (map << 2).  temp is not used by this function.
 */
void emit_writeword_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writeword_indexed(rt, addr, rs);
    return;
  }
  assert(addr==0);
  emit_writeword_dualindexedx4(rt, rs, map);
}
/*
 * Double-word store of rh:rl.
 * Without a map register the words go to addr (skipped when rh < 0) and
 * addr+4.  With one, rh must be valid; the second word is addressed
 * either through temp = map+1 (when temp differs from rs) or by adding 4
 * to rs in place and reusing map.
 */
void emit_writedword_indexed_tlb(int rh, int rl, int addr, int rs, int map, int temp)
{
  if(map<0) {
    if(rh>=0) emit_writeword_indexed(rh, addr, rs);
    emit_writeword_indexed(rl, addr+4, rs);
    return;
  }
  assert(rh>=0);
  const int temp_usable = (temp!=rs);
  if(temp_usable) emit_addimm(map,1,temp);
  emit_writeword_indexed_tlb(rh, addr, rs, map, temp);
  if(temp_usable) {
    emit_writeword_indexed_tlb(rl, addr, rs, temp, temp);
  }else{
    emit_addimm(rs,4,rs);
    emit_writeword_indexed_tlb(rl, addr, rs, map, temp);
  }
}
2032void emit_writehword_indexed(int rt, int offset, int rs)
2033{
2034 assert(offset>-256&&offset<256);
2035 assem_debug("strh %s,%s+%d\n",regname[rt],regname[rs],offset);
2036 if(offset>=0) {
2037 output_w32(0xe1c000b0|rd_rn_rm(rt,rs,0)|((offset<<4)&0xf00)|(offset&0xf));
2038 }else{
2039 output_w32(0xe14000b0|rd_rn_rm(rt,rs,0)|(((-offset)<<4)&0xf00)|((-offset)&0xf));
2040 }
2041}
2042void emit_writebyte_indexed(int rt, int offset, int rs)
2043{
2044 assert(offset>-4096&&offset<4096);
2045 assem_debug("strb %s,%s+%d\n",regname[rt],regname[rs],offset);
2046 if(offset>=0) {
2047 output_w32(0xe5c00000|rd_rn_rm(rt,rs,0)|offset);
2048 }else{
2049 output_w32(0xe5400000|rd_rn_rm(rt,rs,0)|(-offset));
2050 }
2051}
2052void emit_writebyte_dualindexedx4(int rt, int rs1, int rs2)
2053{
2054 assem_debug("strb %s,%s,%s lsl #2\n",regname[rt],regname[rs1],regname[rs2]);
2055 output_w32(0xe7c00000|rd_rn_rm(rt,rs1,rs2)|0x100);
2056}
/*
 * Byte store with an optional map register.  With a map and a non-zero
 * addr, rs+addr is first computed into temp, which then serves as the
 * base of the map-indexed store.
 */
void emit_writebyte_indexed_tlb(int rt, int addr, int rs, int map, int temp)
{
  if(map<0) {
    emit_writebyte_indexed(rt, addr, rs);
    return;
  }
  if(addr!=0) {
    emit_addimm(rs,addr,temp);
    emit_writebyte_dualindexedx4(rt, temp, map);
  }else{
    emit_writebyte_dualindexedx4(rt, rs, map);
  }
}
b96d3df7 2069void emit_strcc_dualindexed(int rs1, int rs2, int rt)
2070{
2071 assem_debug("strcc %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2072 output_w32(0x37800000|rd_rn_rm(rt,rs1,rs2));
2073}
2074void emit_strccb_dualindexed(int rs1, int rs2, int rt)
2075{
2076 assem_debug("strccb %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2077 output_w32(0x37c00000|rd_rn_rm(rt,rs1,rs2));
2078}
2079void emit_strcch_dualindexed(int rs1, int rs2, int rt)
2080{
2081 assem_debug("strcch %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2082 output_w32(0x318000b0|rd_rn_rm(rt,rs1,rs2));
2083}
57871462 2084void emit_writeword(int rt, int addr)
2085{
2086 u_int offset = addr-(u_int)&dynarec_local;
2087 assert(offset<4096);
2088 assem_debug("str %s,fp+%d\n",regname[rt],offset);
2089 output_w32(0xe5800000|rd_rn_rm(rt,FP,0)|offset);
2090}
2091void emit_writehword(int rt, int addr)
2092{
2093 u_int offset = addr-(u_int)&dynarec_local;
2094 assert(offset<256);
2095 assem_debug("strh %s,fp+%d\n",regname[rt],offset);
2096 output_w32(0xe1c000b0|rd_rn_rm(rt,FP,0)|((offset<<4)&0xf00)|(offset&0xf));
2097}
2098void emit_writebyte(int rt, int addr)
2099{
2100 u_int offset = addr-(u_int)&dynarec_local;
2101 assert(offset<4096);
74426039 2102 assem_debug("strb %s,fp+%d\n",regname[rt],offset);
57871462 2103 output_w32(0xe5c00000|rd_rn_rm(rt,FP,0)|offset);
2104}
/* Stub: logs the mnemonic, then fails assert(0); no code is emitted. */
void emit_writeword_imm(int imm, int addr)
{
  assem_debug("movl $%x,%x\n",imm,addr);
  assert(0); // not implemented here
}
/* Stub: logs the mnemonic, then fails assert(0); no code is emitted. */
void emit_writebyte_imm(int imm, int addr)
{
  assem_debug("movb $%x,%x\n",imm,addr);
  assert(0); // not implemented here
}
2115
2116void emit_mul(int rs)
2117{
2118 assem_debug("mul %%%s\n",regname[rs]);
2119 assert(0);
2120}
2121void emit_imul(int rs)
2122{
2123 assem_debug("imul %%%s\n",regname[rs]);
2124 assert(0);
2125}
2126void emit_umull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2127{
2128 assem_debug("umull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2129 assert(rs1<16);
2130 assert(rs2<16);
2131 assert(hi<16);
2132 assert(lo<16);
2133 output_w32(0xe0800090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2134}
2135void emit_smull(u_int rs1, u_int rs2, u_int hi, u_int lo)
2136{
2137 assem_debug("smull %s, %s, %s, %s\n",regname[lo],regname[hi],regname[rs1],regname[rs2]);
2138 assert(rs1<16);
2139 assert(rs2<16);
2140 assert(hi<16);
2141 assert(lo<16);
2142 output_w32(0xe0c00090|(hi<<16)|(lo<<12)|(rs2<<8)|rs1);
2143}
2144
2145void emit_div(int rs)
2146{
2147 assem_debug("div %%%s\n",regname[rs]);
2148 assert(0);
2149}
2150void emit_idiv(int rs)
2151{
2152 assem_debug("idiv %%%s\n",regname[rs]);
2153 assert(0);
2154}
// Not implemented in this backend: logs the request, then trips assert(0).
void emit_cdq()
{
  assem_debug("cdq\n");
  assert(0);
}
2160
2161void emit_clz(int rs,int rt)
2162{
2163 assem_debug("clz %s,%s\n",regname[rt],regname[rs]);
2164 output_w32(0xe16f0f10|rd_rn_rm(rt,0,rs));
2165}
2166
2167void emit_subcs(int rs1,int rs2,int rt)
2168{
2169 assem_debug("subcs %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2170 output_w32(0x20400000|rd_rn_rm(rt,rs1,rs2));
2171}
2172
2173void emit_shrcc_imm(int rs,u_int imm,int rt)
2174{
2175 assert(imm>0);
2176 assert(imm<32);
2177 assem_debug("lsrcc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2178 output_w32(0x31a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2179}
2180
b1be1eee 2181void emit_shrne_imm(int rs,u_int imm,int rt)
2182{
2183 assert(imm>0);
2184 assert(imm<32);
2185 assem_debug("lsrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2186 output_w32(0x11a00000|rd_rn_rm(rt,0,rs)|0x20|(imm<<7));
2187}
2188
57871462 2189void emit_negmi(int rs, int rt)
2190{
2191 assem_debug("rsbmi %s,%s,#0\n",regname[rt],regname[rs]);
2192 output_w32(0x42600000|rd_rn_rm(rt,rs,0));
2193}
2194
2195void emit_negsmi(int rs, int rt)
2196{
2197 assem_debug("rsbsmi %s,%s,#0\n",regname[rt],regname[rs]);
2198 output_w32(0x42700000|rd_rn_rm(rt,rs,0));
2199}
2200
2201void emit_orreq(u_int rs1,u_int rs2,u_int rt)
2202{
2203 assem_debug("orreq %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2204 output_w32(0x01800000|rd_rn_rm(rt,rs1,rs2));
2205}
2206
2207void emit_orrne(u_int rs1,u_int rs2,u_int rt)
2208{
2209 assem_debug("orrne %s,%s,%s\n",regname[rt],regname[rs1],regname[rs2]);
2210 output_w32(0x11800000|rd_rn_rm(rt,rs1,rs2));
2211}
2212
2213void emit_bic_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2214{
2215 assem_debug("bic %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2216 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2217}
2218
2219void emit_biceq_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2220{
2221 assem_debug("biceq %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2222 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2223}
2224
2225void emit_bicne_lsl(u_int rs1,u_int rs2,u_int shift,u_int rt)
2226{
2227 assem_debug("bicne %s,%s,%s lsl %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2228 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x10|(shift<<8));
2229}
2230
2231void emit_bic_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2232{
2233 assem_debug("bic %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2234 output_w32(0xe1C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2235}
2236
2237void emit_biceq_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2238{
2239 assem_debug("biceq %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2240 output_w32(0x01C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2241}
2242
2243void emit_bicne_lsr(u_int rs1,u_int rs2,u_int shift,u_int rt)
2244{
2245 assem_debug("bicne %s,%s,%s lsr %s\n",regname[rt],regname[rs1],regname[rs2],regname[shift]);
2246 output_w32(0x11C00000|rd_rn_rm(rt,rs1,rs2)|0x30|(shift<<8));
2247}
2248
2249void emit_teq(int rs, int rt)
2250{
2251 assem_debug("teq %s,%s\n",regname[rs],regname[rt]);
2252 output_w32(0xe1300000|rd_rn_rm(0,rs,rt));
2253}
2254
2255void emit_rsbimm(int rs, int imm, int rt)
2256{
2257 u_int armval;
cfbd3c6e 2258 genimm_checked(imm,&armval);
57871462 2259 assem_debug("rsb %s,%s,#%d\n",regname[rt],regname[rs],imm);
2260 output_w32(0xe2600000|rd_rn_rm(rt,rs,0)|armval);
2261}
2262
2263// Load 2 immediates optimizing for small code size
2264void emit_mov2imm_compact(int imm1,u_int rt1,int imm2,u_int rt2)
2265{
2266 emit_movimm(imm1,rt1);
2267 u_int armval;
2268 if(genimm(imm2-imm1,&armval)) {
2269 assem_debug("add %s,%s,#%d\n",regname[rt2],regname[rt1],imm2-imm1);
2270 output_w32(0xe2800000|rd_rn_rm(rt2,rt1,0)|armval);
2271 }else if(genimm(imm1-imm2,&armval)) {
2272 assem_debug("sub %s,%s,#%d\n",regname[rt2],regname[rt1],imm1-imm2);
2273 output_w32(0xe2400000|rd_rn_rm(rt2,rt1,0)|armval);
2274 }
2275 else emit_movimm(imm2,rt2);
2276}
2277
2278// Conditionally select one of two immediates, optimizing for small code size
2279// This will only be called if HAVE_CMOV_IMM is defined
2280void emit_cmov2imm_e_ne_compact(int imm1,int imm2,u_int rt)
2281{
2282 u_int armval;
2283 if(genimm(imm2-imm1,&armval)) {
2284 emit_movimm(imm1,rt);
2285 assem_debug("addne %s,%s,#%d\n",regname[rt],regname[rt],imm2-imm1);
2286 output_w32(0x12800000|rd_rn_rm(rt,rt,0)|armval);
2287 }else if(genimm(imm1-imm2,&armval)) {
2288 emit_movimm(imm1,rt);
2289 assem_debug("subne %s,%s,#%d\n",regname[rt],regname[rt],imm1-imm2);
2290 output_w32(0x12400000|rd_rn_rm(rt,rt,0)|armval);
2291 }
2292 else {
2293 #ifdef ARMv5_ONLY
2294 emit_movimm(imm1,rt);
2295 add_literal((int)out,imm2);
2296 assem_debug("ldrne %s,pc+? [=%x]\n",regname[rt],imm2);
2297 output_w32(0x15900000|rd_rn_rm(rt,15,0));
2298 #else
2299 emit_movw(imm1&0x0000FFFF,rt);
2300 if((imm1&0xFFFF)!=(imm2&0xFFFF)) {
2301 assem_debug("movwne %s,#%d (0x%x)\n",regname[rt],imm2&0xFFFF,imm2&0xFFFF);
2302 output_w32(0x13000000|rd_rn_rm(rt,0,0)|(imm2&0xfff)|((imm2<<4)&0xf0000));
2303 }
2304 emit_movt(imm1&0xFFFF0000,rt);
2305 if((imm1&0xFFFF0000)!=(imm2&0xFFFF0000)) {
2306 assem_debug("movtne %s,#%d (0x%x)\n",regname[rt],imm2&0xffff0000,imm2&0xffff0000);
2307 output_w32(0x13400000|rd_rn_rm(rt,0,0)|((imm2>>16)&0xfff)|((imm2>>12)&0xf0000));
2308 }
2309 #endif
2310 }
2311}
2312
// Special case for checking invalid_code.
// Not implemented in this backend: always trips assert(0).
void emit_cmpmem_indexedsr12_imm(int addr,int r,int imm)
{
  assert(0);
}
2318
2319// special case for checking invalid_code
2320void emit_cmpmem_indexedsr12_reg(int base,int r,int imm)
2321{
2322 assert(imm<128&&imm>=0);
2323 assert(r>=0&&r<16);
2324 assem_debug("ldrb lr,%s,%s lsr #12\n",regname[base],regname[r]);
2325 output_w32(0xe7d00000|rd_rn_rm(HOST_TEMPREG,base,r)|0x620);
2326 emit_cmpimm(HOST_TEMPREG,imm);
2327}
2328
2329// special case for tlb mapping
2330void emit_addsr12(int rs1,int rs2,int rt)
2331{
2332 assem_debug("add %s,%s,%s lsr #12\n",regname[rt],regname[rs1],regname[rs2]);
2333 output_w32(0xe0800620|rd_rn_rm(rt,rs1,rs2));
2334}
2335
0bbd1454 2336void emit_callne(int a)
2337{
2338 assem_debug("blne %x\n",a);
2339 u_int offset=genjmp(a);
2340 output_w32(0x1b000000|offset);
2341}
2342
// Used to preload hash table entries.
// There is no ARM implementation of the absolute-address form: the previous
// body wrote x86 opcode bytes (0x0F 0x18 + modrm) into the instruction
// stream, which is not valid ARM code. Like the other unported emitters in
// this file it now logs the request and trips assert(0).
// Use emit_prefetchreg() for the register form.
void emit_prefetch(void *addr)
{
  assem_debug("prefetch %x\n",(int)addr);
  assert(0);
}
2352void emit_prefetchreg(int r)
2353{
2354 assem_debug("pld %s\n",regname[r]);
2355 output_w32(0xf5d0f000|rd_rn_rm(0,r,0));
2356}
2357
2358// Special case for mini_ht
2359void emit_ldreq_indexed(int rs, u_int offset, int rt)
2360{
2361 assert(offset<4096);
2362 assem_debug("ldreq %s,[%s, #%d]\n",regname[rt],regname[rs],offset);
2363 output_w32(0x05900000|rd_rn_rm(rt,rs,0)|offset);
2364}
2365
2366void emit_flds(int r,int sr)
2367{
2368 assem_debug("flds s%d,[%s]\n",sr,regname[r]);
2369 output_w32(0xed900a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2370}
2371
2372void emit_vldr(int r,int vr)
2373{
2374 assem_debug("vldr d%d,[%s]\n",vr,regname[r]);
2375 output_w32(0xed900b00|(vr<<12)|(r<<16));
2376}
2377
2378void emit_fsts(int sr,int r)
2379{
2380 assem_debug("fsts s%d,[%s]\n",sr,regname[r]);
2381 output_w32(0xed800a00|((sr&14)<<11)|((sr&1)<<22)|(r<<16));
2382}
2383
2384void emit_vstr(int vr,int r)
2385{
2386 assem_debug("vstr d%d,[%s]\n",vr,regname[r]);
2387 output_w32(0xed800b00|(vr<<12)|(r<<16));
2388}
2389
2390void emit_ftosizs(int s,int d)
2391{
2392 assem_debug("ftosizs s%d,s%d\n",d,s);
2393 output_w32(0xeebd0ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2394}
2395
2396void emit_ftosizd(int s,int d)
2397{
2398 assem_debug("ftosizd s%d,d%d\n",d,s);
2399 output_w32(0xeebd0bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2400}
2401
2402void emit_fsitos(int s,int d)
2403{
2404 assem_debug("fsitos s%d,s%d\n",d,s);
2405 output_w32(0xeeb80ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
2406}
2407
2408void emit_fsitod(int s,int d)
2409{
2410 assem_debug("fsitod d%d,s%d\n",d,s);
2411 output_w32(0xeeb80bc0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2412}
2413
2414void emit_fcvtds(int s,int d)
2415{
2416 assem_debug("fcvtds d%d,s%d\n",d,s);
2417 output_w32(0xeeb70ac0|((d&7)<<12)|((s&14)>>1)|((s&1)<<5));
2418}
2419
2420void emit_fcvtsd(int s,int d)
2421{
2422 assem_debug("fcvtsd s%d,d%d\n",d,s);
2423 output_w32(0xeeb70bc0|((d&14)<<11)|((d&1)<<22)|(s&7));
2424}
2425
// Emit "fsqrts s<d>,s<s>": single-precision square root, s is the source
// and d the destination single register.
// The debug text previously labelled the destination as a double ("d%d")
// although the encoding uses the single-register field layout.
void emit_fsqrts(int s,int d)
{
  assem_debug("fsqrts s%d,s%d\n",d,s);
  output_w32(0xeeb10ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2431
// Emit "fsqrtd d<d>,d<s>": double-precision square root, s is the source
// and d the destination double register (low 3 bits of each are encoded).
// The debug text previously labelled the destination as a single ("s%d")
// although the encoding uses the double-register field layout.
void emit_fsqrtd(int s,int d)
{
  assem_debug("fsqrtd d%d,d%d\n",d,s);
  output_w32(0xeeb10bc0|((d&7)<<12)|(s&7));
}
2437
// Emit "fabss s<d>,s<s>": single-precision absolute value, s is the source
// and d the destination single register.
// The debug text previously labelled the destination as a double ("d%d")
// although the encoding uses the single-register field layout.
void emit_fabss(int s,int d)
{
  assem_debug("fabss s%d,s%d\n",d,s);
  output_w32(0xeeb00ac0|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2443
// Emit "fabsd d<d>,d<s>": double-precision absolute value, s is the source
// and d the destination double register (low 3 bits of each are encoded).
// The debug text previously labelled the destination as a single ("s%d")
// although the encoding uses the double-register field layout.
void emit_fabsd(int s,int d)
{
  assem_debug("fabsd d%d,d%d\n",d,s);
  output_w32(0xeeb00bc0|((d&7)<<12)|(s&7));
}
2449
// Emit "fnegs s<d>,s<s>": single-precision negate, s is the source and d
// the destination single register.
// The debug text previously labelled the destination as a double ("d%d")
// although the encoding uses the single-register field layout.
void emit_fnegs(int s,int d)
{
  assem_debug("fnegs s%d,s%d\n",d,s);
  output_w32(0xeeb10a40|((d&14)<<11)|((d&1)<<22)|((s&14)>>1)|((s&1)<<5));
}
2455
// Emit "fnegd d<d>,d<s>": double-precision negate, s is the source and d
// the destination double register (low 3 bits of each are encoded).
// The debug text previously labelled the destination as a single ("s%d")
// although the encoding uses the double-register field layout.
void emit_fnegd(int s,int d)
{
  assem_debug("fnegd d%d,d%d\n",d,s);
  output_w32(0xeeb10b40|((d&7)<<12)|(s&7));
}
2461
2462void emit_fadds(int s1,int s2,int d)
2463{
2464 assem_debug("fadds s%d,s%d,s%d\n",d,s1,s2);
2465 output_w32(0xee300a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2466}
2467
2468void emit_faddd(int s1,int s2,int d)
2469{
2470 assem_debug("faddd d%d,d%d,d%d\n",d,s1,s2);
2471 output_w32(0xee300b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2472}
2473
2474void emit_fsubs(int s1,int s2,int d)
2475{
2476 assem_debug("fsubs s%d,s%d,s%d\n",d,s1,s2);
2477 output_w32(0xee300a40|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2478}
2479
2480void emit_fsubd(int s1,int s2,int d)
2481{
2482 assem_debug("fsubd d%d,d%d,d%d\n",d,s1,s2);
2483 output_w32(0xee300b40|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2484}
2485
2486void emit_fmuls(int s1,int s2,int d)
2487{
2488 assem_debug("fmuls s%d,s%d,s%d\n",d,s1,s2);
2489 output_w32(0xee200a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2490}
2491
2492void emit_fmuld(int s1,int s2,int d)
2493{
2494 assem_debug("fmuld d%d,d%d,d%d\n",d,s1,s2);
2495 output_w32(0xee200b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2496}
2497
2498void emit_fdivs(int s1,int s2,int d)
2499{
2500 assem_debug("fdivs s%d,s%d,s%d\n",d,s1,s2);
2501 output_w32(0xee800a00|((d&14)<<11)|((d&1)<<22)|((s1&14)<<15)|((s1&1)<<7)|((s2&14)>>1)|((s2&1)<<5));
2502}
2503
2504void emit_fdivd(int s1,int s2,int d)
2505{
2506 assem_debug("fdivd d%d,d%d,d%d\n",d,s1,s2);
2507 output_w32(0xee800b00|((d&7)<<12)|((s1&7)<<16)|(s2&7));
2508}
2509
2510void emit_fcmps(int x,int y)
2511{
2512 assem_debug("fcmps s14, s15\n");
2513 output_w32(0xeeb47a67);
2514}
2515
2516void emit_fcmpd(int x,int y)
2517{
2518 assem_debug("fcmpd d6, d7\n");
2519 output_w32(0xeeb46b47);
2520}
2521
2522void emit_fmstat()
2523{
2524 assem_debug("fmstat\n");
2525 output_w32(0xeef1fa10);
2526}
2527
2528void emit_bicne_imm(int rs,int imm,int rt)
2529{
2530 u_int armval;
cfbd3c6e 2531 genimm_checked(imm,&armval);
57871462 2532 assem_debug("bicne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2533 output_w32(0x13c00000|rd_rn_rm(rt,rs,0)|armval);
2534}
2535
2536void emit_biccs_imm(int rs,int imm,int rt)
2537{
2538 u_int armval;
cfbd3c6e 2539 genimm_checked(imm,&armval);
57871462 2540 assem_debug("biccs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2541 output_w32(0x23c00000|rd_rn_rm(rt,rs,0)|armval);
2542}
2543
2544void emit_bicvc_imm(int rs,int imm,int rt)
2545{
2546 u_int armval;
cfbd3c6e 2547 genimm_checked(imm,&armval);
57871462 2548 assem_debug("bicvc %s,%s,#%d\n",regname[rt],regname[rs],imm);
2549 output_w32(0x73c00000|rd_rn_rm(rt,rs,0)|armval);
2550}
2551
2552void emit_bichi_imm(int rs,int imm,int rt)
2553{
2554 u_int armval;
cfbd3c6e 2555 genimm_checked(imm,&armval);
57871462 2556 assem_debug("bichi %s,%s,#%d\n",regname[rt],regname[rs],imm);
2557 output_w32(0x83c00000|rd_rn_rm(rt,rs,0)|armval);
2558}
2559
2560void emit_orrvs_imm(int rs,int imm,int rt)
2561{
2562 u_int armval;
cfbd3c6e 2563 genimm_checked(imm,&armval);
57871462 2564 assem_debug("orrvs %s,%s,#%d\n",regname[rt],regname[rs],imm);
2565 output_w32(0x63800000|rd_rn_rm(rt,rs,0)|armval);
2566}
2567
b9b61529 2568void emit_orrne_imm(int rs,int imm,int rt)
2569{
2570 u_int armval;
cfbd3c6e 2571 genimm_checked(imm,&armval);
b9b61529 2572 assem_debug("orrne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2573 output_w32(0x13800000|rd_rn_rm(rt,rs,0)|armval);
2574}
2575
2576void emit_andne_imm(int rs,int imm,int rt)
2577{
2578 u_int armval;
cfbd3c6e 2579 genimm_checked(imm,&armval);
b9b61529 2580 assem_debug("andne %s,%s,#%d\n",regname[rt],regname[rs],imm);
2581 output_w32(0x12000000|rd_rn_rm(rt,rs,0)|armval);
2582}
2583
57871462 2584void emit_jno_unlikely(int a)
2585{
2586 //emit_jno(a);
2587 assem_debug("addvc pc,pc,#? (%x)\n",/*a-(int)out-8,*/a);
2588 output_w32(0x72800000|rd_rn_rm(15,15,0));
2589}
2590
054175e9 2591static void save_regs_all(u_int reglist)
57871462 2592{
054175e9 2593 int i;
57871462 2594 if(!reglist) return;
2595 assem_debug("stmia fp,{");
054175e9 2596 for(i=0;i<16;i++)
2597 if(reglist&(1<<i))
2598 assem_debug("r%d,",i);
57871462 2599 assem_debug("}\n");
2600 output_w32(0xe88b0000|reglist);
2601}
054175e9 2602static void restore_regs_all(u_int reglist)
57871462 2603{
054175e9 2604 int i;
57871462 2605 if(!reglist) return;
2606 assem_debug("ldmia fp,{");
054175e9 2607 for(i=0;i<16;i++)
2608 if(reglist&(1<<i))
2609 assem_debug("r%d,",i);
57871462 2610 assem_debug("}\n");
2611 output_w32(0xe89b0000|reglist);
2612}
054175e9 2613// Save registers before function call
2614static void save_regs(u_int reglist)
2615{
2616 reglist&=0x100f; // only save the caller-save registers, r0-r3, r12
2617 save_regs_all(reglist);
2618}
2619// Restore registers after function call
2620static void restore_regs(u_int reglist)
2621{
2622 reglist&=0x100f; // only restore the caller-save registers, r0-r3, r12
2623 restore_regs_all(reglist);
2624}
57871462 2625
2626// Write back consts using r14 so we don't disturb the other registers
2627void wb_consts(signed char i_regmap[],uint64_t i_is32,u_int i_dirty,int i)
2628{
2629 int hr;
2630 for(hr=0;hr<HOST_REGS;hr++) {
2631 if(hr!=EXCLUDE_REG&&i_regmap[hr]>=0&&((i_dirty>>hr)&1)) {
2632 if(((regs[i].isconst>>hr)&1)&&i_regmap[hr]>0) {
2633 if(i_regmap[hr]<64 || !((i_is32>>(i_regmap[hr]&63))&1) ) {
2634 int value=constmap[i][hr];
2635 if(value==0) {
2636 emit_zeroreg(HOST_TEMPREG);
2637 }
2638 else {
2639 emit_movimm(value,HOST_TEMPREG);
2640 }
2641 emit_storereg(i_regmap[hr],HOST_TEMPREG);
24385cae 2642#ifndef FORCE32
57871462 2643 if((i_is32>>i_regmap[hr])&1) {
2644 if(value!=-1&&value!=0) emit_sarimm(HOST_TEMPREG,31,HOST_TEMPREG);
2645 emit_storereg(i_regmap[hr]|64,HOST_TEMPREG);
2646 }
24385cae 2647#endif
57871462 2648 }
2649 }
2650 }
2651 }
2652}
2653
2654/* Stubs/epilogue */
2655
2656void literal_pool(int n)
2657{
2658 if(!literalcount) return;
2659 if(n) {
2660 if((int)out-literals[0][0]<4096-n) return;
2661 }
2662 u_int *ptr;
2663 int i;
2664 for(i=0;i<literalcount;i++)
2665 {
77750690 2666 u_int l_addr=(u_int)out;
2667 int j;
2668 for(j=0;j<i;j++) {
2669 if(literals[j][1]==literals[i][1]) {
2670 //printf("dup %08x\n",literals[i][1]);
2671 l_addr=literals[j][0];
2672 break;
2673 }
2674 }
57871462 2675 ptr=(u_int *)literals[i][0];
77750690 2676 u_int offset=l_addr-(u_int)ptr-8;
57871462 2677 assert(offset<4096);
2678 assert(!(offset&3));
2679 *ptr|=offset;
77750690 2680 if(l_addr==(u_int)out) {
2681 literals[i][0]=l_addr; // remember for dupes
2682 output_w32(literals[i][1]);
2683 }
57871462 2684 }
2685 literalcount=0;
2686}
2687
2688void literal_pool_jumpover(int n)
2689{
2690 if(!literalcount) return;
2691 if(n) {
2692 if((int)out-literals[0][0]<4096-n) return;
2693 }
2694 int jaddr=(int)out;
2695 emit_jmp(0);
2696 literal_pool(0);
2697 set_jump_target(jaddr,(int)out);
2698}
2699
2700emit_extjump2(int addr, int target, int linker)
2701{
2702 u_char *ptr=(u_char *)addr;
2703 assert((ptr[3]&0x0e)==0xa);
2704 emit_loadlp(target,0);
2705 emit_loadlp(addr,1);
24385cae 2706 assert(addr>=BASE_ADDR&&addr<(BASE_ADDR+(1<<TARGET_SIZE_2)));
57871462 2707 //assert((target>=0x80000000&&target<0x80800000)||(target>0xA4000000&&target<0xA4001000));
2708//DEBUG >
2709#ifdef DEBUG_CYCLE_COUNT
2710 emit_readword((int)&last_count,ECX);
2711 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2712 emit_readword((int)&next_interupt,ECX);
2713 emit_writeword(HOST_CCREG,(int)&Count);
2714 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
2715 emit_writeword(ECX,(int)&last_count);
2716#endif
2717//DEBUG <
2718 emit_jmp(linker);
2719}
2720
2721emit_extjump(int addr, int target)
2722{
2723 emit_extjump2(addr, target, (int)dyna_linker);
2724}
2725emit_extjump_ds(int addr, int target)
2726{
2727 emit_extjump2(addr, target, (int)dyna_linker_ds);
2728}
2729
13e35c04 2730// put rt_val into rt, potentially making use of rs with value rs_val
2731static void emit_movimm_from(u_int rs_val,int rs,u_int rt_val,int rt)
2732{
8575a877 2733 u_int armval;
2734 int diff;
2735 if(genimm(rt_val,&armval)) {
2736 assem_debug("mov %s,#%d\n",regname[rt],rt_val);
2737 output_w32(0xe3a00000|rd_rn_rm(rt,0,0)|armval);
2738 return;
2739 }
2740 if(genimm(~rt_val,&armval)) {
2741 assem_debug("mvn %s,#%d\n",regname[rt],rt_val);
2742 output_w32(0xe3e00000|rd_rn_rm(rt,0,0)|armval);
2743 return;
2744 }
2745 diff=rt_val-rs_val;
2746 if(genimm(diff,&armval)) {
2747 assem_debug("add %s,%s,#%d\n",regname[rt],regname[rs],diff);
2748 output_w32(0xe2800000|rd_rn_rm(rt,rs,0)|armval);
2749 return;
2750 }else if(genimm(-diff,&armval)) {
2751 assem_debug("sub %s,%s,#%d\n",regname[rt],regname[rs],-diff);
2752 output_w32(0xe2400000|rd_rn_rm(rt,rs,0)|armval);
2753 return;
2754 }
2755 emit_movimm(rt_val,rt);
2756}
2757
// Return 1 if emit_movimm_from() can do its job cheaply, i.e. v1 == v2, or
// the difference v2-v1 (or its negation) fits in 8 bits after stripping
// pairs of trailing zero bits. Returns 0 otherwise.
static int is_similar_value(u_int v1,u_int v2)
{
  u_int bits;
  int delta;

  if (v1 == v2)
    return 1;

  delta = v2 - v1;

  bits = delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  bits = -delta;
  while (bits != 0 && (bits & 3) == 0)
    bits >>= 2;
  if (bits < 0x100)
    return 1;

  return 0;
}
cbbab9cd 2773
// Move host registers a0 and a1 into the argument registers r0 and r1.
// A negative register number means "nothing to pass" for that argument.
// Trashes r2 (used as scratch when the two values must be swapped).
static void pass_args(int a0, int a1)
{
  if (a1 == 0 && a0 == 1) {
    // a0 lives in r1 and a1 lives in r0: must swap through r2
    emit_mov(a0, 2);
    emit_mov(a1, 1);
    emit_mov(2, 0);
    return;
  }

  if (a1 == 0 && a0 != 0) {
    // a1 currently sits in r0: move it out before r0 is overwritten
    emit_mov(a1, 1);
    if (a0 >= 0)
      emit_mov(a0, 0);
    return;
  }

  // No conflict: move each argument only if it is not already in place
  if (a0 > 0)
    emit_mov(a0, 0);
  if (a1 >= 0 && a1 != 1)
    emit_mov(a1, 1);
}
2790
b1be1eee 2791static void mov_loadtype_adj(int type,int rs,int rt)
2792{
2793 switch(type) {
2794 case LOADB_STUB: emit_signextend8(rs,rt); break;
2795 case LOADBU_STUB: emit_andimm(rs,0xff,rt); break;
2796 case LOADH_STUB: emit_signextend16(rs,rt); break;
2797 case LOADHU_STUB: emit_andimm(rs,0xffff,rt); break;
2798 case LOADW_STUB: if(rs!=rt) emit_mov(rs,rt); break;
2799 default: assert(0);
2800 }
2801}
2802
2803#ifdef PCSX
2804#include "pcsxmem.h"
2805#include "pcsxmem_inline.c"
2806#endif
2807
57871462 2808do_readstub(int n)
2809{
2810 assem_debug("do_readstub %x\n",start+stubs[n][3]*4);
2811 literal_pool(256);
2812 set_jump_target(stubs[n][1],(int)out);
2813 int type=stubs[n][0];
2814 int i=stubs[n][3];
2815 int rs=stubs[n][4];
2816 struct regstat *i_regs=(struct regstat *)stubs[n][5];
2817 u_int reglist=stubs[n][7];
2818 signed char *i_regmap=i_regs->regmap;
2819 int addr=get_reg(i_regmap,AGEN1+(i&1));
2820 int rth,rt;
2821 int ds;
b9b61529 2822 if(itype[i]==C1LS||itype[i]==C2LS||itype[i]==LOADLR) {
57871462 2823 rth=get_reg(i_regmap,FTEMP|64);
2824 rt=get_reg(i_regmap,FTEMP);
2825 }else{
2826 rth=get_reg(i_regmap,rt1[i]|64);
2827 rt=get_reg(i_regmap,rt1[i]);
2828 }
2829 assert(rs>=0);
c6c3b1b3 2830#ifdef PCSX
2831 int r,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0;
2832 reglist|=(1<<rs);
2833 for(r=0;r<=12;r++) {
2834 if(((1<<r)&0x13ff)&&((1<<r)&reglist)==0) {
2835 temp=r; break;
2836 }
2837 }
2838 if(rt>=0)
2839 reglist&=~(1<<rt);
2840 if(temp==-1) {
2841 save_regs(reglist);
2842 regs_saved=1;
2843 temp=(rs==0)?2:0;
2844 }
2845 if((regs_saved||(reglist&2)==0)&&temp!=1&&rs!=1)
2846 temp2=1;
2847 emit_readword((int)&mem_rtab,temp);
2848 emit_shrimm(rs,12,temp2);
2849 emit_readword_dualindexedx4(temp,temp2,temp2);
2850 emit_lsls_imm(temp2,1,temp2);
2851 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2852 switch(type) {
2853 case LOADB_STUB: emit_ldrccsb_dualindexed(temp2,rs,rt); break;
2854 case LOADBU_STUB: emit_ldrccb_dualindexed(temp2,rs,rt); break;
2855 case LOADH_STUB: emit_ldrccsh_dualindexed(temp2,rs,rt); break;
2856 case LOADHU_STUB: emit_ldrcch_dualindexed(temp2,rs,rt); break;
2857 case LOADW_STUB: emit_ldrcc_dualindexed(temp2,rs,rt); break;
2858 }
2859 }
2860 if(regs_saved) {
2861 restore_jump=(int)out;
2862 emit_jcc(0); // jump to reg restore
2863 }
2864 else
2865 emit_jcc(stubs[n][2]); // return address
2866
2867 if(!regs_saved)
2868 save_regs(reglist);
2869 int handler=0;
2870 if(type==LOADB_STUB||type==LOADBU_STUB)
2871 handler=(int)jump_handler_read8;
2872 if(type==LOADH_STUB||type==LOADHU_STUB)
2873 handler=(int)jump_handler_read16;
2874 if(type==LOADW_STUB)
2875 handler=(int)jump_handler_read32;
2876 assert(handler!=0);
b96d3df7 2877 pass_args(rs,temp2);
c6c3b1b3 2878 int cc=get_reg(i_regmap,CCREG);
2879 if(cc<0)
2880 emit_loadreg(CCREG,2);
2573466a 2881 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
c6c3b1b3 2882 emit_call(handler);
2883 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
b1be1eee 2884 mov_loadtype_adj(type,0,rt);
c6c3b1b3 2885 }
2886 if(restore_jump)
2887 set_jump_target(restore_jump,(int)out);
2888 restore_regs(reglist);
2889 emit_jmp(stubs[n][2]); // return address
2890#else // !PCSX
57871462 2891 if(addr<0) addr=rt;
535d208a 2892 if(addr<0&&itype[i]!=C1LS&&itype[i]!=C2LS&&itype[i]!=LOADLR) addr=get_reg(i_regmap,-1);
57871462 2893 assert(addr>=0);
2894 int ftable=0;
2895 if(type==LOADB_STUB||type==LOADBU_STUB)
2896 ftable=(int)readmemb;
2897 if(type==LOADH_STUB||type==LOADHU_STUB)
2898 ftable=(int)readmemh;
2899 if(type==LOADW_STUB)
2900 ftable=(int)readmem;
24385cae 2901#ifndef FORCE32
57871462 2902 if(type==LOADD_STUB)
2903 ftable=(int)readmemd;
24385cae 2904#endif
2905 assert(ftable!=0);
57871462 2906 emit_writeword(rs,(int)&address);
2907 //emit_pusha();
2908 save_regs(reglist);
97a238a6 2909#ifndef PCSX
57871462 2910 ds=i_regs!=&regs[i];
2911 int real_rs=(itype[i]==LOADLR)?-1:get_reg(i_regmap,rs1[i]);
2912 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
2913 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
2914 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
2915 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 2916#endif
57871462 2917 emit_shrimm(rs,16,1);
2918 int cc=get_reg(i_regmap,CCREG);
2919 if(cc<0) {
2920 emit_loadreg(CCREG,2);
2921 }
2922 emit_movimm(ftable,0);
2923 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 2924#ifndef PCSX
57871462 2925 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 2926#endif
57871462 2927 //emit_readword((int)&last_count,temp);
2928 //emit_add(cc,temp,cc);
2929 //emit_writeword(cc,(int)&Count);
2930 //emit_mov(15,14);
2931 emit_call((int)&indirect_jump_indexed);
2932 //emit_callreg(rs);
2933 //emit_readword_dualindexedx4(rs,HOST_TEMPREG,15);
f51dc36c 2934#ifndef PCSX
57871462 2935 // We really shouldn't need to update the count here,
2936 // but not doing so causes random crashes...
2937 emit_readword((int)&Count,HOST_TEMPREG);
2938 emit_readword((int)&next_interupt,2);
2939 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
2940 emit_writeword(2,(int)&last_count);
2941 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
2942 if(cc<0) {
2943 emit_storereg(CCREG,HOST_TEMPREG);
2944 }
f51dc36c 2945#endif
57871462 2946 //emit_popa();
2947 restore_regs(reglist);
2948 //if((cc=get_reg(regmap,CCREG))>=0) {
2949 // emit_loadreg(CCREG,cc);
2950 //}
f18c0f46 2951 if(itype[i]==C1LS||itype[i]==C2LS||(rt>=0&&rt1[i]!=0)) {
2952 assert(rt>=0);
2953 if(type==LOADB_STUB)
2954 emit_movsbl((int)&readmem_dword,rt);
2955 if(type==LOADBU_STUB)
2956 emit_movzbl((int)&readmem_dword,rt);
2957 if(type==LOADH_STUB)
2958 emit_movswl((int)&readmem_dword,rt);
2959 if(type==LOADHU_STUB)
2960 emit_movzwl((int)&readmem_dword,rt);
2961 if(type==LOADW_STUB)
2962 emit_readword((int)&readmem_dword,rt);
2963 if(type==LOADD_STUB) {
2964 emit_readword((int)&readmem_dword,rt);
2965 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
2966 }
57871462 2967 }
2968 emit_jmp(stubs[n][2]); // return address
c6c3b1b3 2969#endif // !PCSX
57871462 2970}
2971
c6c3b1b3 2972#ifdef PCSX
2973// return memhandler, or get directly accessable address and return 0
2974u_int get_direct_memhandler(void *table,u_int addr,int type,u_int *addr_host)
2975{
2976 u_int l1,l2=0;
2977 l1=((u_int *)table)[addr>>12];
2978 if((l1&(1<<31))==0) {
2979 u_int v=l1<<1;
2980 *addr_host=v+addr;
2981 return 0;
2982 }
2983 else {
2984 l1<<=1;
2985 if(type==LOADB_STUB||type==LOADBU_STUB||type==STOREB_STUB)
2986 l2=((u_int *)l1)[0x1000/4 + 0x1000/2 + (addr&0xfff)];
b96d3df7 2987 else if(type==LOADH_STUB||type==LOADHU_STUB||type==STOREH_STUB)
c6c3b1b3 2988 l2=((u_int *)l1)[0x1000/4 + (addr&0xfff)/2];
2989 else
2990 l2=((u_int *)l1)[(addr&0xfff)/4];
2991 if((l2&(1<<31))==0) {
2992 u_int v=l2<<1;
2993 *addr_host=v+(addr&0xfff);
2994 return 0;
2995 }
2996 return l2<<1;
2997 }
2998}
2999#endif
3000
57871462 3001inline_readstub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3002{
3003 int rs=get_reg(regmap,target);
3004 int rth=get_reg(regmap,target|64);
3005 int rt=get_reg(regmap,target);
535d208a 3006 if(rs<0) rs=get_reg(regmap,-1);
57871462 3007 assert(rs>=0);
c6c3b1b3 3008#ifdef PCSX
b1be1eee 3009 u_int handler,host_addr=0,is_dynamic,far_call=0;
3010 int cc=get_reg(regmap,CCREG);
3011 if(pcsx_direct_read(type,addr,CLOCK_ADJUST(adj+1),cc,target?rs:-1,rt))
3012 return;
c6c3b1b3 3013 handler=get_direct_memhandler(mem_rtab,addr,type,&host_addr);
3014 if (handler==0) {
3015 if(rt<0)
3016 return;
13e35c04 3017 if(addr!=host_addr)
3018 emit_movimm_from(addr,rs,host_addr,rs);
c6c3b1b3 3019 switch(type) {
3020 case LOADB_STUB: emit_movsbl_indexed(0,rs,rt); break;
3021 case LOADBU_STUB: emit_movzbl_indexed(0,rs,rt); break;
3022 case LOADH_STUB: emit_movswl_indexed(0,rs,rt); break;
3023 case LOADHU_STUB: emit_movzwl_indexed(0,rs,rt); break;
3024 case LOADW_STUB: emit_readword_indexed(0,rs,rt); break;
3025 default: assert(0);
3026 }
3027 return;
3028 }
b1be1eee 3029 is_dynamic=pcsxmem_is_handler_dynamic(addr);
3030 if(is_dynamic) {
3031 if(type==LOADB_STUB||type==LOADBU_STUB)
3032 handler=(int)jump_handler_read8;
3033 if(type==LOADH_STUB||type==LOADHU_STUB)
3034 handler=(int)jump_handler_read16;
3035 if(type==LOADW_STUB)
3036 handler=(int)jump_handler_read32;
3037 }
c6c3b1b3 3038
3039 // call a memhandler
3040 if(rt>=0)
3041 reglist&=~(1<<rt);
3042 save_regs(reglist);
3043 if(target==0)
3044 emit_movimm(addr,0);
3045 else if(rs!=0)
3046 emit_mov(rs,0);
c6c3b1b3 3047 int offset=(int)handler-(int)out-8;
3048 if(offset<-33554432||offset>=33554432) {
3049 // unreachable memhandler, a plugin func perhaps
b1be1eee 3050 emit_movimm(handler,12);
3051 far_call=1;
3052 }
3053 if(cc<0)
3054 emit_loadreg(CCREG,2);
3055 if(is_dynamic) {
3056 emit_movimm(((u_int *)mem_rtab)[addr>>12]<<1,1);
3057 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
c6c3b1b3 3058 }
b1be1eee 3059 else {
3060 emit_readword((int)&last_count,3);
3061 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
3062 emit_add(2,3,2);
3063 emit_writeword(2,(int)&Count);
3064 }
3065
3066 if(far_call)
3067 emit_callreg(12);
c6c3b1b3 3068 else
3069 emit_call(handler);
b1be1eee 3070
c6c3b1b3 3071 if(rt>=0) {
3072 switch(type) {
3073 case LOADB_STUB: emit_signextend8(0,rt); break;
3074 case LOADBU_STUB: emit_andimm(0,0xff,rt); break;
3075 case LOADH_STUB: emit_signextend16(0,rt); break;
3076 case LOADHU_STUB: emit_andimm(0,0xffff,rt); break;
3077 case LOADW_STUB: if(rt!=0) emit_mov(0,rt); break;
3078 default: assert(0);
3079 }
3080 }
3081 restore_regs(reglist);
3082#else // if !PCSX
57871462 3083 int ftable=0;
3084 if(type==LOADB_STUB||type==LOADBU_STUB)
3085 ftable=(int)readmemb;
3086 if(type==LOADH_STUB||type==LOADHU_STUB)
3087 ftable=(int)readmemh;
3088 if(type==LOADW_STUB)
3089 ftable=(int)readmem;
24385cae 3090#ifndef FORCE32
57871462 3091 if(type==LOADD_STUB)
3092 ftable=(int)readmemd;
24385cae 3093#endif
3094 assert(ftable!=0);
fd99c415 3095 if(target==0)
3096 emit_movimm(addr,rs);
57871462 3097 emit_writeword(rs,(int)&address);
3098 //emit_pusha();
3099 save_regs(reglist);
0c1fe38b 3100#ifndef PCSX
3101 if((signed int)addr>=(signed int)0xC0000000) {
3102 // Theoretically we can have a pagefault here, if the TLB has never
3103 // been enabled and the address is outside the range 80000000..BFFFFFFF
3104 // Write out the registers so the pagefault can be handled. This is
3105 // a very rare case and likely represents a bug.
3106 int ds=regmap!=regs[i].regmap;
3107 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3108 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3109 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3110 }
3111#endif
57871462 3112 //emit_shrimm(rs,16,1);
3113 int cc=get_reg(regmap,CCREG);
3114 if(cc<0) {
3115 emit_loadreg(CCREG,2);
3116 }
3117 //emit_movimm(ftable,0);
3118 emit_movimm(((u_int *)ftable)[addr>>16],0);
3119 //emit_readword((int)&last_count,12);
2573466a 3120 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3121#ifndef PCSX
57871462 3122 if((signed int)addr>=(signed int)0xC0000000) {
3123 // Pagefault address
3124 int ds=regmap!=regs[i].regmap;
3125 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3126 }
f51dc36c 3127#endif
57871462 3128 //emit_add(12,2,2);
3129 //emit_writeword(2,(int)&Count);
3130 //emit_call(((u_int *)ftable)[addr>>16]);
3131 emit_call((int)&indirect_jump);
f51dc36c 3132#ifndef PCSX
57871462 3133 // We really shouldn't need to update the count here,
3134 // but not doing so causes random crashes...
3135 emit_readword((int)&Count,HOST_TEMPREG);
3136 emit_readword((int)&next_interupt,2);
2573466a 3137 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3138 emit_writeword(2,(int)&last_count);
3139 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3140 if(cc<0) {
3141 emit_storereg(CCREG,HOST_TEMPREG);
3142 }
f51dc36c 3143#endif
57871462 3144 //emit_popa();
3145 restore_regs(reglist);
fd99c415 3146 if(rt>=0) {
3147 if(type==LOADB_STUB)
3148 emit_movsbl((int)&readmem_dword,rt);
3149 if(type==LOADBU_STUB)
3150 emit_movzbl((int)&readmem_dword,rt);
3151 if(type==LOADH_STUB)
3152 emit_movswl((int)&readmem_dword,rt);
3153 if(type==LOADHU_STUB)
3154 emit_movzwl((int)&readmem_dword,rt);
3155 if(type==LOADW_STUB)
3156 emit_readword((int)&readmem_dword,rt);
3157 if(type==LOADD_STUB) {
3158 emit_readword((int)&readmem_dword,rt);
3159 if(rth>=0) emit_readword(((int)&readmem_dword)+4,rth);
3160 }
57871462 3161 }
c6c3b1b3 3162#endif // !PCSX
57871462 3163}
3164
3165do_writestub(int n)
3166{
3167 assem_debug("do_writestub %x\n",start+stubs[n][3]*4);
3168 literal_pool(256);
3169 set_jump_target(stubs[n][1],(int)out);
3170 int type=stubs[n][0];
3171 int i=stubs[n][3];
3172 int rs=stubs[n][4];
3173 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3174 u_int reglist=stubs[n][7];
3175 signed char *i_regmap=i_regs->regmap;
3176 int addr=get_reg(i_regmap,AGEN1+(i&1));
3177 int rth,rt,r;
3178 int ds;
b9b61529 3179 if(itype[i]==C1LS||itype[i]==C2LS) {
57871462 3180 rth=get_reg(i_regmap,FTEMP|64);
3181 rt=get_reg(i_regmap,r=FTEMP);
3182 }else{
3183 rth=get_reg(i_regmap,rs2[i]|64);
3184 rt=get_reg(i_regmap,r=rs2[i]);
3185 }
3186 assert(rs>=0);
3187 assert(rt>=0);
b96d3df7 3188#ifdef PCSX
3189 int rtmp,temp=-1,temp2=HOST_TEMPREG,regs_saved=0,restore_jump=0,ra;
3190 int reglist2=reglist|(1<<rs)|(1<<rt);
3191 for(rtmp=0;rtmp<=12;rtmp++) {
3192 if(((1<<rtmp)&0x13ff)&&((1<<rtmp)&reglist2)==0) {
3193 temp=rtmp; break;
3194 }
3195 }
3196 if(temp==-1) {
3197 save_regs(reglist);
3198 regs_saved=1;
3199 for(rtmp=0;rtmp<=3;rtmp++)
3200 if(rtmp!=rs&&rtmp!=rt)
3201 {temp=rtmp;break;}
3202 }
3203 if((regs_saved||(reglist2&8)==0)&&temp!=3&&rs!=3&&rt!=3)
3204 temp2=3;
3205 emit_readword((int)&mem_wtab,temp);
3206 emit_shrimm(rs,12,temp2);
3207 emit_readword_dualindexedx4(temp,temp2,temp2);
3208 emit_lsls_imm(temp2,1,temp2);
3209 switch(type) {
3210 case STOREB_STUB: emit_strccb_dualindexed(temp2,rs,rt); break;
3211 case STOREH_STUB: emit_strcch_dualindexed(temp2,rs,rt); break;
3212 case STOREW_STUB: emit_strcc_dualindexed(temp2,rs,rt); break;
3213 default: assert(0);
3214 }
3215 if(regs_saved) {
3216 restore_jump=(int)out;
3217 emit_jcc(0); // jump to reg restore
3218 }
3219 else
3220 emit_jcc(stubs[n][2]); // return address (invcode check)
3221
3222 if(!regs_saved)
3223 save_regs(reglist);
3224 int handler=0;
3225 switch(type) {
3226 case STOREB_STUB: handler=(int)jump_handler_write8; break;
3227 case STOREH_STUB: handler=(int)jump_handler_write16; break;
3228 case STOREW_STUB: handler=(int)jump_handler_write32; break;
3229 }
3230 assert(handler!=0);
3231 pass_args(rs,rt);
3232 if(temp2!=3)
3233 emit_mov(temp2,3);
3234 int cc=get_reg(i_regmap,CCREG);
3235 if(cc<0)
3236 emit_loadreg(CCREG,2);
2573466a 3237 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3238 // returns new cycle_count
3239 emit_call(handler);
2573466a 3240 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3241 if(cc<0)
3242 emit_storereg(CCREG,2);
3243 if(restore_jump)
3244 set_jump_target(restore_jump,(int)out);
3245 restore_regs(reglist);
3246 ra=stubs[n][2];
b96d3df7 3247 emit_jmp(ra);
3248#else // if !PCSX
57871462 3249 if(addr<0) addr=get_reg(i_regmap,-1);
3250 assert(addr>=0);
3251 int ftable=0;
3252 if(type==STOREB_STUB)
3253 ftable=(int)writememb;
3254 if(type==STOREH_STUB)
3255 ftable=(int)writememh;
3256 if(type==STOREW_STUB)
3257 ftable=(int)writemem;
24385cae 3258#ifndef FORCE32
57871462 3259 if(type==STORED_STUB)
3260 ftable=(int)writememd;
24385cae 3261#endif
3262 assert(ftable!=0);
57871462 3263 emit_writeword(rs,(int)&address);
3264 //emit_shrimm(rs,16,rs);
3265 //emit_movmem_indexedx4(ftable,rs,rs);
3266 if(type==STOREB_STUB)
3267 emit_writebyte(rt,(int)&byte);
3268 if(type==STOREH_STUB)
3269 emit_writehword(rt,(int)&hword);
3270 if(type==STOREW_STUB)
3271 emit_writeword(rt,(int)&word);
3272 if(type==STORED_STUB) {
3d624f89 3273#ifndef FORCE32
57871462 3274 emit_writeword(rt,(int)&dword);
3275 emit_writeword(r?rth:rt,(int)&dword+4);
3d624f89 3276#else
3277 printf("STORED_STUB\n");
3278#endif
57871462 3279 }
3280 //emit_pusha();
3281 save_regs(reglist);
97a238a6 3282#ifndef PCSX
57871462 3283 ds=i_regs!=&regs[i];
3284 int real_rs=get_reg(i_regmap,rs1[i]);
3285 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3286 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3287 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3288 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3289#endif
57871462 3290 emit_shrimm(rs,16,1);
3291 int cc=get_reg(i_regmap,CCREG);
3292 if(cc<0) {
3293 emit_loadreg(CCREG,2);
3294 }
3295 emit_movimm(ftable,0);
3296 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3297#ifndef PCSX
57871462 3298 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
f51dc36c 3299#endif
57871462 3300 //emit_readword((int)&last_count,temp);
3301 //emit_addimm(cc,2*stubs[n][5]+2,cc);
3302 //emit_add(cc,temp,cc);
3303 //emit_writeword(cc,(int)&Count);
3304 emit_call((int)&indirect_jump_indexed);
3305 //emit_callreg(rs);
3306 emit_readword((int)&Count,HOST_TEMPREG);
3307 emit_readword((int)&next_interupt,2);
3308 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3309 emit_writeword(2,(int)&last_count);
3310 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3311 if(cc<0) {
3312 emit_storereg(CCREG,HOST_TEMPREG);
3313 }
3314 //emit_popa();
3315 restore_regs(reglist);
3316 //if((cc=get_reg(regmap,CCREG))>=0) {
3317 // emit_loadreg(CCREG,cc);
3318 //}
3319 emit_jmp(stubs[n][2]); // return address
b96d3df7 3320#endif // !PCSX
57871462 3321}
3322
3323inline_writestub(int type, int i, u_int addr, signed char regmap[], int target, int adj, u_int reglist)
3324{
3325 int rs=get_reg(regmap,-1);
3326 int rth=get_reg(regmap,target|64);
3327 int rt=get_reg(regmap,target);
3328 assert(rs>=0);
3329 assert(rt>=0);
cbbab9cd 3330#ifdef PCSX
b96d3df7 3331 u_int handler,host_addr=0;
b96d3df7 3332 handler=get_direct_memhandler(mem_wtab,addr,type,&host_addr);
3333 if (handler==0) {
13e35c04 3334 if(addr!=host_addr)
3335 emit_movimm_from(addr,rs,host_addr,rs);
b96d3df7 3336 switch(type) {
3337 case STOREB_STUB: emit_writebyte_indexed(rt,0,rs); break;
3338 case STOREH_STUB: emit_writehword_indexed(rt,0,rs); break;
3339 case STOREW_STUB: emit_writeword_indexed(rt,0,rs); break;
3340 default: assert(0);
3341 }
3342 return;
3343 }
3344
3345 // call a memhandler
3346 save_regs(reglist);
13e35c04 3347 pass_args(rs,rt);
b96d3df7 3348 int cc=get_reg(regmap,CCREG);
3349 if(cc<0)
3350 emit_loadreg(CCREG,2);
2573466a 3351 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
b96d3df7 3352 emit_movimm(handler,3);
3353 // returns new cycle_count
3354 emit_call((int)jump_handler_write_h);
2573466a 3355 emit_addimm(0,-CLOCK_ADJUST(adj+1),cc<0?2:cc);
b96d3df7 3356 if(cc<0)
3357 emit_storereg(CCREG,2);
3358 restore_regs(reglist);
3359#else // if !pcsx
57871462 3360 int ftable=0;
3361 if(type==STOREB_STUB)
3362 ftable=(int)writememb;
3363 if(type==STOREH_STUB)
3364 ftable=(int)writememh;
3365 if(type==STOREW_STUB)
3366 ftable=(int)writemem;
24385cae 3367#ifndef FORCE32
57871462 3368 if(type==STORED_STUB)
3369 ftable=(int)writememd;
24385cae 3370#endif
3371 assert(ftable!=0);
57871462 3372 emit_writeword(rs,(int)&address);
3373 //emit_shrimm(rs,16,rs);
3374 //emit_movmem_indexedx4(ftable,rs,rs);
3375 if(type==STOREB_STUB)
3376 emit_writebyte(rt,(int)&byte);
3377 if(type==STOREH_STUB)
3378 emit_writehword(rt,(int)&hword);
3379 if(type==STOREW_STUB)
3380 emit_writeword(rt,(int)&word);
3381 if(type==STORED_STUB) {
3d624f89 3382#ifndef FORCE32
57871462 3383 emit_writeword(rt,(int)&dword);
3384 emit_writeword(target?rth:rt,(int)&dword+4);
3d624f89 3385#else
3386 printf("STORED_STUB\n");
3387#endif
57871462 3388 }
3389 //emit_pusha();
3390 save_regs(reglist);
0c1fe38b 3391#ifndef PCSX
3392 // rearmed note: load_all_consts prevents BIOS boot, some bug?
3393 if((signed int)addr>=(signed int)0xC0000000) {
3394 // Theoretically we can have a pagefault here, if the TLB has never
3395 // been enabled and the address is outside the range 80000000..BFFFFFFF
3396 // Write out the registers so the pagefault can be handled. This is
3397 // a very rare case and likely represents a bug.
3398 int ds=regmap!=regs[i].regmap;
3399 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3400 if(!ds) wb_dirtys(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty);
3401 else wb_dirtys(branch_regs[i-1].regmap_entry,branch_regs[i-1].was32,branch_regs[i-1].wasdirty);
3402 }
3403#endif
57871462 3404 //emit_shrimm(rs,16,1);
3405 int cc=get_reg(regmap,CCREG);
3406 if(cc<0) {
3407 emit_loadreg(CCREG,2);
3408 }
3409 //emit_movimm(ftable,0);
3410 emit_movimm(((u_int *)ftable)[addr>>16],0);
3411 //emit_readword((int)&last_count,12);
2573466a 3412 emit_addimm(cc<0?2:cc,CLOCK_ADJUST(adj+1),2);
f51dc36c 3413#ifndef PCSX
57871462 3414 if((signed int)addr>=(signed int)0xC0000000) {
3415 // Pagefault address
3416 int ds=regmap!=regs[i].regmap;
3417 emit_movimm(start+i*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3418 }
f51dc36c 3419#endif
57871462 3420 //emit_add(12,2,2);
3421 //emit_writeword(2,(int)&Count);
3422 //emit_call(((u_int *)ftable)[addr>>16]);
3423 emit_call((int)&indirect_jump);
3424 emit_readword((int)&Count,HOST_TEMPREG);
3425 emit_readword((int)&next_interupt,2);
2573466a 3426 emit_addimm(HOST_TEMPREG,-CLOCK_ADJUST(adj+1),HOST_TEMPREG);
57871462 3427 emit_writeword(2,(int)&last_count);
3428 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3429 if(cc<0) {
3430 emit_storereg(CCREG,HOST_TEMPREG);
3431 }
3432 //emit_popa();
3433 restore_regs(reglist);
b96d3df7 3434#endif
57871462 3435}
3436
3437do_unalignedwritestub(int n)
3438{
b7918751 3439 assem_debug("do_unalignedwritestub %x\n",start+stubs[n][3]*4);
3440 literal_pool(256);
57871462 3441 set_jump_target(stubs[n][1],(int)out);
b7918751 3442
3443 int i=stubs[n][3];
3444 struct regstat *i_regs=(struct regstat *)stubs[n][4];
3445 int addr=stubs[n][5];
3446 u_int reglist=stubs[n][7];
3447 signed char *i_regmap=i_regs->regmap;
3448 int temp2=get_reg(i_regmap,FTEMP);
3449 int rt;
3450 int ds, real_rs;
3451 rt=get_reg(i_regmap,rs2[i]);
3452 assert(rt>=0);
3453 assert(addr>=0);
3454 assert(opcode[i]==0x2a||opcode[i]==0x2e); // SWL/SWR only implemented
3455 reglist|=(1<<addr);
3456 reglist&=~(1<<temp2);
3457
b96d3df7 3458#if 1
3459 // don't bother with it and call write handler
3460 save_regs(reglist);
3461 pass_args(addr,rt);
3462 int cc=get_reg(i_regmap,CCREG);
3463 if(cc<0)
3464 emit_loadreg(CCREG,2);
2573466a 3465 emit_addimm(cc<0?2:cc,CLOCK_ADJUST((int)stubs[n][6]+1),2);
b96d3df7 3466 emit_call((int)(opcode[i]==0x2a?jump_handle_swl:jump_handle_swr));
2573466a 3467 emit_addimm(0,-CLOCK_ADJUST((int)stubs[n][6]+1),cc<0?2:cc);
b96d3df7 3468 if(cc<0)
3469 emit_storereg(CCREG,2);
3470 restore_regs(reglist);
3471 emit_jmp(stubs[n][2]); // return address
3472#else
b7918751 3473 emit_andimm(addr,0xfffffffc,temp2);
3474 emit_writeword(temp2,(int)&address);
3475
3476 save_regs(reglist);
97a238a6 3477#ifndef PCSX
b7918751 3478 ds=i_regs!=&regs[i];
3479 real_rs=get_reg(i_regmap,rs1[i]);
3480 u_int cmask=ds?-1:(0x100f|~i_regs->wasconst);
3481 if(!ds) load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&0x100f,i);
3482 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty&cmask&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs)));
3483 if(!ds) wb_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty&~(1<<addr)&(real_rs<0?-1:~(1<<real_rs))&~0x100f,i);
97a238a6 3484#endif
b7918751 3485 emit_shrimm(addr,16,1);
3486 int cc=get_reg(i_regmap,CCREG);
3487 if(cc<0) {
3488 emit_loadreg(CCREG,2);
3489 }
3490 emit_movimm((u_int)readmem,0);
3491 emit_addimm(cc<0?2:cc,2*stubs[n][6]+2,2);
f51dc36c 3492#ifndef PCSX
3493 // pagefault address
3494 emit_movimm(start+stubs[n][3]*4+(((regs[i].was32>>rs1[i])&1)<<1)+ds,3);
3495#endif
b7918751 3496 emit_call((int)&indirect_jump_indexed);
3497 restore_regs(reglist);
3498
3499 emit_readword((int)&readmem_dword,temp2);
3500 int temp=addr; //hmh
3501 emit_shlimm(addr,3,temp);
3502 emit_andimm(temp,24,temp);
3503#ifdef BIG_ENDIAN_MIPS
3504 if (opcode[i]==0x2e) // SWR
3505#else
3506 if (opcode[i]==0x2a) // SWL
3507#endif
3508 emit_xorimm(temp,24,temp);
3509 emit_movimm(-1,HOST_TEMPREG);
55439448 3510 if (opcode[i]==0x2a) { // SWL
b7918751 3511 emit_bic_lsr(temp2,HOST_TEMPREG,temp,temp2);
3512 emit_orrshr(rt,temp,temp2);
3513 }else{
3514 emit_bic_lsl(temp2,HOST_TEMPREG,temp,temp2);
3515 emit_orrshl(rt,temp,temp2);
3516 }
3517 emit_readword((int)&address,addr);
3518 emit_writeword(temp2,(int)&word);
3519 //save_regs(reglist); // don't need to, no state changes
3520 emit_shrimm(addr,16,1);
3521 emit_movimm((u_int)writemem,0);
3522 //emit_call((int)&indirect_jump_indexed);
3523 emit_mov(15,14);
3524 emit_readword_dualindexedx4(0,1,15);
3525 emit_readword((int)&Count,HOST_TEMPREG);
3526 emit_readword((int)&next_interupt,2);
3527 emit_addimm(HOST_TEMPREG,-2*stubs[n][6]-2,HOST_TEMPREG);
3528 emit_writeword(2,(int)&last_count);
3529 emit_sub(HOST_TEMPREG,2,cc<0?HOST_TEMPREG:cc);
3530 if(cc<0) {
3531 emit_storereg(CCREG,HOST_TEMPREG);
3532 }
3533 restore_regs(reglist);
57871462 3534 emit_jmp(stubs[n][2]); // return address
b96d3df7 3535#endif
57871462 3536}
3537
/*
 * Debug helper: print eight register values in hex.
 * The parameters are printed in the order a,b,c,d,ebp,esi,edi; esp is
 * accepted but not printed.
 * NOTE(review): the final "(%x)" value is the int stored just below the
 * address of the first parameter.  That is an out-of-bounds read whose
 * meaning depends on the ABI -- presumably a leftover from an x86 build.
 */
void printregs(int edi,int esi,int ebp,int esp,int b,int d,int c,int a)
{
  int *below_first_arg=&edi-1;
  printf("regs: %x %x %x %x %x %x %x (%x)\n",
         a,b,c,d,ebp,esi,edi,*below_first_arg);
}
3542
3543do_invstub(int n)
3544{
3545 literal_pool(20);
3546 u_int reglist=stubs[n][3];
3547 set_jump_target(stubs[n][1],(int)out);
3548 save_regs(reglist);
3549 if(stubs[n][4]!=0) emit_mov(stubs[n][4],0);
3550 emit_call((int)&invalidate_addr);
3551 restore_regs(reglist);
3552 emit_jmp(stubs[n][2]); // return address
3553}
3554
3555int do_dirty_stub(int i)
3556{
3557 assem_debug("do_dirty_stub %x\n",start+i*4);
ac545b3a 3558 u_int addr=(int)start<(int)0xC0000000?(u_int)source:(u_int)start;
3559 #ifdef PCSX
3560 addr=(u_int)source;
3561 #endif
57871462 3562 // Careful about the code output here, verify_dirty needs to parse it.
3563 #ifdef ARMv5_ONLY
ac545b3a 3564 emit_loadlp(addr,1);
57871462 3565 emit_loadlp((int)copy,2);
3566 emit_loadlp(slen*4,3);
3567 #else
ac545b3a 3568 emit_movw(addr&0x0000FFFF,1);
57871462 3569 emit_movw(((u_int)copy)&0x0000FFFF,2);
ac545b3a 3570 emit_movt(addr&0xFFFF0000,1);
57871462 3571 emit_movt(((u_int)copy)&0xFFFF0000,2);
3572 emit_movw(slen*4,3);
3573 #endif
3574 emit_movimm(start+i*4,0);
3575 emit_call((int)start<(int)0xC0000000?(int)&verify_code:(int)&verify_code_vm);
3576 int entry=(int)out;
3577 load_regs_entry(i);
3578 if(entry==(int)out) entry=instr_addr[i];
3579 emit_jmp(instr_addr[i]);
3580 return entry;
3581}
3582
3583void do_dirty_stub_ds()
3584{
3585 // Careful about the code output here, verify_dirty needs to parse it.
3586 #ifdef ARMv5_ONLY
3587 emit_loadlp((int)start<(int)0xC0000000?(int)source:(int)start,1);
3588 emit_loadlp((int)copy,2);
3589 emit_loadlp(slen*4,3);
3590 #else
3591 emit_movw(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0x0000FFFF,1);
3592 emit_movw(((u_int)copy)&0x0000FFFF,2);
3593 emit_movt(((int)start<(int)0xC0000000?(u_int)source:(u_int)start)&0xFFFF0000,1);
3594 emit_movt(((u_int)copy)&0xFFFF0000,2);
3595 emit_movw(slen*4,3);
3596 #endif
3597 emit_movimm(start+1,0);
3598 emit_call((int)&verify_code_ds);
3599}
3600
3601do_cop1stub(int n)
3602{
3603 literal_pool(256);
3604 assem_debug("do_cop1stub %x\n",start+stubs[n][3]*4);
3605 set_jump_target(stubs[n][1],(int)out);
3606 int i=stubs[n][3];
3d624f89 3607// int rs=stubs[n][4];
57871462 3608 struct regstat *i_regs=(struct regstat *)stubs[n][5];
3609 int ds=stubs[n][6];
3610 if(!ds) {
3611 load_all_consts(regs[i].regmap_entry,regs[i].was32,regs[i].wasdirty,i);
3612 //if(i_regs!=&regs[i]) printf("oops: regs[i]=%x i_regs=%x",(int)&regs[i],(int)i_regs);
3613 }
3614 //else {printf("fp exception in delay slot\n");}
3615 wb_dirtys(i_regs->regmap_entry,i_regs->was32,i_regs->wasdirty);
3616 if(regs[i].regmap_entry[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
3617 emit_movimm(start+(i-ds)*4,EAX); // Get PC
2573466a 3618 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG); // CHECK: is this right? There should probably be an extra cycle...
57871462 3619 emit_jmp(ds?(int)fp_exception_ds:(int)fp_exception);
3620}
3621
63cb0298 3622#ifndef DISABLE_TLB
3623
57871462 3624/* TLB */
3625
3626int do_tlb_r(int s,int ar,int map,int x,int a,int shift,int c,u_int addr)
3627{
3628 if(c) {
3629 if((signed int)addr>=(signed int)0xC0000000) {
3630 // address_generation already loaded the const
3631 emit_readword_dualindexedx4(FP,map,map);
3632 }
3633 else
3634 return -1; // No mapping
3635 }
3636 else {
3637 assert(s!=map);
3638 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3639 emit_addsr12(map,s,map);
3640 // Schedule this while we wait on the load
3641 //if(x) emit_xorimm(s,x,ar);
3642 if(shift>=0) emit_shlimm(s,3,shift);
3643 if(~a) emit_andimm(s,a,ar);
3644 emit_readword_dualindexedx4(FP,map,map);
3645 }
3646 return map;
3647}
3648int do_tlb_r_branch(int map, int c, u_int addr, int *jaddr)
3649{
3650 if(!c||(signed int)addr>=(signed int)0xC0000000) {
3651 emit_test(map,map);
3652 *jaddr=(int)out;
3653 emit_js(0);
3654 }
3655 return map;
3656}
3657
3658int gen_tlb_addr_r(int ar, int map) {
3659 if(map>=0) {
3660 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3661 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3662 }
3663}
3664
3665int do_tlb_w(int s,int ar,int map,int x,int c,u_int addr)
3666{
3667 if(c) {
3668 if(addr<0x80800000||addr>=0xC0000000) {
3669 // address_generation already loaded the const
3670 emit_readword_dualindexedx4(FP,map,map);
3671 }
3672 else
3673 return -1; // No mapping
3674 }
3675 else {
3676 assert(s!=map);
3677 emit_movimm(((int)memory_map-(int)&dynarec_local)>>2,map);
3678 emit_addsr12(map,s,map);
3679 // Schedule this while we wait on the load
3680 //if(x) emit_xorimm(s,x,ar);
3681 emit_readword_dualindexedx4(FP,map,map);
3682 }
3683 return map;
3684}
3685int do_tlb_w_branch(int map, int c, u_int addr, int *jaddr)
3686{
3687 if(!c||addr<0x80800000||addr>=0xC0000000) {
3688 emit_testimm(map,0x40000000);
3689 *jaddr=(int)out;
3690 emit_jne(0);
3691 }
3692}
3693
3694int gen_tlb_addr_w(int ar, int map) {
3695 if(map>=0) {
3696 assem_debug("add %s,%s,%s lsl #2\n",regname[ar],regname[ar],regname[map]);
3697 output_w32(0xe0800100|rd_rn_rm(ar,ar,map));
3698 }
3699}
3700
3701// Generate the address of the memory_map entry, relative to dynarec_local
3702generate_map_const(u_int addr,int reg) {
3703 //printf("generate_map_const(%x,%s)\n",addr,regname[reg]);
3704 emit_movimm((addr>>12)+(((u_int)memory_map-(u_int)&dynarec_local)>>2),reg);
3705}
3706
63cb0298 3707#else
3708
// No-op replacements used when DISABLE_TLB is defined.  The parameter
// lists are intentionally left unprototyped so that existing call sites,
// which pass the full argument lists, keep compiling.  Each returns 0.
static int do_tlb_r()
{
  return 0;
}

static int do_tlb_r_branch()
{
  return 0;
}

static int gen_tlb_addr_r()
{
  return 0;
}

static int do_tlb_w()
{
  return 0;
}

static int do_tlb_w_branch()
{
  return 0;
}

static int gen_tlb_addr_w()
{
  return 0;
}
3715
3716#endif // DISABLE_TLB
3717
57871462 3718/* Special assem */
3719
3720void shift_assemble_arm(int i,struct regstat *i_regs)
3721{
3722 if(rt1[i]) {
3723 if(opcode2[i]<=0x07) // SLLV/SRLV/SRAV
3724 {
3725 signed char s,t,shift;
3726 t=get_reg(i_regs->regmap,rt1[i]);
3727 s=get_reg(i_regs->regmap,rs1[i]);
3728 shift=get_reg(i_regs->regmap,rs2[i]);
3729 if(t>=0){
3730 if(rs1[i]==0)
3731 {
3732 emit_zeroreg(t);
3733 }
3734 else if(rs2[i]==0)
3735 {
3736 assert(s>=0);
3737 if(s!=t) emit_mov(s,t);
3738 }
3739 else
3740 {
3741 emit_andimm(shift,31,HOST_TEMPREG);
3742 if(opcode2[i]==4) // SLLV
3743 {
3744 emit_shl(s,HOST_TEMPREG,t);
3745 }
3746 if(opcode2[i]==6) // SRLV
3747 {
3748 emit_shr(s,HOST_TEMPREG,t);
3749 }
3750 if(opcode2[i]==7) // SRAV
3751 {
3752 emit_sar(s,HOST_TEMPREG,t);
3753 }
3754 }
3755 }
3756 } else { // DSLLV/DSRLV/DSRAV
3757 signed char sh,sl,th,tl,shift;
3758 th=get_reg(i_regs->regmap,rt1[i]|64);
3759 tl=get_reg(i_regs->regmap,rt1[i]);
3760 sh=get_reg(i_regs->regmap,rs1[i]|64);
3761 sl=get_reg(i_regs->regmap,rs1[i]);
3762 shift=get_reg(i_regs->regmap,rs2[i]);
3763 if(tl>=0){
3764 if(rs1[i]==0)
3765 {
3766 emit_zeroreg(tl);
3767 if(th>=0) emit_zeroreg(th);
3768 }
3769 else if(rs2[i]==0)
3770 {
3771 assert(sl>=0);
3772 if(sl!=tl) emit_mov(sl,tl);
3773 if(th>=0&&sh!=th) emit_mov(sh,th);
3774 }
3775 else
3776 {
3777 // FIXME: What if shift==tl ?
3778 assert(shift!=tl);
3779 int temp=get_reg(i_regs->regmap,-1);
3780 int real_th=th;
3781 if(th<0&&opcode2[i]!=0x14) {th=temp;} // DSLLV doesn't need a temporary register
3782 assert(sl>=0);
3783 assert(sh>=0);
3784 emit_andimm(shift,31,HOST_TEMPREG);
3785 if(opcode2[i]==0x14) // DSLLV
3786 {
3787 if(th>=0) emit_shl(sh,HOST_TEMPREG,th);
3788 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3789 emit_orrshr(sl,HOST_TEMPREG,th);
3790 emit_andimm(shift,31,HOST_TEMPREG);
3791 emit_testimm(shift,32);
3792 emit_shl(sl,HOST_TEMPREG,tl);
3793 if(th>=0) emit_cmovne_reg(tl,th);
3794 emit_cmovne_imm(0,tl);
3795 }
3796 if(opcode2[i]==0x16) // DSRLV
3797 {
3798 assert(th>=0);
3799 emit_shr(sl,HOST_TEMPREG,tl);
3800 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3801 emit_orrshl(sh,HOST_TEMPREG,tl);
3802 emit_andimm(shift,31,HOST_TEMPREG);
3803 emit_testimm(shift,32);
3804 emit_shr(sh,HOST_TEMPREG,th);
3805 emit_cmovne_reg(th,tl);
3806 if(real_th>=0) emit_cmovne_imm(0,th);
3807 }
3808 if(opcode2[i]==0x17) // DSRAV
3809 {
3810 assert(th>=0);
3811 emit_shr(sl,HOST_TEMPREG,tl);
3812 emit_rsbimm(HOST_TEMPREG,32,HOST_TEMPREG);
3813 if(real_th>=0) {
3814 assert(temp>=0);
3815 emit_sarimm(th,31,temp);
3816 }
3817 emit_orrshl(sh,HOST_TEMPREG,tl);
3818 emit_andimm(shift,31,HOST_TEMPREG);
3819 emit_testimm(shift,32);
3820 emit_sar(sh,HOST_TEMPREG,th);
3821 emit_cmovne_reg(th,tl);
3822 if(real_th>=0) emit_cmovne_reg(temp,th);
3823 }
3824 }
3825 }
3826 }
3827 }
3828}
ffb0b9e0 3829
3830#ifdef PCSX
3831static void speculate_mov(int rs,int rt)
3832{
3833 if(rt!=0) {
3834 smrv_strong_next|=1<<rt;
3835 smrv[rt]=smrv[rs];
3836 }
3837}
3838
3839static void speculate_mov_weak(int rs,int rt)
3840{
3841 if(rt!=0) {
3842 smrv_weak_next|=1<<rt;
3843 smrv[rt]=smrv[rs];
3844 }
3845}
3846
3847static void speculate_register_values(int i)
3848{
3849 if(i==0) {
3850 memcpy(smrv,psxRegs.GPR.r,sizeof(smrv));
3851 // gp,sp are likely to stay the same throughout the block
3852 smrv_strong_next=(1<<28)|(1<<29)|(1<<30);
3853 smrv_weak_next=~smrv_strong_next;
3854 //printf(" llr %08x\n", smrv[4]);
3855 }
3856 smrv_strong=smrv_strong_next;
3857 smrv_weak=smrv_weak_next;
3858 switch(itype[i]) {
3859 case ALU:
3860 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3861 else if((smrv_strong>>rs2[i])&1) speculate_mov(rs2[i],rt1[i]);
3862 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3863 else if((smrv_weak>>rs2[i])&1) speculate_mov_weak(rs2[i],rt1[i]);
3864 else {
3865 smrv_strong_next&=~(1<<rt1[i]);
3866 smrv_weak_next&=~(1<<rt1[i]);
3867 }
3868 break;
3869 case SHIFTIMM:
3870 smrv_strong_next&=~(1<<rt1[i]);
3871 smrv_weak_next&=~(1<<rt1[i]);
3872 // fallthrough
3873 case IMM16:
3874 if(rt1[i]&&is_const(&regs[i],rt1[i])) {
3875 int value,hr=get_reg(regs[i].regmap,rt1[i]);
3876 if(hr>=0) {
3877 if(get_final_value(hr,i,&value))
3878 smrv[rt1[i]]=value;
3879 else smrv[rt1[i]]=constmap[i][hr];
3880 smrv_strong_next|=1<<rt1[i];
3881 }
3882 }
3883 else {
3884 if ((smrv_strong>>rs1[i])&1) speculate_mov(rs1[i],rt1[i]);
3885 else if((smrv_weak>>rs1[i])&1) speculate_mov_weak(rs1[i],rt1[i]);
3886 }
3887 break;
3888 case LOAD:
3889 if(start<0x2000&&(rt1[i]==26||(smrv[rt1[i]]>>24)==0xa0)) {
3890 // special case for BIOS
3891 smrv[rt1[i]]=0xa0000000;
3892 smrv_strong_next|=1<<rt1[i];
3893 break;
3894 }
3895 // fallthrough
3896 case SHIFT:
3897 case LOADLR:
3898 case MOV:
3899 smrv_strong_next&=~(1<<rt1[i]);
3900 smrv_weak_next&=~(1<<rt1[i]);
3901 break;
3902 case COP0:
3903 case COP2:
3904 if(opcode2[i]==0||opcode2[i]==2) { // MFC/CFC
3905 smrv_strong_next&=~(1<<rt1[i]);
3906 smrv_weak_next&=~(1<<rt1[i]);
3907 }
3908 break;
3909 case C2LS:
3910 if (opcode[i]==0x32) { // LWC2
3911 smrv_strong_next&=~(1<<rt1[i]);
3912 smrv_weak_next&=~(1<<rt1[i]);
3913 }
3914 break;
3915 }
3916#if 0
3917 int r=4;
3918 printf("x %08x %08x %d %d c %08x %08x\n",smrv[r],start+i*4,
3919 ((smrv_strong>>r)&1),(smrv_weak>>r)&1,regs[i].isconst,regs[i].wasconst);
3920#endif
3921}
3922
// Memory region classes returned by get_ptr_mem_type().
enum {
  MTYPE_8000 = 0, // default; also returned for anything not listed below
  MTYPE_8020 = 1, // 0x80200000..0x807fffff
  MTYPE_0000 = 2, // below 0x00200000
  MTYPE_A000 = 3, // 0xa0000000..0xa01fffff
  MTYPE_1F80 = 4, // 0x1f800000..0x1f800fff
};
3930
3931static int get_ptr_mem_type(u_int a)
3932{
3933 if(a < 0x00200000) {
3934 if(a<0x1000&&((start>>20)==0xbfc||(start>>24)==0xa0))
3935 // return wrong, must use memhandler for BIOS self-test to pass
3936 // 007 does similar stuff from a00 mirror, weird stuff
3937 return MTYPE_8000;
3938 return MTYPE_0000;
3939 }
3940 if(0x1f800000 <= a && a < 0x1f801000)
3941 return MTYPE_1F80;
3942 if(0x80200000 <= a && a < 0x80800000)
3943 return MTYPE_8020;
3944 if(0xa0000000 <= a && a < 0xa0200000)
3945 return MTYPE_A000;
3946 return MTYPE_8000;
3947}
3948#endif
3949
3950static int emit_fastpath_cmp_jump(int i,int addr,int *addr_reg_override)
3951{
3952 int jaddr,type=0;
3953
3954#ifdef PCSX
3955 int mr=rs1[i];
3956 if(((smrv_strong|smrv_weak)>>mr)&1) {
3957 type=get_ptr_mem_type(smrv[mr]);
3958 //printf("set %08x @%08x r%d %d\n", smrv[mr], start+i*4, mr, type);
3959 }
3960 else {
3961 // use the mirror we are running on
3962 type=get_ptr_mem_type(start);
3963 //printf("set nospec @%08x r%d %d\n", start+i*4, mr, type);
3964 }
3965
3966 if(type==MTYPE_8020) { // RAM 80200000+ mirror
3967 emit_andimm(addr,~0x00e00000,HOST_TEMPREG);
3968 addr=*addr_reg_override=HOST_TEMPREG;
3969 type=0;
3970 }
3971 else if(type==MTYPE_0000) { // RAM 0 mirror
3972 emit_orimm(addr,0x80000000,HOST_TEMPREG);
3973 addr=*addr_reg_override=HOST_TEMPREG;
3974 type=0;
3975 }
3976 else if(type==MTYPE_A000) { // RAM A mirror
3977 emit_andimm(addr,~0x20000000,HOST_TEMPREG);
3978 addr=*addr_reg_override=HOST_TEMPREG;
3979 type=0;
3980 }
3981 else if(type==MTYPE_1F80) { // scratchpad
3982 emit_addimm(addr,-0x1f800000,HOST_TEMPREG);
3983 emit_cmpimm(HOST_TEMPREG,0x1000);
3984 jaddr=(int)out;
3985 emit_jc(0);
3986 }
3987#endif
3988
3989 if(type==0)
3990 {
3991 emit_cmpimm(addr,RAM_SIZE);
3992 jaddr=(int)out;
3993 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
3994 // Hint to branch predictor that the branch is unlikely to be taken
3995 if(rs1[i]>=28)
3996 emit_jno_unlikely(0);
3997 else
3998 #endif
3999 emit_jno(0);
4000 }
4001
4002 return jaddr;
4003}
4004
57871462 4005#define shift_assemble shift_assemble_arm
4006
4007void loadlr_assemble_arm(int i,struct regstat *i_regs)
4008{
4009 int s,th,tl,temp,temp2,addr,map=-1;
4010 int offset;
4011 int jaddr=0;
af4ee1fe 4012 int memtarget=0,c=0;
ffb0b9e0 4013 int fastload_reg_override=0;
57871462 4014 u_int hr,reglist=0;
4015 th=get_reg(i_regs->regmap,rt1[i]|64);
4016 tl=get_reg(i_regs->regmap,rt1[i]);
4017 s=get_reg(i_regs->regmap,rs1[i]);
4018 temp=get_reg(i_regs->regmap,-1);
4019 temp2=get_reg(i_regs->regmap,FTEMP);
4020 addr=get_reg(i_regs->regmap,AGEN1+(i&1));
4021 assert(addr<0);
4022 offset=imm[i];
4023 for(hr=0;hr<HOST_REGS;hr++) {
4024 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4025 }
4026 reglist|=1<<temp;
4027 if(offset||s<0||c) addr=temp2;
4028 else addr=s;
4029 if(s>=0) {
4030 c=(i_regs->wasconst>>s)&1;
af4ee1fe 4031 if(c) {
4032 memtarget=((signed int)(constmap[i][s]+offset))<(signed int)0x80000000+RAM_SIZE;
4033 if(using_tlb&&((signed int)(constmap[i][s]+offset))>=(signed int)0xC0000000) memtarget=1;
4034 }
57871462 4035 }
535d208a 4036 if(!using_tlb) {
4037 if(!c) {
4038 #ifdef RAM_OFFSET
4039 map=get_reg(i_regs->regmap,ROREG);
4040 if(map<0) emit_loadreg(ROREG,map=HOST_TEMPREG);
4041 #endif
4042 emit_shlimm(addr,3,temp);
4043 if (opcode[i]==0x22||opcode[i]==0x26) {
4044 emit_andimm(addr,0xFFFFFFFC,temp2); // LWL/LWR
57871462 4045 }else{
535d208a 4046 emit_andimm(addr,0xFFFFFFF8,temp2); // LDL/LDR
57871462 4047 }
ffb0b9e0 4048 jaddr=emit_fastpath_cmp_jump(i,temp2,&fastload_reg_override);
535d208a 4049 }
4050 else {
4051 if (opcode[i]==0x22||opcode[i]==0x26) {
4052 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4053 }else{
4054 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4055 }
57871462 4056 }
535d208a 4057 }else{ // using tlb
4058 int a;
4059 if(c) {
4060 a=-1;
4061 }else if (opcode[i]==0x22||opcode[i]==0x26) {
4062 a=0xFFFFFFFC; // LWL/LWR
4063 }else{
4064 a=0xFFFFFFF8; // LDL/LDR
4065 }
4066 map=get_reg(i_regs->regmap,TLREG);
4067 assert(map>=0);
ea3d2e6e 4068 reglist&=~(1<<map);
535d208a 4069 map=do_tlb_r(addr,temp2,map,0,a,c?-1:temp,c,constmap[i][s]+offset);
4070 if(c) {
4071 if (opcode[i]==0x22||opcode[i]==0x26) {
4072 emit_movimm(((constmap[i][s]+offset)<<3)&24,temp); // LWL/LWR
4073 }else{
4074 emit_movimm(((constmap[i][s]+offset)<<3)&56,temp); // LDL/LDR
57871462 4075 }
535d208a 4076 }
4077 do_tlb_r_branch(map,c,constmap[i][s]+offset,&jaddr);
4078 }
4079 if (opcode[i]==0x22||opcode[i]==0x26) { // LWL/LWR
4080 if(!c||memtarget) {
ffb0b9e0 4081 int a=temp2;
4082 if(fastload_reg_override) a=fastload_reg_override;
535d208a 4083 //emit_readword_indexed((int)rdram-0x80000000,temp2,temp2);
ffb0b9e0 4084 emit_readword_indexed_tlb(0,a,map,temp2);
535d208a 4085 if(jaddr) add_stub(LOADW_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4086 }
4087 else
4088 inline_readstub(LOADW_STUB,i,(constmap[i][s]+offset)&0xFFFFFFFC,i_regs->regmap,FTEMP,ccadj[i],reglist);
4089 if(rt1[i]) {
4090 assert(tl>=0);
57871462 4091 emit_andimm(temp,24,temp);
2002a1db 4092#ifdef BIG_ENDIAN_MIPS
4093 if (opcode[i]==0x26) // LWR
4094#else
4095 if (opcode[i]==0x22) // LWL
4096#endif
4097 emit_xorimm(temp,24,temp);
57871462 4098 emit_movimm(-1,HOST_TEMPREG);
4099 if (opcode[i]==0x26) {
4100 emit_shr(temp2,temp,temp2);
4101 emit_bic_lsr(tl,HOST_TEMPREG,temp,tl);
4102 }else{
4103 emit_shl(temp2,temp,temp2);
4104 emit_bic_lsl(tl,HOST_TEMPREG,temp,tl);
4105 }
4106 emit_or(temp2,tl,tl);
57871462 4107 }
535d208a 4108 //emit_storereg(rt1[i],tl); // DEBUG
4109 }
4110 if (opcode[i]==0x1A||opcode[i]==0x1B) { // LDL/LDR
ffb0b9e0 4111 // FIXME: little endian, fastload_reg_override
535d208a 4112 int temp2h=get_reg(i_regs->regmap,FTEMP|64);
4113 if(!c||memtarget) {
4114 //if(th>=0) emit_readword_indexed((int)rdram-0x80000000,temp2,temp2h);
4115 //emit_readword_indexed((int)rdram-0x7FFFFFFC,temp2,temp2);
4116 emit_readdword_indexed_tlb(0,temp2,map,temp2h,temp2);
4117 if(jaddr) add_stub(LOADD_STUB,jaddr,(int)out,i,temp2,(int)i_regs,ccadj[i],reglist);
4118 }
4119 else
4120 inline_readstub(LOADD_STUB,i,(constmap[i][s]+offset)&0xFFFFFFF8,i_regs->regmap,FTEMP,ccadj[i],reglist);
4121 if(rt1[i]) {
4122 assert(th>=0);
4123 assert(tl>=0);
57871462 4124 emit_testimm(temp,32);
4125 emit_andimm(temp,24,temp);
4126 if (opcode[i]==0x1A) { // LDL
4127 emit_rsbimm(temp,32,HOST_TEMPREG);
4128 emit_shl(temp2h,temp,temp2h);
4129 emit_orrshr(temp2,HOST_TEMPREG,temp2h);
4130 emit_movimm(-1,HOST_TEMPREG);
4131 emit_shl(temp2,temp,temp2);
4132 emit_cmove_reg(temp2h,th);
4133 emit_biceq_lsl(tl,HOST_TEMPREG,temp,tl);
4134 emit_bicne_lsl(th,HOST_TEMPREG,temp,th);
4135 emit_orreq(temp2,tl,tl);
4136 emit_orrne(temp2,th,th);
4137 }
4138 if (opcode[i]==0x1B) { // LDR
4139 emit_xorimm(temp,24,temp);
4140 emit_rsbimm(temp,32,HOST_TEMPREG);
4141 emit_shr(temp2,temp,temp2);
4142 emit_orrshl(temp2h,HOST_TEMPREG,temp2);
4143 emit_movimm(-1,HOST_TEMPREG);
4144 emit_shr(temp2h,temp,temp2h);
4145 emit_cmovne_reg(temp2,tl);
4146 emit_bicne_lsr(th,HOST_TEMPREG,temp,th);
4147 emit_biceq_lsr(tl,HOST_TEMPREG,temp,tl);
4148 emit_orrne(temp2h,th,th);
4149 emit_orreq(temp2h,tl,tl);
4150 }
4151 }
4152 }
4153}
4154#define loadlr_assemble loadlr_assemble_arm
4155
4156void cop0_assemble(int i,struct regstat *i_regs)
4157{
4158 if(opcode2[i]==0) // MFC0
4159 {
4160 signed char t=get_reg(i_regs->regmap,rt1[i]);
4161 char copr=(source[i]>>11)&0x1f;
4162 //assert(t>=0); // Why does this happen? OOT is weird
f1b3b369 4163 if(t>=0&&rt1[i]!=0) {
7139f3c8 4164#ifdef MUPEN64
57871462 4165 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4166 emit_movimm((source[i]>>11)&0x1f,1);
4167 emit_writeword(0,(int)&PC);
4168 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
4169 if(copr==9) {
4170 emit_readword((int)&last_count,ECX);
4171 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
4172 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4173 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4174 emit_writeword(HOST_CCREG,(int)&Count);
4175 }
4176 emit_call((int)MFC0);
4177 emit_readword((int)&readmem_dword,t);
7139f3c8 4178#else
4179 emit_readword((int)&reg_cop0+copr*4,t);
4180#endif
57871462 4181 }
4182 }
4183 else if(opcode2[i]==4) // MTC0
4184 {
4185 signed char s=get_reg(i_regs->regmap,rs1[i]);
4186 char copr=(source[i]>>11)&0x1f;
4187 assert(s>=0);
63cb0298 4188#ifdef MUPEN64
57871462 4189 emit_writeword(s,(int)&readmem_dword);
4190 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
4191 emit_addimm(FP,(int)&fake_pc-(int)&dynarec_local,0);
4192 emit_movimm((source[i]>>11)&0x1f,1);
4193 emit_writeword(0,(int)&PC);
4194 emit_writebyte(1,(int)&(fake_pc.f.r.nrd));
63cb0298 4195#else
4196 wb_register(rs1[i],i_regs->regmap,i_regs->dirty,i_regs->is32);
7139f3c8 4197#endif
4198 if(copr==9||copr==11||copr==12||copr==13) {
63cb0298 4199 emit_readword((int)&last_count,HOST_TEMPREG);
57871462 4200 emit_loadreg(CCREG,HOST_CCREG); // TODO: do proper reg alloc
63cb0298 4201 emit_add(HOST_CCREG,HOST_TEMPREG,HOST_CCREG);
2573466a 4202 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4203 emit_writeword(HOST_CCREG,(int)&Count);
4204 }
4205 // What a mess. The status register (12) can enable interrupts,
4206 // so needs a special case to handle a pending interrupt.
4207 // The interrupt must be taken immediately, because a subsequent
4208 // instruction might disable interrupts again.
7139f3c8 4209 if(copr==12||copr==13) {
fca1aef2 4210#ifdef PCSX
4211 if (is_delayslot) {
4212 // burn cycles to cause cc_interrupt, which will
4213 // reschedule next_interupt. Relies on CCREG from above.
4214 assem_debug("MTC0 DS %d\n", copr);
4215 emit_writeword(HOST_CCREG,(int)&last_count);
4216 emit_movimm(0,HOST_CCREG);
4217 emit_storereg(CCREG,HOST_CCREG);
caeefe31 4218 emit_loadreg(rs1[i],1);
fca1aef2 4219 emit_movimm(copr,0);
4220 emit_call((int)pcsx_mtc0_ds);
4221 return;
4222 }
4223#endif
63cb0298 4224 emit_movimm(start+i*4+4,HOST_TEMPREG);
4225 emit_writeword(HOST_TEMPREG,(int)&pcaddr);
4226 emit_movimm(0,HOST_TEMPREG);
4227 emit_writeword(HOST_TEMPREG,(int)&pending_exception);
57871462 4228 }
4229 //else if(copr==12&&is_delayslot) emit_call((int)MTC0_R12);
4230 //else
fca1aef2 4231#ifdef PCSX
caeefe31 4232 if(s==HOST_CCREG)
4233 emit_loadreg(rs1[i],1);
4234 else if(s!=1)
63cb0298 4235 emit_mov(s,1);
fca1aef2 4236 emit_movimm(copr,0);
4237 emit_call((int)pcsx_mtc0);
4238#else
57871462 4239 emit_call((int)MTC0);
fca1aef2 4240#endif
7139f3c8 4241 if(copr==9||copr==11||copr==12||copr==13) {
57871462 4242 emit_readword((int)&Count,HOST_CCREG);
4243 emit_readword((int)&next_interupt,ECX);
2573466a 4244 emit_addimm(HOST_CCREG,-CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4245 emit_sub(HOST_CCREG,ECX,HOST_CCREG);
4246 emit_writeword(ECX,(int)&last_count);
4247 emit_storereg(CCREG,HOST_CCREG);
4248 }
7139f3c8 4249 if(copr==12||copr==13) {
57871462 4250 assert(!is_delayslot);
4251 emit_readword((int)&pending_exception,14);
4252 }
4253 emit_loadreg(rs1[i],s);
4254 if(get_reg(i_regs->regmap,rs1[i]|64)>=0)
4255 emit_loadreg(rs1[i]|64,get_reg(i_regs->regmap,rs1[i]|64));
7139f3c8 4256 if(copr==12||copr==13) {
57871462 4257 emit_test(14,14);
4258 emit_jne((int)&do_interrupt);
4259 }
4260 cop1_usable=0;
4261 }
4262 else
4263 {
4264 assert(opcode2[i]==0x10);
3d624f89 4265#ifndef DISABLE_TLB
57871462 4266 if((source[i]&0x3f)==0x01) // TLBR
4267 emit_call((int)TLBR);
4268 if((source[i]&0x3f)==0x02) // TLBWI
4269 emit_call((int)TLBWI_new);
4270 if((source[i]&0x3f)==0x06) { // TLBWR
4271 // The TLB entry written by TLBWR is dependent on the count,
4272 // so update the cycle count
4273 emit_readword((int)&last_count,ECX);
4274 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
4275 emit_add(HOST_CCREG,ECX,HOST_CCREG);
2573466a 4276 emit_addimm(HOST_CCREG,CLOCK_ADJUST(ccadj[i]),HOST_CCREG);
57871462 4277 emit_writeword(HOST_CCREG,(int)&Count);
4278 emit_call((int)TLBWR_new);
4279 }
4280 if((source[i]&0x3f)==0x08) // TLBP
4281 emit_call((int)TLBP);
3d624f89 4282#endif
576bbd8f 4283#ifdef PCSX
4284 if((source[i]&0x3f)==0x10) // RFE
4285 {
4286 emit_readword((int)&Status,0);
4287 emit_andimm(0,0x3c,1);
4288 emit_andimm(0,~0xf,0);
4289 emit_orrshr_imm(1,2,0);
4290 emit_writeword(0,(int)&Status);
4291 }
4292#else
57871462 4293 if((source[i]&0x3f)==0x18) // ERET
4294 {
4295 int count=ccadj[i];
4296 if(i_regs->regmap[HOST_CCREG]!=CCREG) emit_loadreg(CCREG,HOST_CCREG);
2573466a 4297 emit_addimm(HOST_CCREG,CLOCK_ADJUST(count),HOST_CCREG); // TODO: Should there be an extra cycle here?
57871462 4298 emit_jmp((int)jump_eret);
4299 }
576bbd8f 4300#endif
57871462 4301 }
4302}
4303
b9b61529 4304static void cop2_get_dreg(u_int copr,signed char tl,signed char temp)
4305{
4306 switch (copr) {
4307 case 1:
4308 case 3:
4309 case 5:
4310 case 8:
4311 case 9:
4312 case 10:
4313 case 11:
4314 emit_readword((int)&reg_cop2d[copr],tl);
4315 emit_signextend16(tl,tl);
4316 emit_writeword(tl,(int)&reg_cop2d[copr]); // hmh
4317 break;
4318 case 7:
4319 case 16:
4320 case 17:
4321 case 18:
4322 case 19:
4323 emit_readword((int)&reg_cop2d[copr],tl);
4324 emit_andimm(tl,0xffff,tl);
4325 emit_writeword(tl,(int)&reg_cop2d[copr]);
4326 break;
4327 case 15:
4328 emit_readword((int)&reg_cop2d[14],tl); // SXY2
4329 emit_writeword(tl,(int)&reg_cop2d[copr]);
4330 break;
4331 case 28:
b9b61529 4332 case 29:
4333 emit_readword((int)&reg_cop2d[9],temp);
4334 emit_testimm(temp,0x8000); // do we need this?
4335 emit_andimm(temp,0xf80,temp);
4336 emit_andne_imm(temp,0,temp);
f70d384d 4337 emit_shrimm(temp,7,tl);
b9b61529 4338 emit_readword((int)&reg_cop2d[10],temp);
4339 emit_testimm(temp,0x8000);
4340 emit_andimm(temp,0xf80,temp);
4341 emit_andne_imm(temp,0,temp);
f70d384d 4342 emit_orrshr_imm(temp,2,tl);
b9b61529 4343 emit_readword((int)&reg_cop2d[11],temp);
4344 emit_testimm(temp,0x8000);
4345 emit_andimm(temp,0xf80,temp);
4346 emit_andne_imm(temp,0,temp);
f70d384d 4347 emit_orrshl_imm(temp,3,tl);
b9b61529 4348 emit_writeword(tl,(int)&reg_cop2d[copr]);
4349 break;
4350 default:
4351 emit_readword((int)&reg_cop2d[copr],tl);
4352 break;
4353 }
4354}
4355
4356static void cop2_put_dreg(u_int copr,signed char sl,signed char temp)
4357{
4358 switch (copr) {
4359 case 15:
4360 emit_readword((int)&reg_cop2d[13],temp); // SXY1
4361 emit_writeword(sl,(int)&reg_cop2d[copr]);
4362 emit_writeword(temp,(int)&reg_cop2d[12]); // SXY0
4363 emit_readword((int)&reg_cop2d[14],temp); // SXY2
4364 emit_writeword(sl,(int)&reg_cop2d[14]);
4365 emit_writeword(temp,(int)&reg_cop2d[13]); // SXY1
4366 break;
4367 case 28:
4368 emit_andimm(sl,0x001f,temp);
f70d384d 4369 emit_shlimm(temp,7,temp);
b9b61529 4370 emit_writeword(temp,(int)&reg_cop2d[9]);
4371 emit_andimm(sl,0x03e0,temp);
f70d384d 4372 emit_shlimm(temp,2,temp);
b9b61529 4373 emit_writeword(temp,(int)&reg_cop2d[10]);
4374 emit_andimm(sl,0x7c00,temp);
f70d384d 4375 emit_shrimm(temp,3,temp);
b9b61529 4376 emit_writeword(temp,(int)&reg_cop2d[11]);
4377 emit_writeword(sl,(int)&reg_cop2d[28]);
4378 break;
4379 case 30:
4380 emit_movs(sl,temp);
4381 emit_mvnmi(temp,temp);
4382 emit_clz(temp,temp);
4383 emit_writeword(sl,(int)&reg_cop2d[30]);
4384 emit_writeword(temp,(int)&reg_cop2d[31]);
4385 break;
b9b61529 4386 case 31:
4387 break;
4388 default:
4389 emit_writeword(sl,(int)&reg_cop2d[copr]);
4390 break;
4391 }
4392}
4393
4394void cop2_assemble(int i,struct regstat *i_regs)
4395{
4396 u_int copr=(source[i]>>11)&0x1f;
4397 signed char temp=get_reg(i_regs->regmap,-1);
4398 if (opcode2[i]==0) { // MFC2
4399 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4400 if(tl>=0&&rt1[i]!=0)
b9b61529 4401 cop2_get_dreg(copr,tl,temp);
4402 }
4403 else if (opcode2[i]==4) { // MTC2
4404 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4405 cop2_put_dreg(copr,sl,temp);
4406 }
4407 else if (opcode2[i]==2) // CFC2
4408 {
4409 signed char tl=get_reg(i_regs->regmap,rt1[i]);
f1b3b369 4410 if(tl>=0&&rt1[i]!=0)
b9b61529 4411 emit_readword((int)&reg_cop2c[copr],tl);
4412 }
4413 else if (opcode2[i]==6) // CTC2
4414 {
4415 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4416 switch(copr) {
4417 case 4:
4418 case 12:
4419 case 20:
4420 case 26:
4421 case 27:
4422 case 29:
4423 case 30:
4424 emit_signextend16(sl,temp);
4425 break;
4426 case 31:
4427 //value = value & 0x7ffff000;
4428 //if (value & 0x7f87e000) value |= 0x80000000;
4429 emit_shrimm(sl,12,temp);
4430 emit_shlimm(temp,12,temp);
4431 emit_testimm(temp,0x7f000000);
4432 emit_testeqimm(temp,0x00870000);
4433 emit_testeqimm(temp,0x0000e000);
4434 emit_orrne_imm(temp,0x80000000,temp);
4435 break;
4436 default:
4437 temp=sl;
4438 break;
4439 }
4440 emit_writeword(temp,(int)&reg_cop2c[copr]);
4441 assert(sl>=0);
4442 }
4443}
4444
054175e9 4445static void c2op_prologue(u_int op,u_int reglist)
4446{
4447 save_regs_all(reglist);
82ed88eb 4448#ifdef PCNT
4449 emit_movimm(op,0);
4450 emit_call((int)pcnt_gte_start);
4451#endif
054175e9 4452 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0); // cop2 regs
4453}
4454
4455static void c2op_epilogue(u_int op,u_int reglist)
4456{
82ed88eb 4457#ifdef PCNT
4458 emit_movimm(op,0);
4459 emit_call((int)pcnt_gte_end);
4460#endif
054175e9 4461 restore_regs_all(reglist);
4462}
4463
6c0eefaf 4464static void c2op_call_MACtoIR(int lm,int need_flags)
4465{
4466 if(need_flags)
4467 emit_call((int)(lm?gteMACtoIR_lm1:gteMACtoIR_lm0));
4468 else
4469 emit_call((int)(lm?gteMACtoIR_lm1_nf:gteMACtoIR_lm0_nf));
4470}
4471
4472static void c2op_call_rgb_func(void *func,int lm,int need_ir,int need_flags)
4473{
4474 emit_call((int)func);
4475 // func is C code and trashes r0
4476 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4477 if(need_flags||need_ir)
4478 c2op_call_MACtoIR(lm,need_flags);
4479 emit_call((int)(need_flags?gteMACtoRGB:gteMACtoRGB_nf));
4480}
4481
054175e9 4482static void c2op_assemble(int i,struct regstat *i_regs)
b9b61529 4483{
4484 signed char temp=get_reg(i_regs->regmap,-1);
4485 u_int c2op=source[i]&0x3f;
6c0eefaf 4486 u_int hr,reglist_full=0,reglist;
054175e9 4487 int need_flags,need_ir;
b9b61529 4488 for(hr=0;hr<HOST_REGS;hr++) {
6c0eefaf 4489 if(i_regs->regmap[hr]>=0) reglist_full|=1<<hr;
b9b61529 4490 }
6c0eefaf 4491 reglist=reglist_full&0x100f;
b9b61529 4492
4493 if (gte_handlers[c2op]!=NULL) {
bedfea38 4494 need_flags=!(gte_unneeded[i+1]>>63); // +1 because of how liveness detection works
054175e9 4495 need_ir=(gte_unneeded[i+1]&0xe00)!=0xe00;
4496 assem_debug("gte unneeded %016llx, need_flags %d, need_ir %d\n",
4497 gte_unneeded[i+1],need_flags,need_ir);
0ff8c62c 4498 if(new_dynarec_hacks&NDHACK_GTE_NO_FLAGS)
4499 need_flags=0;
6c0eefaf 4500 int shift = (source[i] >> 19) & 1;
4501 int lm = (source[i] >> 10) & 1;
054175e9 4502 switch(c2op) {
19776aef 4503#ifndef DRC_DBG
054175e9 4504 case GTE_MVMVA: {
054175e9 4505 int v = (source[i] >> 15) & 3;
4506 int cv = (source[i] >> 13) & 3;
4507 int mx = (source[i] >> 17) & 3;
6c0eefaf 4508 reglist=reglist_full&0x10ff; // +{r4-r7}
054175e9 4509 c2op_prologue(c2op,reglist);
4510 /* r4,r5 = VXYZ(v) packed; r6 = &MX11(mx); r7 = &CV1(cv) */
4511 if(v<3)
4512 emit_ldrd(v*8,0,4);
4513 else {
4514 emit_movzwl_indexed(9*4,0,4); // gteIR
4515 emit_movzwl_indexed(10*4,0,6);
4516 emit_movzwl_indexed(11*4,0,5);
4517 emit_orrshl_imm(6,16,4);
4518 }
4519 if(mx<3)
4520 emit_addimm(0,32*4+mx*8*4,6);
4521 else
4522 emit_readword((int)&zeromem_ptr,6);
4523 if(cv<3)
4524 emit_addimm(0,32*4+(cv*8+5)*4,7);
4525 else
4526 emit_readword((int)&zeromem_ptr,7);
4527#ifdef __ARM_NEON__
4528 emit_movimm(source[i],1); // opcode
4529 emit_call((int)gteMVMVA_part_neon);
4530 if(need_flags) {
4531 emit_movimm(lm,1);
4532 emit_call((int)gteMACtoIR_flags_neon);
4533 }
4534#else
4535 if(cv==3&&shift)
4536 emit_call((int)gteMVMVA_part_cv3sh12_arm);
4537 else {
4538 emit_movimm(shift,1);
4539 emit_call((int)(need_flags?gteMVMVA_part_arm:gteMVMVA_part_nf_arm));
4540 }
6c0eefaf 4541 if(need_flags||need_ir)
4542 c2op_call_MACtoIR(lm,need_flags);
054175e9 4543#endif
4544 break;
4545 }
6c0eefaf 4546 case GTE_OP:
4547 c2op_prologue(c2op,reglist);
4548 emit_call((int)(shift?gteOP_part_shift:gteOP_part_noshift));
4549 if(need_flags||need_ir) {
4550 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4551 c2op_call_MACtoIR(lm,need_flags);
4552 }
4553 break;
4554 case GTE_DPCS:
4555 c2op_prologue(c2op,reglist);
4556 c2op_call_rgb_func(shift?gteDPCS_part_shift:gteDPCS_part_noshift,lm,need_ir,need_flags);
4557 break;
4558 case GTE_INTPL:
4559 c2op_prologue(c2op,reglist);
4560 c2op_call_rgb_func(shift?gteINTPL_part_shift:gteINTPL_part_noshift,lm,need_ir,need_flags);
4561 break;
4562 case GTE_SQR:
4563 c2op_prologue(c2op,reglist);
4564 emit_call((int)(shift?gteSQR_part_shift:gteSQR_part_noshift));
4565 if(need_flags||need_ir) {
4566 emit_addimm(FP,(int)&psxRegs.CP2D.r[0]-(int)&dynarec_local,0);
4567 c2op_call_MACtoIR(lm,need_flags);
4568 }
4569 break;
4570 case GTE_DCPL:
4571 c2op_prologue(c2op,reglist);
4572 c2op_call_rgb_func(gteDCPL_part,lm,need_ir,need_flags);
4573 break;
4574 case GTE_GPF:
4575 c2op_prologue(c2op,reglist);
4576 c2op_call_rgb_func(shift?gteGPF_part_shift:gteGPF_part_noshift,lm,need_ir,need_flags);
4577 break;
4578 case GTE_GPL:
4579 c2op_prologue(c2op,reglist);
4580 c2op_call_rgb_func(shift?gteGPL_part_shift:gteGPL_part_noshift,lm,need_ir,need_flags);
4581 break;
19776aef 4582#endif
054175e9 4583 default:
054175e9 4584 c2op_prologue(c2op,reglist);
19776aef 4585#ifdef DRC_DBG
4586 emit_movimm(source[i],1); // opcode
4587 emit_writeword(1,(int)&psxRegs.code);
4588#endif
054175e9 4589 emit_call((int)(need_flags?gte_handlers[c2op]:gte_handlers_nf[c2op]));
4590 break;
4591 }
4592 c2op_epilogue(c2op,reglist);
4593 }
b9b61529 4594}
4595
4596void cop1_unusable(int i,struct regstat *i_regs)
3d624f89 4597{
4598 // XXX: should just just do the exception instead
4599 if(!cop1_usable) {
4600 int jaddr=(int)out;
4601 emit_jmp(0);
4602 add_stub(FP_STUB,jaddr,(int)out,i,0,(int)i_regs,is_delayslot,0);
4603 cop1_usable=1;
4604 }
4605}
4606
57871462 4607void cop1_assemble(int i,struct regstat *i_regs)
4608{
3d624f89 4609#ifndef DISABLE_COP1
57871462 4610 // Check cop1 unusable
4611 if(!cop1_usable) {
4612 signed char rs=get_reg(i_regs->regmap,CSREG);
4613 assert(rs>=0);
4614 emit_testimm(rs,0x20000000);
4615 int jaddr=(int)out;
4616 emit_jeq(0);
4617 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4618 cop1_usable=1;
4619 }
4620 if (opcode2[i]==0) { // MFC1
4621 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4622 if(tl>=0) {
4623 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],tl);
4624 emit_readword_indexed(0,tl,tl);
4625 }
4626 }
4627 else if (opcode2[i]==1) { // DMFC1
4628 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4629 signed char th=get_reg(i_regs->regmap,rt1[i]|64);
4630 if(tl>=0) {
4631 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],tl);
4632 if(th>=0) emit_readword_indexed(4,tl,th);
4633 emit_readword_indexed(0,tl,tl);
4634 }
4635 }
4636 else if (opcode2[i]==4) { // MTC1
4637 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4638 signed char temp=get_reg(i_regs->regmap,-1);
4639 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4640 emit_writeword_indexed(sl,0,temp);
4641 }
4642 else if (opcode2[i]==5) { // DMTC1
4643 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4644 signed char sh=rs1[i]>0?get_reg(i_regs->regmap,rs1[i]|64):sl;
4645 signed char temp=get_reg(i_regs->regmap,-1);
4646 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4647 emit_writeword_indexed(sh,4,temp);
4648 emit_writeword_indexed(sl,0,temp);
4649 }
4650 else if (opcode2[i]==2) // CFC1
4651 {
4652 signed char tl=get_reg(i_regs->regmap,rt1[i]);
4653 if(tl>=0) {
4654 u_int copr=(source[i]>>11)&0x1f;
4655 if(copr==0) emit_readword((int)&FCR0,tl);
4656 if(copr==31) emit_readword((int)&FCR31,tl);
4657 }
4658 }
4659 else if (opcode2[i]==6) // CTC1
4660 {
4661 signed char sl=get_reg(i_regs->regmap,rs1[i]);
4662 u_int copr=(source[i]>>11)&0x1f;
4663 assert(sl>=0);
4664 if(copr==31)
4665 {
4666 emit_writeword(sl,(int)&FCR31);
4667 // Set the rounding mode
4668 //FIXME
4669 //char temp=get_reg(i_regs->regmap,-1);
4670 //emit_andimm(sl,3,temp);
4671 //emit_fldcw_indexed((int)&rounding_modes,temp);
4672 }
4673 }
3d624f89 4674#else
4675 cop1_unusable(i, i_regs);
4676#endif
57871462 4677}
4678
4679void fconv_assemble_arm(int i,struct regstat *i_regs)
4680{
3d624f89 4681#ifndef DISABLE_COP1
57871462 4682 signed char temp=get_reg(i_regs->regmap,-1);
4683 assert(temp>=0);
4684 // Check cop1 unusable
4685 if(!cop1_usable) {
4686 signed char rs=get_reg(i_regs->regmap,CSREG);
4687 assert(rs>=0);
4688 emit_testimm(rs,0x20000000);
4689 int jaddr=(int)out;
4690 emit_jeq(0);
4691 add_stub(FP_STUB,jaddr,(int)out,i,rs,(int)i_regs,is_delayslot,0);
4692 cop1_usable=1;
4693 }
4694
4695 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4696 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) { // trunc_w_s
4697 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4698 emit_flds(temp,15);
4699 emit_ftosizs(15,15); // float->int, truncate
4700 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4701 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4702 emit_fsts(15,temp);
4703 return;
4704 }
4705 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) { // trunc_w_d
4706 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4707 emit_vldr(temp,7);
4708 emit_ftosizd(7,13); // double->int, truncate
4709 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4710 emit_fsts(13,temp);
4711 return;
4712 }
4713
4714 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) { // cvt_s_w
4715 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4716 emit_flds(temp,13);
4717 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f))
4718 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4719 emit_fsitos(13,15);
4720 emit_fsts(15,temp);
4721 return;
4722 }
4723 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) { // cvt_d_w
4724 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4725 emit_flds(temp,13);
4726 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4727 emit_fsitod(13,7);
4728 emit_vstr(7,temp);
4729 return;
4730 }
4731
4732 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) { // cvt_d_s
4733 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4734 emit_flds(temp,13);
4735 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
4736 emit_fcvtds(13,7);
4737 emit_vstr(7,temp);
4738 return;
4739 }
4740 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) { // cvt_s_d
4741 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4742 emit_vldr(temp,7);
4743 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
4744 emit_fcvtsd(7,13);
4745 emit_fsts(13,temp);
4746 return;
4747 }
4748 #endif
4749
4750 // C emulation code
4751
4752 u_int hr,reglist=0;
4753 for(hr=0;hr<HOST_REGS;hr++) {
4754 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4755 }
4756 save_regs(reglist);
4757
4758 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x20) {
4759 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4760 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4761 emit_call((int)cvt_s_w);
4762 }
4763 if(opcode2[i]==0x14&&(source[i]&0x3f)==0x21) {
4764 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4765 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4766 emit_call((int)cvt_d_w);
4767 }
4768 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x20) {
4769 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4770 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4771 emit_call((int)cvt_s_l);
4772 }
4773 if(opcode2[i]==0x15&&(source[i]&0x3f)==0x21) {
4774 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4775 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4776 emit_call((int)cvt_d_l);
4777 }
4778
4779 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x21) {
4780 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4781 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4782 emit_call((int)cvt_d_s);
4783 }
4784 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x24) {
4785 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4786 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4787 emit_call((int)cvt_w_s);
4788 }
4789 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x25) {
4790 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4791 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4792 emit_call((int)cvt_l_s);
4793 }
4794
4795 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x20) {
4796 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4797 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4798 emit_call((int)cvt_s_d);
4799 }
4800 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x24) {
4801 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4802 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4803 emit_call((int)cvt_w_d);
4804 }
4805 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x25) {
4806 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4807 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4808 emit_call((int)cvt_l_d);
4809 }
4810
4811 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x08) {
4812 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4813 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4814 emit_call((int)round_l_s);
4815 }
4816 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x09) {
4817 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4818 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4819 emit_call((int)trunc_l_s);
4820 }
4821 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0a) {
4822 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4823 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4824 emit_call((int)ceil_l_s);
4825 }
4826 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0b) {
4827 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4828 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4829 emit_call((int)floor_l_s);
4830 }
4831 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0c) {
4832 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4833 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4834 emit_call((int)round_w_s);
4835 }
4836 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0d) {
4837 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4838 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4839 emit_call((int)trunc_w_s);
4840 }
4841 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0e) {
4842 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4843 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4844 emit_call((int)ceil_w_s);
4845 }
4846 if(opcode2[i]==0x10&&(source[i]&0x3f)==0x0f) {
4847 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4848 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4849 emit_call((int)floor_w_s);
4850 }
4851
4852 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x08) {
4853 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4854 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4855 emit_call((int)round_l_d);
4856 }
4857 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x09) {
4858 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4859 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4860 emit_call((int)trunc_l_d);
4861 }
4862 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0a) {
4863 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4864 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4865 emit_call((int)ceil_l_d);
4866 }
4867 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0b) {
4868 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4869 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
4870 emit_call((int)floor_l_d);
4871 }
4872 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0c) {
4873 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4874 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4875 emit_call((int)round_w_d);
4876 }
4877 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0d) {
4878 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4879 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4880 emit_call((int)trunc_w_d);
4881 }
4882 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0e) {
4883 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4884 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4885 emit_call((int)ceil_w_d);
4886 }
4887 if(opcode2[i]==0x11&&(source[i]&0x3f)==0x0f) {
4888 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
4889 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
4890 emit_call((int)floor_w_d);
4891 }
4892
4893 restore_regs(reglist);
3d624f89 4894#else
4895 cop1_unusable(i, i_regs);
4896#endif
57871462 4897}
4898#define fconv_assemble fconv_assemble_arm
4899
4900void fcomp_assemble(int i,struct regstat *i_regs)
4901{
3d624f89 4902#ifndef DISABLE_COP1
57871462 4903 signed char fs=get_reg(i_regs->regmap,FSREG);
4904 signed char temp=get_reg(i_regs->regmap,-1);
4905 assert(temp>=0);
4906 // Check cop1 unusable
4907 if(!cop1_usable) {
4908 signed char cs=get_reg(i_regs->regmap,CSREG);
4909 assert(cs>=0);
4910 emit_testimm(cs,0x20000000);
4911 int jaddr=(int)out;
4912 emit_jeq(0);
4913 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
4914 cop1_usable=1;
4915 }
4916
4917 if((source[i]&0x3f)==0x30) {
4918 emit_andimm(fs,~0x800000,fs);
4919 return;
4920 }
4921
4922 if((source[i]&0x3e)==0x38) {
4923 // sf/ngle - these should throw exceptions for NaNs
4924 emit_andimm(fs,~0x800000,fs);
4925 return;
4926 }
4927
4928 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
4929 if(opcode2[i]==0x10) {
4930 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
4931 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
4932 emit_orimm(fs,0x800000,fs);
4933 emit_flds(temp,14);
4934 emit_flds(HOST_TEMPREG,15);
4935 emit_fcmps(14,15);
4936 emit_fmstat();
4937 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_s
4938 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_s
4939 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_s
4940 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_s
4941 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_s
4942 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_s
4943 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_s
4944 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_s
4945 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_s
4946 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_s
4947 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_s
4948 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_s
4949 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_s
4950 return;
4951 }
4952 if(opcode2[i]==0x11) {
4953 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
4954 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
4955 emit_orimm(fs,0x800000,fs);
4956 emit_vldr(temp,6);
4957 emit_vldr(HOST_TEMPREG,7);
4958 emit_fcmpd(6,7);
4959 emit_fmstat();
4960 if((source[i]&0x3f)==0x31) emit_bicvc_imm(fs,0x800000,fs); // c_un_d
4961 if((source[i]&0x3f)==0x32) emit_bicne_imm(fs,0x800000,fs); // c_eq_d
4962 if((source[i]&0x3f)==0x33) {emit_bicne_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ueq_d
4963 if((source[i]&0x3f)==0x34) emit_biccs_imm(fs,0x800000,fs); // c_olt_d
4964 if((source[i]&0x3f)==0x35) {emit_biccs_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ult_d
4965 if((source[i]&0x3f)==0x36) emit_bichi_imm(fs,0x800000,fs); // c_ole_d
4966 if((source[i]&0x3f)==0x37) {emit_bichi_imm(fs,0x800000,fs);emit_orrvs_imm(fs,0x800000,fs);} // c_ule_d
4967 if((source[i]&0x3f)==0x3a) emit_bicne_imm(fs,0x800000,fs); // c_seq_d
4968 if((source[i]&0x3f)==0x3b) emit_bicne_imm(fs,0x800000,fs); // c_ngl_d
4969 if((source[i]&0x3f)==0x3c) emit_biccs_imm(fs,0x800000,fs); // c_lt_d
4970 if((source[i]&0x3f)==0x3d) emit_biccs_imm(fs,0x800000,fs); // c_nge_d
4971 if((source[i]&0x3f)==0x3e) emit_bichi_imm(fs,0x800000,fs); // c_le_d
4972 if((source[i]&0x3f)==0x3f) emit_bichi_imm(fs,0x800000,fs); // c_ngt_d
4973 return;
4974 }
4975 #endif
4976
4977 // C only
4978
4979 u_int hr,reglist=0;
4980 for(hr=0;hr<HOST_REGS;hr++) {
4981 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
4982 }
4983 reglist&=~(1<<fs);
4984 save_regs(reglist);
4985 if(opcode2[i]==0x10) {
4986 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
4987 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
4988 if((source[i]&0x3f)==0x30) emit_call((int)c_f_s);
4989 if((source[i]&0x3f)==0x31) emit_call((int)c_un_s);
4990 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_s);
4991 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_s);
4992 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_s);
4993 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_s);
4994 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_s);
4995 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_s);
4996 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_s);
4997 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_s);
4998 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_s);
4999 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_s);
5000 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_s);
5001 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_s);
5002 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_s);
5003 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_s);
5004 }
5005 if(opcode2[i]==0x11) {
5006 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5007 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5008 if((source[i]&0x3f)==0x30) emit_call((int)c_f_d);
5009 if((source[i]&0x3f)==0x31) emit_call((int)c_un_d);
5010 if((source[i]&0x3f)==0x32) emit_call((int)c_eq_d);
5011 if((source[i]&0x3f)==0x33) emit_call((int)c_ueq_d);
5012 if((source[i]&0x3f)==0x34) emit_call((int)c_olt_d);
5013 if((source[i]&0x3f)==0x35) emit_call((int)c_ult_d);
5014 if((source[i]&0x3f)==0x36) emit_call((int)c_ole_d);
5015 if((source[i]&0x3f)==0x37) emit_call((int)c_ule_d);
5016 if((source[i]&0x3f)==0x38) emit_call((int)c_sf_d);
5017 if((source[i]&0x3f)==0x39) emit_call((int)c_ngle_d);
5018 if((source[i]&0x3f)==0x3a) emit_call((int)c_seq_d);
5019 if((source[i]&0x3f)==0x3b) emit_call((int)c_ngl_d);
5020 if((source[i]&0x3f)==0x3c) emit_call((int)c_lt_d);
5021 if((source[i]&0x3f)==0x3d) emit_call((int)c_nge_d);
5022 if((source[i]&0x3f)==0x3e) emit_call((int)c_le_d);
5023 if((source[i]&0x3f)==0x3f) emit_call((int)c_ngt_d);
5024 }
5025 restore_regs(reglist);
5026 emit_loadreg(FSREG,fs);
3d624f89 5027#else
5028 cop1_unusable(i, i_regs);
5029#endif
57871462 5030}
5031
// Emit host code for the COP1 arithmetic instruction at index i.
//
// The low six bits of source[i] select the operation (0 add, 1 sub, 2 mul,
// 3 div, 4 sqrt, 5 abs, 6 mov, 7 neg) and opcode2[i] selects the format
// (0x10 single precision, 0x11 double precision).  The first operand register
// number is taken from bits 11-15 of source[i], the second operand (binary
// operations only) from bits 16-20, and the destination from bits 6-10.
//
//   i      - index of the instruction being assembled
//   i_regs - register state for this instruction; its regmap is used to find
//            the host registers holding the temporary (-1) and CSREG
//
// When built with hardware VFP support the arithmetic is emitted inline;
// otherwise a C helper is called with the operand/result pointers passed in
// ARG1_REG..ARG3_REG.  With DISABLE_COP1 defined only cop1_unusable() is
// invoked.
5032void float_assemble(int i,struct regstat *i_regs)
5033{
3d624f89 5034#ifndef DISABLE_COP1
57871462 5035 signed char temp=get_reg(i_regs->regmap,-1);
5036 assert(temp>=0);
5037 // Check cop1 unusable
// The check is emitted only while cop1_usable is clear: test bit 29 of the
// host register holding CSREG and branch to an FP_STUB when it is zero.
// Setting cop1_usable afterwards suppresses the check for later instructions.
5038 if(!cop1_usable) {
5039 signed char cs=get_reg(i_regs->regmap,CSREG);
5040 assert(cs>=0);
5041 emit_testimm(cs,0x20000000);
5042 int jaddr=(int)out;
5043 emit_jeq(0);
5044 add_stub(FP_STUB,jaddr,(int)out,i,cs,(int)i_regs,is_delayslot,0);
5045 cop1_usable=1;
5046 }
5047
// Inline path, compiled only when the toolchain targets hardware VFP.
// NOTE(review): reg_cop1_simple[]/reg_cop1_double[] entries appear to be
// pointers to the actual values (each one is loaded and then dereferenced
// below) - confirm against their declaration.
5048 #if(defined(__VFP_FP__) && !defined(__SOFTFP__))
5049 if((source[i]&0x3f)==6) // mov
5050 {
// Nothing is emitted when source and destination register numbers match.
5051 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5052 if(opcode2[i]==0x10) {
5053 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5054 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],HOST_TEMPREG);
5055 emit_readword_indexed(0,temp,temp);
5056 emit_writeword_indexed(temp,0,HOST_TEMPREG);
5057 }
5058 if(opcode2[i]==0x11) {
5059 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5060 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],HOST_TEMPREG);
5061 emit_vldr(temp,7);
5062 emit_vstr(7,HOST_TEMPREG);
5063 }
5064 }
5065 return;
5066 }
5067
// Remaining unary operations (sqrt/abs/neg): load the operand through temp,
// re-point temp at the destination slot if it differs from the source, apply
// the operation in place and store the result through temp.
5068 if((source[i]&0x3f)>3)
5069 {
5070 if(opcode2[i]==0x10) {
5071 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5072 emit_flds(temp,15);
5073 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5074 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5075 }
5076 if((source[i]&0x3f)==4) // sqrt
5077 emit_fsqrts(15,15);
5078 if((source[i]&0x3f)==5) // abs
5079 emit_fabss(15,15);
5080 if((source[i]&0x3f)==7) // neg
5081 emit_fnegs(15,15);
5082 emit_fsts(15,temp);
5083 }
5084 if(opcode2[i]==0x11) {
5085 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5086 emit_vldr(temp,7);
5087 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5088 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5089 }
5090 if((source[i]&0x3f)==4) // sqrt
5091 emit_fsqrtd(7,7);
5092 if((source[i]&0x3f)==5) // abs
5093 emit_fabsd(7,7);
5094 if((source[i]&0x3f)==7) // neg
5095 emit_fnegd(7,7);
5096 emit_vstr(7,temp);
5097 }
5098 return;
5099 }
// Binary operations (add/sub/mul/div).  temp first receives the pointer for
// the first operand.
5100 if((source[i]&0x3f)<4)
5101 {
5102 if(opcode2[i]==0x10) {
5103 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],temp);
5104 }
5105 if(opcode2[i]==0x11) {
5106 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],temp);
5107 }
// Distinct operand registers: the second operand pointer goes to
// HOST_TEMPREG.  temp is re-pointed at the destination only when the
// destination differs from both operands; if the destination equals the
// second operand the result is stored through HOST_TEMPREG, otherwise
// through temp.
5108 if(((source[i]>>11)&0x1f)!=((source[i]>>16)&0x1f)) {
5109 if(opcode2[i]==0x10) {
5110 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],HOST_TEMPREG);
5111 emit_flds(temp,15);
5112 emit_flds(HOST_TEMPREG,13);
5113 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5114 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5115 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5116 }
5117 }
5118 if((source[i]&0x3f)==0) emit_fadds(15,13,15);
5119 if((source[i]&0x3f)==1) emit_fsubs(15,13,15);
5120 if((source[i]&0x3f)==2) emit_fmuls(15,13,15);
5121 if((source[i]&0x3f)==3) emit_fdivs(15,13,15);
5122 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5123 emit_fsts(15,HOST_TEMPREG);
5124 }else{
5125 emit_fsts(15,temp);
5126 }
5127 }
5128 else if(opcode2[i]==0x11) {
5129 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],HOST_TEMPREG);
5130 emit_vldr(temp,7);
5131 emit_vldr(HOST_TEMPREG,6);
5132 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5133 if(((source[i]>>16)&0x1f)!=((source[i]>>6)&0x1f)) {
5134 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5135 }
5136 }
5137 if((source[i]&0x3f)==0) emit_faddd(7,6,7);
5138 if((source[i]&0x3f)==1) emit_fsubd(7,6,7);
5139 if((source[i]&0x3f)==2) emit_fmuld(7,6,7);
5140 if((source[i]&0x3f)==3) emit_fdivd(7,6,7);
5141 if(((source[i]>>16)&0x1f)==((source[i]>>6)&0x1f)) {
5142 emit_vstr(7,HOST_TEMPREG);
5143 }else{
5144 emit_vstr(7,temp);
5145 }
5146 }
5147 }
// Both operands are the same register: a single load feeds both inputs of
// the operation.
5148 else {
5149 if(opcode2[i]==0x10) {
5150 emit_flds(temp,15);
5151 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5152 emit_readword((int)&reg_cop1_simple[(source[i]>>6)&0x1f],temp);
5153 }
5154 if((source[i]&0x3f)==0) emit_fadds(15,15,15);
5155 if((source[i]&0x3f)==1) emit_fsubs(15,15,15);
5156 if((source[i]&0x3f)==2) emit_fmuls(15,15,15);
5157 if((source[i]&0x3f)==3) emit_fdivs(15,15,15);
5158 emit_fsts(15,temp);
5159 }
5160 else if(opcode2[i]==0x11) {
5161 emit_vldr(temp,7);
5162 if(((source[i]>>11)&0x1f)!=((source[i]>>6)&0x1f)) {
5163 emit_readword((int)&reg_cop1_double[(source[i]>>6)&0x1f],temp);
5164 }
5165 if((source[i]&0x3f)==0) emit_faddd(7,7,7);
5166 if((source[i]&0x3f)==1) emit_fsubd(7,7,7);
5167 if((source[i]&0x3f)==2) emit_fmuld(7,7,7);
5168 if((source[i]&0x3f)==3) emit_fdivd(7,7,7);
5169 emit_vstr(7,temp);
5170 }
5171 }
5172 return;
5173 }
5174 #endif
5175
// Fallback path: call a C helper.  reglist collects every host register that
// currently maps a guest register so it can be saved around the call.
5176 u_int hr,reglist=0;
5177 for(hr=0;hr<HOST_REGS;hr++) {
5178 if(i_regs->regmap[hr]>=0) reglist|=1<<hr;
5179 }
// Binary operations pass (operand1, operand2, destination) in
// ARG1..ARG3; unary operations pass (operand, destination) in ARG1..ARG2.
5180 if(opcode2[i]==0x10) { // Single precision
5181 save_regs(reglist);
5182 emit_readword((int)&reg_cop1_simple[(source[i]>>11)&0x1f],ARG1_REG);
5183 if((source[i]&0x3f)<4) {
5184 emit_readword((int)&reg_cop1_simple[(source[i]>>16)&0x1f],ARG2_REG);
5185 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG3_REG);
5186 }else{
5187 emit_readword((int)&reg_cop1_simple[(source[i]>> 6)&0x1f],ARG2_REG);
5188 }
5189 switch(source[i]&0x3f)
5190 {
5191 case 0x00: emit_call((int)add_s);break;
5192 case 0x01: emit_call((int)sub_s);break;
5193 case 0x02: emit_call((int)mul_s);break;
5194 case 0x03: emit_call((int)div_s);break;
5195 case 0x04: emit_call((int)sqrt_s);break;
5196 case 0x05: emit_call((int)abs_s);break;
5197 case 0x06: emit_call((int)mov_s);break;
5198 case 0x07: emit_call((int)neg_s);break;
5199 }
5200 restore_regs(reglist);
5201 }
5202 if(opcode2[i]==0x11) { // Double precision
5203 save_regs(reglist);
5204 emit_readword((int)&reg_cop1_double[(source[i]>>11)&0x1f],ARG1_REG);
5205 if((source[i]&0x3f)<4) {
5206 emit_readword((int)&reg_cop1_double[(source[i]>>16)&0x1f],ARG2_REG);
5207 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG3_REG);
5208 }else{
5209 emit_readword((int)&reg_cop1_double[(source[i]>> 6)&0x1f],ARG2_REG);
5210 }
5211 switch(source[i]&0x3f)
5212 {
5213 case 0x00: emit_call((int)add_d);break;
5214 case 0x01: emit_call((int)sub_d);break;
5215 case 0x02: emit_call((int)mul_d);break;
5216 case 0x03: emit_call((int)div_d);break;
5217 case 0x04: emit_call((int)sqrt_d);break;
5218 case 0x05: emit_call((int)abs_d);break;
5219 case 0x06: emit_call((int)mov_d);break;
5220 case 0x07: emit_call((int)neg_d);break;
5221 }
5222 restore_regs(reglist);
5223 }
3d624f89 5224#else
5225 cop1_unusable(i, i_regs);
5226#endif
57871462 5227}
5228
// Emit host code for the multiply/divide instruction at index i.
//
// opcode2[i] selects the operation (0x18-0x1F, listed below).  Operands are
// the guest registers rs1[i] and rs2[i]; results go to the host registers
// that i_regs->regmap assigns to HIREG and LOREG.
//
//   i      - index of the instruction being assembled
//   i_regs - register state whose regmap locates operands and HI/LO
//
// If either rs1[i] or rs2[i] is 0 (guest register zero), no arithmetic is
// emitted and HI/LO are simply zeroed.  32-bit MULT/MULTU/DIV/DIVU are
// emitted inline; the 64-bit variants exist only when FORCE32 is not defined
// and call out to helper routines.
5229void multdiv_assemble_arm(int i,struct regstat *i_regs)
5230{
5231 // case 0x18: MULT
5232 // case 0x19: MULTU
5233 // case 0x1A: DIV
5234 // case 0x1B: DIVU
5235 // case 0x1C: DMULT
5236 // case 0x1D: DMULTU
5237 // case 0x1E: DDIV
5238 // case 0x1F: DDIVU
5239 if(rs1[i]&&rs2[i])
5240 {
5241 if((opcode2[i]&4)==0) // 32-bit
5242 {
5243 if(opcode2[i]==0x18) // MULT
5244 {
5245 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5246 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5247 signed char hi=get_reg(i_regs->regmap,HIREG);
5248 signed char lo=get_reg(i_regs->regmap,LOREG);
5249 assert(m1>=0);
5250 assert(m2>=0);
5251 assert(hi>=0);
5252 assert(lo>=0);
5253 emit_smull(m1,m2,hi,lo);
5254 }
5255 if(opcode2[i]==0x19) // MULTU
5256 {
5257 signed char m1=get_reg(i_regs->regmap,rs1[i]);
5258 signed char m2=get_reg(i_regs->regmap,rs2[i]);
5259 signed char hi=get_reg(i_regs->regmap,HIREG);
5260 signed char lo=get_reg(i_regs->regmap,LOREG);
5261 assert(m1>=0);
5262 assert(m2>=0);
5263 assert(hi>=0);
5264 assert(lo>=0);
5265 emit_umull(m1,m2,hi,lo);
5266 }
// Signed division is emitted as an inline shift-and-subtract loop working on
// the magnitudes of the operands (negated when negative), with the signs of
// quotient and remainder fixed up afterwards from d1/d2.
// NOTE(review): the branch targets out+52 and out-16 are hard-coded byte
// offsets; they appear to assume every emit_* call in this sequence produces
// exactly one 4-byte instruction.  Under that assumption out+52 lands on the
// final emit_test(d1,d1) and out-16 on emit_cmp (the top of the loop).  Do
// not add, remove or reorder emitter calls here without adjusting them.
5267 if(opcode2[i]==0x1A) // DIV
5268 {
5269 signed char d1=get_reg(i_regs->regmap,rs1[i]);
5270 signed char d2=get_reg(i_regs->regmap,rs2[i]);
5271 assert(d1>=0);
5272 assert(d2>=0);
5273 signed char quotient=get_reg(i_regs->regmap,LOREG);
5274 signed char remainder=get_reg(i_regs->regmap,HIREG);
5275 assert(quotient>=0);
5276 assert(remainder>=0);
5277 emit_movs(d1,remainder);
44a80f6a 5278 emit_movimm(0xffffffff,quotient);
5279 emit_negmi(quotient,quotient); // .. quotient and ..
5280 emit_negmi(remainder,remainder); // .. remainder for div0 case (will be negated back after jump)
57871462 5281 emit_movs(d2,HOST_TEMPREG);
5282 emit_jeq((int)out+52); // Division by zero
5283 emit_negmi(HOST_TEMPREG,HOST_TEMPREG);
5284 emit_clz(HOST_TEMPREG,quotient);
5285 emit_shl(HOST_TEMPREG,quotient,HOST_TEMPREG);
5286 emit_orimm(quotient,1<<31,quotient);
5287 emit_shr(quotient,quotient,quotient);
5288 emit_cmp(remainder,HOST_TEMPREG);
5289 emit_subcs(remainder,HOST_TEMPREG,remainder);
5290 emit_adcs(quotient,quotient,quotient);
5291 emit_shrimm(HOST_TEMPREG,1,HOST_TEMPREG);
5292 emit_jcc((int)out-16); // -4
5293 emit_teq(d1,d2);
5294 emit_negmi(quotient,quotient);
5295 emit_test(d1,d1);
5296 emit_negmi(remainder,remainder);
5297 }
// Unsigned division uses the same shift-and-subtract scheme.  For a zero
// divisor the sequence is skipped, leaving quotient=0xffffffff and
// remainder=dividend as set up before the branch.
// NOTE(review): the host register holding the divisor (d2) is itself shifted
// by the emitted code (emit_shl/emit_shrcc_imm write to d2) - confirm the
// allocator accounts for that.  As above, out+40/out-16 appear to assume one
// 4-byte instruction per emit_* call; out+40 would then be the first address
// after the loop branch.
5298 if(opcode2[i]==0x1B) // DIVU
5299 {
5300 signed char d1=get_reg(i_regs->regmap,rs1[i]); // dividend
5301 signed char d2=get_reg(i_regs->regmap,rs2[i]); // divisor
5302 assert(d1>=0);
5303 assert(d2>=0);
5304 signed char quotient=get_reg(i_regs->regmap,LOREG);
5305 signed char remainder=get_reg(i_regs->regmap,HIREG);
5306 assert(quotient>=0);
5307 assert(remainder>=0);
44a80f6a 5308 emit_mov(d1,remainder);
5309 emit_movimm(0xffffffff,quotient); // div0 case
57871462 5310 emit_test(d2,d2);
44a80f6a 5311 emit_jeq((int)out+40); // Division by zero
57871462 5312 emit_clz(d2,HOST_TEMPREG);
5313 emit_movimm(1<<31,quotient);
5314 emit_shl(d2,HOST_TEMPREG,d2);
57871462 5315 emit_shr(quotient,HOST_TEMPREG,quotient);
5316 emit_cmp(remainder,d2);
5317 emit_subcs(remainder,d2,remainder);
5318 emit_adcs(quotient,quotient,quotient);
5319 emit_shrcc_imm(d2,1,d2);
5320 emit_jcc((int)out-16); // -4
5321 }
5322 }
5323 else // 64-bit
4600ba03 5324#ifndef FORCE32
57871462 5325 {
5326 if(opcode2[i]==0x1C) // DMULT
5327 {
// This assert can never hold inside this branch: with assertions enabled,
// reaching DMULT aborts, so the code below it is effectively unsupported.
5328 assert(opcode2[i]!=0x1C);
5329 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5330 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5331 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5332 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5333 assert(m1h>=0);
5334 assert(m2h>=0);
5335 assert(m1l>=0);
5336 assert(m2l>=0);
5337 emit_pushreg(m2h);
5338 emit_pushreg(m2l);
5339 emit_pushreg(m1h);
5340 emit_pushreg(m1l);
5341 emit_call((int)&mult64);
5342 emit_popreg(m1l);
5343 emit_popreg(m1h);
5344 emit_popreg(m2l);
5345 emit_popreg(m2h);
5346 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5347 signed char hil=get_reg(i_regs->regmap,HIREG);
5348 if(hih>=0) emit_loadreg(HIREG|64,hih);
5349 if(hil>=0) emit_loadreg(HIREG,hil);
5350 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5351 signed char lol=get_reg(i_regs->regmap,LOREG);
5352 if(loh>=0) emit_loadreg(LOREG|64,loh);
5353 if(lol>=0) emit_loadreg(LOREG,lol);
5354 }
// DMULTU/DDIV/DDIVU share one calling pattern: save host registers (mask
// 0x100f = bits 0-3 and 12), move the four operand halves into host
// registers 0..3 in order, call the helper, then restore.  When an operand
// lives in a lower-numbered register that an earlier move has already
// overwritten, it is re-read from dynarec_local+reg*4 instead.
// NOTE(review): this presumably relies on save_regs() having spilled those
// registers to the start of dynarec_local - confirm.
5355 if(opcode2[i]==0x1D) // DMULTU
5356 {
5357 signed char m1h=get_reg(i_regs->regmap,rs1[i]|64);
5358 signed char m1l=get_reg(i_regs->regmap,rs1[i]);
5359 signed char m2h=get_reg(i_regs->regmap,rs2[i]|64);
5360 signed char m2l=get_reg(i_regs->regmap,rs2[i]);
5361 assert(m1h>=0);
5362 assert(m2h>=0);
5363 assert(m1l>=0);
5364 assert(m2l>=0);
5365 save_regs(0x100f);
5366 if(m1l!=0) emit_mov(m1l,0);
5367 if(m1h==0) emit_readword((int)&dynarec_local,1);
5368 else if(m1h>1) emit_mov(m1h,1);
5369 if(m2l<2) emit_readword((int)&dynarec_local+m2l*4,2);
5370 else if(m2l>2) emit_mov(m2l,2);
5371 if(m2h<3) emit_readword((int)&dynarec_local+m2h*4,3);
5372 else if(m2h>3) emit_mov(m2h,3);
5373 emit_call((int)&multu64);
5374 restore_regs(0x100f);
// NOTE(review): unlike DDIV/DDIVU below, the four registers looked up here
// are never used to reload HI/LO after the call (everything that follows in
// this branch is commented out) - confirm whether that is intended.
5375 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5376 signed char hil=get_reg(i_regs->regmap,HIREG);
5377 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5378 signed char lol=get_reg(i_regs->regmap,LOREG);
5379 /*signed char temp=get_reg(i_regs->regmap,-1);
5380 signed char rh=get_reg(i_regs->regmap,HIREG|64);
5381 signed char rl=get_reg(i_regs->regmap,HIREG);
5382 assert(m1h>=0);
5383 assert(m2h>=0);
5384 assert(m1l>=0);
5385 assert(m2l>=0);
5386 assert(temp>=0);
5387 //emit_mov(m1l,EAX);
5388 //emit_mul(m2l);
5389 emit_umull(rl,rh,m1l,m2l);
5390 emit_storereg(LOREG,rl);
5391 emit_mov(rh,temp);
5392 //emit_mov(m1h,EAX);
5393 //emit_mul(m2l);
5394 emit_umull(rl,rh,m1h,m2l);
5395 emit_adds(rl,temp,temp);
5396 emit_adcimm(rh,0,rh);
5397 emit_storereg(HIREG,rh);
5398 //emit_mov(m2h,EAX);
5399 //emit_mul(m1l);
5400 emit_umull(rl,rh,m1l,m2h);
5401 emit_adds(rl,temp,temp);
5402 emit_adcimm(rh,0,rh);
5403 emit_storereg(LOREG|64,temp);
5404 emit_mov(rh,temp);
5405 //emit_mov(m2h,EAX);
5406 //emit_mul(m1h);
5407 emit_umull(rl,rh,m1h,m2h);
5408 emit_adds(rl,temp,rl);
5409 emit_loadreg(HIREG,temp);
5410 emit_adcimm(rh,0,rh);
5411 emit_adds(rl,temp,rl);
5412 emit_adcimm(rh,0,rh);
5413 // DEBUG
5414 /*
5415 emit_pushreg(m2h);
5416 emit_pushreg(m2l);
5417 emit_pushreg(m1h);
5418 emit_pushreg(m1l);
5419 emit_call((int)&multu64);
5420 emit_popreg(m1l);
5421 emit_popreg(m1h);
5422 emit_popreg(m2l);
5423 emit_popreg(m2h);
5424 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5425 signed char hil=get_reg(i_regs->regmap,HIREG);
5426 if(hih>=0) emit_loadreg(HIREG|64,hih); // DEBUG
5427 if(hil>=0) emit_loadreg(HIREG,hil); // DEBUG
5428 */
5429 // Shouldn't be necessary
5430 //char loh=get_reg(i_regs->regmap,LOREG|64);
5431 //char lol=get_reg(i_regs->regmap,LOREG);
5432 //if(loh>=0) emit_loadreg(LOREG|64,loh);
5433 //if(lol>=0) emit_loadreg(LOREG,lol);
5434 }
5435 if(opcode2[i]==0x1E) // DDIV
5436 {
5437 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5438 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5439 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5440 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5441 assert(d1h>=0);
5442 assert(d2h>=0);
5443 assert(d1l>=0);
5444 assert(d2l>=0);
5445 save_regs(0x100f);
5446 if(d1l!=0) emit_mov(d1l,0);
5447 if(d1h==0) emit_readword((int)&dynarec_local,1);
5448 else if(d1h>1) emit_mov(d1h,1);
5449 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5450 else if(d2l>2) emit_mov(d2l,2);
5451 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5452 else if(d2h>3) emit_mov(d2h,3);
5453 emit_call((int)&div64);
5454 restore_regs(0x100f);
// Reload whichever halves of HI/LO are currently mapped to host registers.
5455 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5456 signed char hil=get_reg(i_regs->regmap,HIREG);
5457 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5458 signed char lol=get_reg(i_regs->regmap,LOREG);
5459 if(hih>=0) emit_loadreg(HIREG|64,hih);
5460 if(hil>=0) emit_loadreg(HIREG,hil);
5461 if(loh>=0) emit_loadreg(LOREG|64,loh);
5462 if(lol>=0) emit_loadreg(LOREG,lol);
5463 }
5464 if(opcode2[i]==0x1F) // DDIVU
5465 {
5466 //u_int hr,reglist=0;
5467 //for(hr=0;hr<HOST_REGS;hr++) {
5468 // if(i_regs->regmap[hr]>=0 && (i_regs->regmap[hr]&62)!=HIREG) reglist|=1<<hr;
5469 //}
5470 signed char d1h=get_reg(i_regs->regmap,rs1[i]|64);
5471 signed char d1l=get_reg(i_regs->regmap,rs1[i]);
5472 signed char d2h=get_reg(i_regs->regmap,rs2[i]|64);
5473 signed char d2l=get_reg(i_regs->regmap,rs2[i]);
5474 assert(d1h>=0);
5475 assert(d2h>=0);
5476 assert(d1l>=0);
5477 assert(d2l>=0);
5478 save_regs(0x100f);
5479 if(d1l!=0) emit_mov(d1l,0);
5480 if(d1h==0) emit_readword((int)&dynarec_local,1);
5481 else if(d1h>1) emit_mov(d1h,1);
5482 if(d2l<2) emit_readword((int)&dynarec_local+d2l*4,2);
5483 else if(d2l>2) emit_mov(d2l,2);
5484 if(d2h<3) emit_readword((int)&dynarec_local+d2h*4,3);
5485 else if(d2h>3) emit_mov(d2h,3);
5486 emit_call((int)&divu64);
5487 restore_regs(0x100f);
5488 signed char hih=get_reg(i_regs->regmap,HIREG|64);
5489 signed char hil=get_reg(i_regs->regmap,HIREG);
5490 signed char loh=get_reg(i_regs->regmap,LOREG|64);
5491 signed char lol=get_reg(i_regs->regmap,LOREG);
5492 if(hih>=0) emit_loadreg(HIREG|64,hih);
5493 if(hil>=0) emit_loadreg(HIREG,hil);
5494 if(loh>=0) emit_loadreg(LOREG|64,loh);
5495 if(lol>=0) emit_loadreg(LOREG,lol);
5496 }
5497 }
4600ba03 5498#else
5499 assert(0);
5500#endif
57871462 5501 }
5502 else
5503 {
5504 // Multiply by zero is zero.
5505 // MIPS does not have a divide by zero exception.
5506 // The result is undefined, we return zero.
5507 signed char hr=get_reg(i_regs->regmap,HIREG);
5508 signed char lr=get_reg(i_regs->regmap,LOREG);
5509 if(hr>=0) emit_zeroreg(hr);
5510 if(lr>=0) emit_zeroreg(lr);
5511 }
5512}
5513#define multdiv_assemble multdiv_assemble_arm
5514
// Intentionally emits nothing on ARM.  The x86 backend uses this hook to
// place the constant 0xf8 in register r; on ARM the hash is computed by the
// single instruction emitted in do_rhash(), so no preload is required.
void do_preload_rhash(int r) {
  (void)r;
}
5519
5520void do_preload_rhtbl(int ht) {
5521 emit_addimm(FP,(int)&mini_ht-(int)&dynarec_local,ht);
5522}
5523
// Emit code computing the mini hash table byte offset for the address in rs:
// rh = rs & 0xf8.  With mini_ht declared as u_int[32][2] (8 bytes per entry)
// the mask yields the offset of one of its 32 entries.
void do_rhash(int rs,int rh) {
  const int entry_offset_mask=0xf8;
  emit_andimm(rs,entry_offset_mask,rh);
}
5527
5528void do_miniht_load(int ht,int rh) {
5529 assem_debug("ldr %s,[%s,%s]!\n",regname[rh],regname[ht],regname[rh]);
5530 output_w32(0xe7b00000|rd_rn_rm(rh,ht,rh));
5531}
5532
5533void do_miniht_jump(int rs,int rh,int ht) {
5534 emit_cmp(rh,rs);
5535 emit_ldreq_indexed(ht,4,15);
5536 #ifdef CORTEX_A8_BRANCH_PREDICTION_HACK
5537 emit_mov(rs,7);
5538 emit_jmp(jump_vaddr_reg[7]);
5539 #else
5540 emit_jmp(jump_vaddr_reg[rs]);
5541 #endif
5542}
5543
5544void do_miniht_insert(u_int return_address,int rt,int temp) {
5545 #ifdef ARMv5_ONLY
5546 emit_movimm(return_address,rt); // PC into link register
5547 add_to_linker((int)out,return_address,1);
5548 emit_pcreladdr(temp);
5549 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5550 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5551 #else
5552 emit_movw(return_address&0x0000FFFF,rt);
5553 add_to_linker((int)out,return_address,1);
5554 emit_pcreladdr(temp);
5555 emit_writeword(temp,(int)&mini_ht[(return_address&0xFF)>>3][1]);
5556 emit_movt(return_address&0xFFFF0000,rt);
5557 emit_writeword(rt,(int)&mini_ht[(return_address&0xFF)>>3][0]);
5558 #endif
5559}
5560
5561// Sign-extend to 64 bits and write out upper half of a register
5562// This is useful where we have a 32-bit value in a register, and want to
5563// keep it in a 32-bit register, but can't guarantee that it won't be read
5564// as a 64-bit value later.
5565void wb_sx(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32_pre,uint64_t is32,uint64_t u,uint64_t uu)
5566{
24385cae 5567#ifndef FORCE32
57871462 5568 if(is32_pre==is32) return;
5569 int hr,reg;
5570 for(hr=0;hr<HOST_REGS;hr++) {
5571 if(hr!=EXCLUDE_REG) {
5572 //if(pre[hr]==entry[hr]) {
5573 if((reg=pre[hr])>=0) {
5574 if((dirty>>hr)&1) {
5575 if( ((is32_pre&~is32&~uu)>>reg)&1 ) {
5576 emit_sarimm(hr,31,HOST_TEMPREG);
5577 emit_storereg(reg|64,HOST_TEMPREG);
5578 }
5579 }
5580 }
5581 //}
5582 }
5583 }
24385cae 5584#endif
57871462 5585}
5586
5587void wb_valid(signed char pre[],signed char entry[],u_int dirty_pre,u_int dirty,uint64_t is32_pre,uint64_t u,uint64_t uu)
5588{
5589 //if(dirty_pre==dirty) return;
5590 int hr,reg,new_hr;
5591 for(hr=0;hr<HOST_REGS;hr++) {
5592 if(hr!=EXCLUDE_REG) {
5593 reg=pre[hr];
5594 if(((~u)>>(reg&63))&1) {
f776eb14 5595 if(reg>0) {
57871462 5596 if(((dirty_pre&~dirty)>>hr)&1) {
5597 if(reg>0&&reg<34) {
5598 emit_storereg(reg,hr);
5599 if( ((is32_pre&~uu)>>reg)&1 ) {
5600 emit_sarimm(hr,31,HOST_TEMPREG);
5601 emit_storereg(reg|64,HOST_TEMPREG);
5602 }
5603 }
5604 else if(reg>=64) {
5605 emit_storereg(reg,hr);
5606 }
5607 }
5608 }
57871462 5609 }
5610 }
5611 }
5612}
5613
5614
5615/* using strd could possibly help but you'd have to allocate registers in pairs
5616void wb_invalidate_arm(signed char pre[],signed char entry[],uint64_t dirty,uint64_t is32,uint64_t u,uint64_t uu)
5617{
5618 int hr;
5619 int wrote=-1;
5620 for(hr=HOST_REGS-1;hr>=0;hr--) {
5621 if(hr!=EXCLUDE_REG) {
5622 if(pre[hr]!=entry[hr]) {
5623 if(pre[hr]>=0) {
5624 if((dirty>>hr)&1) {
5625 if(get_reg(entry,pre[hr])<0) {
5626 if(pre[hr]<64) {
5627 if(!((u>>pre[hr])&1)) {
5628 if(hr<10&&(~hr&1)&&(pre[hr+1]<0||wrote==hr+1)) {
5629 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5630 emit_sarimm(hr,31,hr+1);
5631 emit_strdreg(pre[hr],hr);
5632 }
5633 else
5634 emit_storereg(pre[hr],hr);
5635 }else{
5636 emit_storereg(pre[hr],hr);
5637 if( ((is32>>pre[hr])&1) && !((uu>>pre[hr])&1) ) {
5638 emit_sarimm(hr,31,hr);
5639 emit_storereg(pre[hr]|64,hr);
5640 }
5641 }
5642 }
5643 }else{
5644 if(!((uu>>(pre[hr]&63))&1) && !((is32>>(pre[hr]&63))&1)) {
5645 emit_storereg(pre[hr],hr);
5646 }
5647 }
5648 wrote=hr;
5649 }
5650 }
5651 }
5652 }
5653 }
5654 }
5655 for(hr=0;hr<HOST_REGS;hr++) {
5656 if(hr!=EXCLUDE_REG) {
5657 if(pre[hr]!=entry[hr]) {
5658 if(pre[hr]>=0) {
5659 int nr;
5660 if((nr=get_reg(entry,pre[hr]))>=0) {
5661 emit_mov(hr,nr);
5662 }
5663 }
5664 }
5665 }
5666 }
5667}
5668#define wb_invalidate wb_invalidate_arm
5669*/
5670
dd3a91a1 5671// Clearing the cache is rather slow on ARM Linux, so mark the areas
5672// that need to be cleared, and then only clear these areas once.
5673void do_clear_cache()
5674{
5675 int i,j;
5676 for (i=0;i<(1<<(TARGET_SIZE_2-17));i++)
5677 {
5678 u_int bitmap=needs_clear_cache[i];
5679 if(bitmap) {
5680 u_int start,end;
5681 for(j=0;j<32;j++)
5682 {
5683 if(bitmap&(1<<j)) {
5684 start=BASE_ADDR+i*131072+j*4096;
5685 end=start+4095;
5686 j++;
5687 while(j<32) {
5688 if(bitmap&(1<<j)) {
5689 end+=4096;
5690 j++;
5691 }else{
5692 __clear_cache((void *)start,(void *)end);
5693 break;
5694 }
5695 }
5696 }
5697 }
5698 needs_clear_cache[i]=0;
5699 }
5700 }
5701}
5702
57871462 5703// CPU-architecture-specific initialization
5704void arch_init() {
3d624f89 5705#ifndef DISABLE_COP1
57871462 5706 rounding_modes[0]=0x0<<22; // round
5707 rounding_modes[1]=0x3<<22; // trunc
5708 rounding_modes[2]=0x1<<22; // ceil
5709 rounding_modes[3]=0x2<<22; // floor
3d624f89 5710#endif
57871462 5711}
b9b61529 5712
5713// vim:shiftwidth=2:expandtab